From 5c37f25d7e2e0cc09c37ea15144eb3ccbb60188f Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 2 Mar 2022 12:57:05 +0100 Subject: [PATCH 01/24] First commit --- R/Intbc.R | 49 +++++++++++ R/Interpolation.R | 28 +++++++ R/Intlr.R | 163 ++++++++++++++++++++++++++++++++++++ R/Utils.R | 12 +++ examples/interpolation-bc.R | 72 ++++++++++++++++ examples/interpolation-lr.R | 74 ++++++++++++++++ examples/interpolation.R | 61 ++++++++++++++ 7 files changed, 459 insertions(+) create mode 100644 R/Intbc.R create mode 100644 R/Interpolation.R create mode 100644 R/Intlr.R create mode 100644 R/Utils.R create mode 100644 examples/interpolation-bc.R create mode 100644 examples/interpolation-lr.R create mode 100644 examples/interpolation.R diff --git a/R/Intbc.R b/R/Intbc.R new file mode 100644 index 0000000..19da72c --- /dev/null +++ b/R/Intbc.R @@ -0,0 +1,49 @@ +Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { + + require(CSTools) + + stopifnot(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping')) + stopifnot(c('member','sdate') %in% names(dim(exp$data))) + + lonmin <- attr(exp$lon,"first_lon") # this should not be needed + lonmax <- attr(exp$lon,"last_lon") + latmin <- attr(exp$lat,"first_lat") + latmax <- attr(exp$lat,"last_lat") + exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method, + crop = c(lonmin, lonmax, latmin, latmax)) + + # Interpolate obs to the same target grid to ensure the matching with exp coordinates + obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method, + crop = c(lonmin, lonmax, latmin, latmax)) + + #.check_coords + if (bc_method == 'sbc' | bc_method == 'simple_bias') { + if (dim(obs_interpolated$data)['sdate'] == 1) { + warning('**WARNING: Simple Bias Correction should not be used with + only one observation. Returning NA. ') + } + res <- BiasCorrection(exp = exp_interpolated$data, + obs = obs_interpolated$data) + } + else if (bc_method == 'cal' | bc_method == 'calibration') { + if (dim(exp_interpolated$data)['member'] == 1) { + stop('**ERROR: Calibration must not be used with + only one ensemble member.') + } + res <- Calibration(exp = exp_interpolated$data, obs = obs_interpolated$data, + cal.method = 'mse_min', ncores = ncores) + } + else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_interpolated$data, + sample_dims = 'sdate', method = 'QUANT', ncores = ncores) + } + + # Create an s2dv_cube object + res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)) + + return(res_s2dv) +} + + + + diff --git a/R/Interpolation.R b/R/Interpolation.R new file mode 100644 index 0000000..d466d7c --- /dev/null +++ b/R/Interpolation.R @@ -0,0 +1,28 @@ +#methods implemented: con, bil, bic, nn, con2 +#target_grid has to be either a grid recognised by CDO or a NETCDF file +# requires CDO_1.9.8 or newer versions when using nn method +Interpolation <- function(exp, target_grid, method, ...) { + + require(s2dv) + + # Input data must be an s2dv_cube object + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + # Regrid with CDO + res <- CDORemap(data_array = exp$data, + lats = exp$lat, + lons = exp$lon, + grid = target_grid, + method = method, + ...) 
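+
+  # CDORemap is expected to return a list with the regridded field in
+  # 'data_array' and the destination coordinates in 'lons' and 'lats';
+  # these are unpacked into a new s2dv_cube just below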
+
+  # Create an s2dv_cube object
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats))
+
+  return(res_s2dv)
+}
+
+
+
diff --git a/R/Intlr.R b/R/Intlr.R
new file mode 100644
index 0000000..5522a5c
--- /dev/null
+++ b/R/Intlr.R
@@ -0,0 +1,163 @@
+Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL, loocv = FALSE, ncores = 1) {
+
+  require(multiApply)
+
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  if (is.array(predictors)) {
+    stopifnot('sdate' %in% names(dim(predictors)))
+  }
+
+  stopifnot('sdate' %in% names(dim(exp$data)))
+  stopifnot('sdate' %in% names(dim(obs$data)))
+
+  lonmin <- attr(exp$lon,"first_lon") # this should not be needed
+  lonmax <- attr(exp$lon,"last_lon")
+  latmin <- attr(exp$lat,"first_lat")
+  latmax <- attr(exp$lat,"last_lat")
+  exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method,
+                                    crop = c(lonmin, lonmax, latmin, latmax))
+
+  # Interpolate obs to the same target grid to ensure the matching with exp coordinates
+  obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method,
+                                    crop = c(lonmin, lonmax, latmin, latmax))
+
+  # TODO: add check .check_coords?
+  if (lr_method == 'basic') {
+    pred <- exp_interpolated$data
+    trgt_dims <- 'sdate'
+  }
+
+  # the observations are indices and are used as predictors??
+  else if (lr_method == 'large-scale') {
+    if (is.null(predictors)) {
+      stop("The large-scale predictors must be passed through the parametre 'predictors'")
+    }
+    pred <- predictors
+
+    var_dim_in_predictors <- names(dim(predictors))[names(dim(predictors)) != 'sdate']
+    trgt_dims <- c('sdate', var_dim_in_predictors)
+  }
+
+  else if (lr_method == '4nn') {
+    pred <- Apply(list(exp_interpolated$data), target_dims = list(c('lat','lon')), fun = find_4nn)$output1
+    trgt_dims <- c('sdate','nn')
+  }
+
+  else {
+    stop(paste0(lr_method, " method is not implemented yet"))
+  }
+
+  res <- Apply(list(pred, obs_interpolated$data), target_dims = list(trgt_dims, 'sdate'), fun = .intlr,
+               loocv = loocv, ncores = ncores)$output1
+
+  names(dim(res))[1] <- 'sdate'
+
+  # Reorder dimensions to match those of the input model data
+  res <- .reorder_dims(arr_ref = exp$data, arr_to_reorder = res)
+
+  # Create an s2dv_cube object
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat))
+
+  return(res_s2dv)
+}
+
+.intlr <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # training
+  lm1 <- train_lm(df = tmp_df, loocv = loocv)
+
+  # prediction
+  res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+
+  return(res)
+
+}
+
+train_lm <- function(df, loocv) {
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .))
+
+  } else {
+
+    lm1 <- list(lm(data = df, formula = y ~ .))
+  }
+
+  return(lm1)
+}
+
+pred_lm <- function(df, lm1, loocv) {
+
+  if (loocv) {
+
+    pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,]))
+
+  } else {
+
+    pred_vals_ls <- lapply(lm1, predict, newdata = df)
+    pred_vals <- unlist(pred_vals_ls)
+  }
+
+  return(pred_vals)
+}
+
+find_4nn <- function(object) {
+
+  nearests <- InsertDim(object, posdim = 3, lendim = 4, name = 'nn')
+
+  for (i in seq(dim(object)['lon'])) {
+
lon_max_border <- FALSE + lon_min_border <- FALSE + if (((i + 1) > dim(object)['lon'])) { + lon_max_border <- TRUE + } else if (((i - 1) < 1)) { + lon_min_border <- TRUE + } + for (j in seq(dim(object)['lat'])) { + lat_max_border <- FALSE + lat_min_border <- FALSE + if (((j + 1) > dim(object)['lat'])) { + lat_max_border <- TRUE + } else if (((j - 1) < 1)) { + lat_min_border <- TRUE + } + if (lon_max_border) { + nearests[j, i, 2] <- NA + nearests[j, i, 4] <- object[j, i - 1] + } else if (lon_min_border) { + nearests[j, i, 2] <- object[j, i + 1] + nearests[j, i, 4] <- NA + } else { + nearests[j, i, 2] <- object[j, i + 1] + nearests[j, i, 4] <- object[j, i - 1] + } + if (lat_max_border) { + nearests[j, i, 1] <- NA + nearests[j, i, 3] <- object[j - 1, i] + } else if (lat_min_border) { + nearests[j, i, 1] <- object[j + 1, i] + nearests[j, i, 3] <- NA + } else { + nearests[j, i, 1] <- object[j + 1, i] + nearests[j, i, 3] <- object[j - 1, i] + } + } + } + + return(nearests) +} + + diff --git a/R/Utils.R b/R/Utils.R new file mode 100644 index 0000000..bcb9521 --- /dev/null +++ b/R/Utils.R @@ -0,0 +1,12 @@ +.check_coords <- function(lat1, lon1, lat2, lon2) { + if (!all(all((lat1 == lat2)) & all((lon1 == lon2)))) { + stop('**ERROR: Latitude and longitude values from exp and obs must match.') + } +} + +.reorder_dims <- function(arr_ref, arr_to_reorder) { + orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) + return(Reorder(data = arr_to_reorder, order = orddim)) +} + + diff --git a/examples/interpolation-bc.R b/examples/interpolation-bc.R new file mode 100644 index 0000000..5b3eac5 --- /dev/null +++ b/examples/interpolation-bc.R @@ -0,0 +1,72 @@ + +library(startR) +library(s2dv) +library(CSTools) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/R/functions.R') + +plotpath <- '/esarchive/scratch/jramon/downscaling/plots' +target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' + +lonmin <- -22 +lonmax <- 45 +latmin <- 27 +latmax <- 72 + +#obs2 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily/$var$-r1440x721cds/$var$_201702.nc', +# var = 'tasmax', time = indices(1:5), lat = values(list(latmin, latmax)), +# lat_reorder = Sort(decreasing = TRUE), lon = values(list(lonmin, lonmax)), +# lon_reorder = CircularSort(-180, 180), +# synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), +# lat = c('lat', 'latitude')), return_vars = list(lat = NULL, lon = NULL), +# num_procs = 1, retrieve = TRUE) + +#exp2 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily/$var$/$var$_20170201.nc', +# var = 'tasmax', time = indices(1:5), member = indices(1:3), +# lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = TRUE), +# lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), +# synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), +# lat = c('lat', 'latitude'), member = c('member','ensemble')), +# return_vars = list(lat = NULL, lon = NULL), +# num_procs = 1, retrieve = TRUE) +#names(dim(exp))[5] <- 'sdate' +#names(dim(obs))[5] <- 'sdate' + +#lat_obs <- as.numeric(attr(obs,'Variables')$dat$lat) +#lon_obs <- as.numeric(attr(obs,'Variables')$dat$lon) +#lat_exp <- as.numeric(attr(exp,'Variables')$dat$lat) +#lon_exp <- as.numeric(attr(exp,'Variables')$dat$lon) + +obs <- CST_Load(var = 'tasmax', + obs = 'era5', + sdates = c('20170201','20170301','20170401','20170501','20170601'), + leadtimemax = 1, + latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax, + output = 'lonlat') 
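+
+# Quick sanity check: the indexing in the plotting loop below assumes the
+# usual CST_Load dimension order (dataset, member, sdate, ftime, lat, lon)
+print(dim(obs$data))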
+
+exp <- CST_Load(var = 'tasmax',
+                exp = 'system5c3s',
+                nmember = 3,
+                sdates = c('20170201','20170301','20170401','20170501','20170601'),
+                leadtimemax = 1,
+                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
+                output = 'lonlat')
+
+int_methods <- c('con', 'bil', 'bic', 'nn', 'con2')
+bc_methods <- c('sbc', 'cal', 'qm')
+
+for (i in seq(int_methods)) {
+  for (b in seq(bc_methods)) {
+    downscaled <- Intbc(exp = exp, obs = obs, target_grid = target_grid,
+                        int_method = int_methods[i], bc_method = bc_methods[b], ncores = 4)
+    lats <- downscaled$lat
+    lons <- downscaled$lon
+    data <- downscaled$data
+    s2dv::PlotEquiMap(var = data[1,1,1,1,,], lat = lats, lon = lons, filled.continents = FALSE,
+                      toptitle = paste0(int_methods[i], ' ', bc_methods[b]),
+                      fileout = file.path(plotpath, paste0(int_methods[i],'-',bc_methods[b],'.png')))
+  }
+}
+
+
+
+
diff --git a/examples/interpolation-lr.R b/examples/interpolation-lr.R
new file mode 100644
index 0000000..593a6a1
--- /dev/null
+++ b/examples/interpolation-lr.R
@@ -0,0 +1,74 @@
+
+library(CSTools)
+library(startR)
+library(s2dv)
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/R/functions.R')
+
+plotpath <- '/esarchive/scratch/jramon/downscaling/plots'
+target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc'
+
+lonmin <- -22
+lonmax <- 45
+latmin <- 27
+latmax <- 72
+
+#obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily/$var$/$var$_201702.nc',
+#                     var = 'tasmax', lon = 'all', lat = 'all', time = indices(1:10),
+#                     synonims = list(var = c('var','variable'),lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
+#                     return_vars = list(lat = 'dat', lon = 'dat'),
+#                     num_procs = 1, retrieve = TRUE)
+
+#exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily/$var$/$var$_20170201.nc',
+#                     var = 'tasmax', lon = 'all', lat = 'all', time = indices(1:10), member = indices(1),
+#                     synonims = list(var = c('var','variable'),lon = c('lon', 'longitude'), lat = c('lat', 'latitude'), member = c('member','ensemble')),
+#                     return_vars = list(lat = 'dat', lon = 'dat'),
+#                     num_procs = 1, retrieve = TRUE)
+
+#lat_obs <- as.numeric(attr(obs,'Variables')$dat$lat)
+#lon_obs <- as.numeric(attr(obs,'Variables')$dat$lon)
+#lat_exp <- as.numeric(attr(exp,'Variables')$dat$lat)
+#lon_exp <- as.numeric(attr(exp,'Variables')$dat$lon)
+
+obs <- CST_Load(var = 'tasmax',
+                obs = 'era5',
+                sdates = c('20170201','20170301','20170401','20170501','20170601'),
+                leadtimemax = 1,
+                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
+                output = 'lonlat')
+obs$data <- drop(obs$data)
+
+exp <- CST_Load(var = 'tasmax',
+                exp = 'system5c3s',
+                nmember = 3,
+                sdates = c('20170201','20170301','20170401','20170501','20170601'),
+                leadtimemax = 1,
+                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
+                output = 'lonlat')
+
+#ind_rdm must be either a vector or an array with a dimension named 'sdate'
+ind_rdm <- array(NA, dim = c('sdate' = 5,'vars' = 2))
+ind_rdm[,1] <- rnorm(n=5,mean=0,sd=1)
+ind_rdm[,2] <- rnorm(n=5,mean=0,sd=1)
+
+int_method <- 'bil'
+lr_methods <- c('basic', 'large-scale', '4nn')
+
+for (i in seq(lr_methods)) {
+
+  if (lr_methods[i] == 'large-scale') {
+    predictors <- ind_rdm
+  } else {
+    predictors <- NULL
+  }
+  downscaled <- Intlr(exp = exp, obs = obs, target_grid = target_grid, lr_method = lr_methods[i],
+                      int_method = int_method, predictors = predictors, loocv = TRUE, ncores = 4)
+  lats <- downscaled$lat
+  lons <- downscaled$lon
+  data <- downscaled$data
+  s2dv::PlotEquiMap(var = data[1,,,1,1,1], lat = lats, lon = lons, filled.continents = FALSE,
+                    toptitle = paste0(int_method, ' ', lr_methods[i]),
+                    fileout = file.path(plotpath, paste0(int_method, ' ', lr_methods[i],'-lr.png')))
+}
+
+
+
diff --git a/examples/interpolation.R b/examples/interpolation.R
new file mode 100644
index 0000000..157901b
--- /dev/null
+++ b/examples/interpolation.R
@@ -0,0 +1,61 @@
+library(CSTools)
+library(startR)
+library(s2dv)
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/functions.R')
+
+lonmin <- -22
+lonmax <- 45
+latmin <- 27
+latmax <- 72
+
+target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax/tasmax_201702.nc'
+
+obs <- CST_Load(var = 'tasmax',
+                obs = 'era5',
+                sdates = '20170201',
+                leadtimemax = 1,
+                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
+                output = 'lonlat')
+
+exp <- CST_Load(var = 'tasmax',
+                exp = 'system5c3s',
+                nmember = 1,
+                sdates = '20170201',
+                leadtimemax = 1,
+                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
+                output = 'lonlat')
+
+lon_exp <- exp$lon
+lat_exp <- exp$lat
+lon_obs <- obs$lon
+lat_obs <- obs$lat
+
+exp_con <- Interpolation(exp = exp, target_grid = target_grid, method = 'con', crop = c(lonmin,lonmax,latmin,latmax))
+lat_con <- exp_con$lat
+lon_con <- exp_con$lon
+exp_con <- exp_con$data
+exp_bil <- Interpolation(exp = exp, target_grid = target_grid, method = 'bil', crop = c(lonmin,lonmax,latmin,latmax))
+lat_bil <- exp_bil$lat
+lon_bil <- exp_bil$lon
+exp_bil <- exp_bil$data
+exp_bic <- Interpolation(exp = exp, target_grid = target_grid, method = 'bic', crop = c(lonmin,lonmax,latmin,latmax))
+lat_bic <- exp_bic$lat
+lon_bic <- exp_bic$lon
+exp_bic <- exp_bic$data
+exp_nn <- Interpolation(exp = exp, target_grid = target_grid, method = 'nn', crop = c(lonmin,lonmax,latmin,latmax))
+lat_nn <- exp_nn$lat
+lon_nn <- exp_nn$lon
+exp_nn <- exp_nn$data
+exp_con2 <- Interpolation(exp = exp, target_grid = target_grid, method = 'con2', crop = c(lonmin,lonmax,latmin,latmax))
+lat_con2 <- exp_con2$lat
+lon_con2 <- exp_con2$lon
+exp_con2 <- exp_con2$data
+
+s2dv::PlotEquiMap(var = drop(obs$data), lat = lat_obs, lon = lon_obs, filled.continents = FALSE, toptitle = 'Observations')
+s2dv::PlotEquiMap(var = drop(exp$data), lat = lat_exp, lon = lon_exp, filled.continents = FALSE, toptitle = 'Predictions')
+s2dv::PlotEquiMap(var = drop(exp_con), lat = lat_con, lon = lon_con, filled.continents = FALSE, toptitle = 'Conservative')
+s2dv::PlotEquiMap(var = drop(exp_bil), lat = lat_bil, lon = lon_bil, filled.continents = FALSE, toptitle = 'Bilinear')
+s2dv::PlotEquiMap(var = drop(exp_bic), lat = lat_bic, lon = lon_bic, filled.continents = FALSE, toptitle = 'Bicubic')
+s2dv::PlotEquiMap(var = drop(exp_nn), lat = lat_nn, lon = lon_nn, filled.continents = FALSE, toptitle = 'Nearest neighbour')
+s2dv::PlotEquiMap(var = drop(exp_con2), lat = lat_con2, lon = lon_con2, filled.continents = FALSE, toptitle = '2nd order conservative')
+
--
GitLab


From a983c5e074be44351b6b6fb6c09faf32e7114f87 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Fri, 4 Mar 2022 11:15:25 +0100
Subject: [PATCH 02/24] Three methods for LR

---
 R/Intlr.R                   | 83 +++++++++++++++++++++++++++++--------
 examples/interpolation-lr.R | 57 +++++++++++--------------
 2 files changed, 91 insertions(+), 49 deletions(-)

diff --git a/R/Intlr.R b/R/Intlr.R
index 5522a5c..613b525 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -2,72 +2,110 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
 require(multiApply)
 
+  #-----------------------------------
+  # Checkings
+  #-----------------------------------
+  # input exp and obs must be s2dv_cube objects
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
   }
 
+  # input exp and obs must be s2dv_cube objects
   if (!inherits(obs,'s2dv_cube')) {
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
+  # sdate must be the time dimension in the input data
+  stopifnot('sdate' %in% names(dim(exp$data)))
+  stopifnot('sdate' %in% names(dim(obs$data)))
+
+  # the parametre 'predictors' must contain the same sdates as exp$data
   if (is.array(predictors)) {
     stopifnot('sdate' %in% names(dim(predictors)))
+    stopifnot(dim(predictors)['sdate'] == dim(exp$data)['sdate'])
   }
 
-  stopifnot('sdate' %in% names(dim(exp$data)))
-  stopifnot('sdate' %in% names(dim(obs$data)))
-
-  lonmin <- attr(exp$lon,"first_lon") # this should not be needed
+  #-----------------------------------
+  # Interpolation
+  #-----------------------------------
+  # Interpolate coarse-scale data
+  lonmin <- attr(exp$lon,"first_lon")
   lonmax <- attr(exp$lon,"last_lon")
   latmin <- attr(exp$lat,"first_lat")
   latmax <- attr(exp$lat,"last_lat")
-  exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method,
+  exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method = int_method,
                                     crop = c(lonmin, lonmax, latmin, latmax))
 
   # Interpolate obs to the same target grid to ensure the matching with exp coordinates
-  obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method,
+  obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method = int_method,
                                     crop = c(lonmin, lonmax, latmin, latmax))
 
-  # TODO: add check .check_coords?
+  # Ensure that the interpolated exp and obs coordinates match
+  .check_coords(lat1 = exp_interpolated$lat, lon1 = exp_interpolated$lon,
+                lat2 = obs_interpolated$lat, lon2 = obs_interpolated$lon)
+
+  #-----------------------------------
+  # Linear regressions
+  #-----------------------------------
+  # Pointwise linear regression
+  # Predictor: model data
+  # Predictand: observations
   if (lr_method == 'basic') {
-    pred <- exp_interpolated$data
-    trgt_dims <- 'sdate'
+    predictor <- exp_interpolated$data
+    predictand <- obs_interpolated$data
+
+    target_dims_predictor <- 'sdate'
+    target_dims_predictand <- 'sdate'
   }
 
-  # the observations are indices and are used as predictors??
+  # (Multi) linear regression with large-scale predictors
+  # Predictor: passed through the parameter 'predictors' by the user. 
Can be model or observations + # Predictand: model data else if (lr_method == 'large-scale') { if (is.null(predictors)) { stop("The large-scale predictors must be passed through the parametre 'predictors'") } - pred <- predictors + predictand <- exp_interpolated$data + predictor <- predictors - var_dim_in_predictors <- names(dim(predictors))[names(dim(predictors)) != 'sdate'] - trgt_dims <- c('sdate', var_dim_in_predictors) + var_dim_in_predictor <- names(dim(predictors))[names(dim(predictors)) != 'sdate'] + target_dims_predictor <- c('sdate', var_dim_in_predictor) + target_dims_predictand <- 'sdate' } + # Multi-linear regression with the four nearest neighbours + # Predictors: model data + # Predictand: observations else if (lr_method == '4nn') { - pred <- Apply(list(exp_interpolated$data), target_dims = list(c('lat','lon')), fun = find_4nn)$output1 - trgt_dims <- c('sdate','nn') + predictor <- Apply(list(exp_interpolated$data), target_dims = list(c('lat','lon')), fun = find_4nn)$output1 + predictand <- obs_interpolated$data + target_dims_predictor <- c('sdate','nn') + target_dims_predictand <- 'sdate' } else { stop(paste0(lr_method, " method is not implemented yet")) } - res <- Apply(list(pred, obs_interpolated$data), target_dims = list(trgt_dims, 'sdate'), fun = .intlr, - loocv = loocv, ncores = ncores)$output1 + # Apply the linear regressions + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), + fun = .intlr, loocv = loocv, ncores = ncores)$output1 names(dim(res))[1] <- 'sdate' # Reorder dimensions to match those of the input model data res <- .reorder_dims(arr_ref = exp$data, arr_to_reorder = res) + #----------------------------------- # Create an s2dv_cube object + #----------------------------------- res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)) return(res_s2dv) } +#----------------------------------- +# Atomic function to generate and apply the linear regressions +#----------------------------------- .intlr <- function(x, y, loocv) { tmp_df <- data.frame(x = x, y = y) @@ -82,6 +120,10 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL } +#----------------------------------- +# Function to generate the linear regressions. +# Returns a list +#----------------------------------- train_lm <- function(df, loocv) { # Remove columns containing only NA's @@ -99,6 +141,9 @@ train_lm <- function(df, loocv) { return(lm1) } +#----------------------------------- +# Function to apply the linear regressions. +#----------------------------------- pred_lm <- function(df, lm1, loocv) { if (loocv) { @@ -114,6 +159,10 @@ pred_lm <- function(df, lm1, loocv) { return(pred_vals) } +#----------------------------------- +# Function to find the four nearest neighbours. 
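+# (for each grid point, the values of its neighbours at j+1, i+1, j-1 and
+# i-1 are stacked along a new 'nn' dimension of length 4)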
+# Note the case for the latitude and longitude borders
+#-----------------------------------
 find_4nn <- function(object) {
 
   nearests <- InsertDim(object, posdim = 3, lendim = 4, name = 'nn')
diff --git a/examples/interpolation-lr.R b/examples/interpolation-lr.R
index 593a6a1..38bafba 100644
--- a/examples/interpolation-lr.R
+++ b/examples/interpolation-lr.R
@@ -2,7 +2,9 @@
 library(CSTools)
 library(startR)
 library(s2dv)
-source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/R/functions.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R')
 
 plotpath <- '/esarchive/scratch/jramon/downscaling/plots'
 target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc'
@@ -12,43 +14,21 @@ lonmax <- 45
 latmin <- 27
 latmax <- 72
 
-#obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily/$var$/$var$_201702.nc',
-#                     var = 'tasmax', lon = 'all', lat = 'all', time = indices(1:10),
-#                     synonims = list(var = c('var','variable'),lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
-#                     return_vars = list(lat = 'dat', lon = 'dat'),
-#                     num_procs = 1, retrieve = TRUE)
-
-#exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily/$var$/$var$_20170201.nc',
-#                     var = 'tasmax', lon = 'all', lat = 'all', time = indices(1:10), member = indices(1),
-#                     synonims = list(var = c('var','variable'),lon = c('lon', 'longitude'), lat = c('lat', 'latitude'), member = c('member','ensemble')),
-#                     return_vars = list(lat = 'dat', lon = 'dat'),
-#                     num_procs = 1, retrieve = TRUE)
-
-#lat_obs <- as.numeric(attr(obs,'Variables')$dat$lat)
-#lon_obs <- as.numeric(attr(obs,'Variables')$dat$lon)
-#lat_exp <- as.numeric(attr(exp,'Variables')$dat$lat)
-#lon_exp <- as.numeric(attr(exp,'Variables')$dat$lon)
-
-obs <- CST_Load(var = 'tasmax',
-                obs = 'era5',
-                sdates = c('20170201','20170301','20170401','20170501','20170601'),
-                leadtimemax = 1,
+sdates <- c('20170201','20170301','20170401','20170501','20170601','20170701','20170801','20170901')
+
+obs <- CST_Load(var = 'tasmax', obs = 'era5', sdates = sdates, leadtimemax = 1,
                 latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
                 output = 'lonlat')
 obs$data <- drop(obs$data)
 
-exp <- CST_Load(var = 'tasmax',
-                exp = 'system5c3s',
-                nmember = 3,
-                sdates = c('20170201','20170301','20170401','20170501','20170601'),
-                leadtimemax = 1,
+exp <- CST_Load(var = 'tasmax', exp = 'system5c3s', nmember = 3, sdates = sdates, leadtimemax = 1,
                 latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
                 output = 'lonlat')
 
 #ind_rdm must be either a vector or an array with a dimension named 'sdate'
-ind_rdm <- array(NA, dim = c('sdate' = 5,'vars' = 2))
-ind_rdm[,1] <- rnorm(n=5,mean=0,sd=1)
-ind_rdm[,2] <- rnorm(n=5,mean=0,sd=1)
+ind_rdm <- array(NA, dim = c('sdate' = 8,'vars' = 2))
+ind_rdm[,1] <- rnorm(n=8,mean=0,sd=1)
+ind_rdm[,2] <- rnorm(n=8,mean=0,sd=1)
 
 int_method <- 'bil'
 lr_methods <- c('basic', 'large-scale', '4nn')
@@ -65,10 +45,23 @@ for (i in seq(lr_methods)) {
   lats <- downscaled$lat
   lons <- downscaled$lon
   data <- downscaled$data
-  s2dv::PlotEquiMap(var = data[1,,,1,1,1], lat = lats, lon = lons, filled.continents = FALSE,
-                    toptitle = paste0(int_method, ' ', lr_methods[i]),
+  s2dv::PlotEquiMap(var = data[1,1,1,1,,], lat = lats, lon = lons, filled.continents = FALSE,
+                    toptitle = paste0(int_method, ' ', lr_methods[i]), 
brks = seq(250,310,5), fileout = file.path(plotpath, paste0(int_method, ' ', lr_methods[i],'-lr.png'))) } +# Predictions +s2dv::PlotEquiMap(var = exp$data[1,1,1,1,,], lat = exp$lat, lon = exp$lon, filled.continents = FALSE, + toptitle = "Predictions Europe", brks = seq(250,310,5), + fileout = file.path(plotpath, "Predictions_Europe.png")) + +# Observations +s2dv::PlotEquiMap(var = obs$data[1,,], lat = obs$lat, lon = obs$lon, filled.continents = FALSE, + toptitle = "Observations Europe", brks = seq(250,310,5), + fileout = file.path(plotpath, "Observations_Europe.png")) + + + + -- GitLab From 898ede104a56cb961f6c6b952ed54b2bd7219846 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Fri, 4 Mar 2022 16:51:09 +0100 Subject: [PATCH 03/24] Added downscaling to point locations --- R/Intbc.R | 10 ++----- R/Interpolation.R | 71 +++++++++++++++++++++++++++++++++++++++++------ R/Intlr.R | 10 ++----- 3 files changed, 66 insertions(+), 25 deletions(-) diff --git a/R/Intbc.R b/R/Intbc.R index 19da72c..dcd663f 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -5,16 +5,10 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { stopifnot(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping')) stopifnot(c('member','sdate') %in% names(dim(exp$data))) - lonmin <- attr(exp$lon,"first_lon") # this should not be needed - lonmax <- attr(exp$lon,"last_lon") - latmin <- attr(exp$lat,"first_lat") - latmax <- attr(exp$lat,"last_lat") - exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method, - crop = c(lonmin, lonmax, latmin, latmax)) + exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method) # Interpolate obs to the same target grid to ensure the matching with exp coordinates - obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method, - crop = c(lonmin, lonmax, latmin, latmax)) + obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method) #.check_coords if (bc_method == 'sbc' | bc_method == 'simple_bias') { diff --git a/R/Interpolation.R b/R/Interpolation.R index d466d7c..6e3b0c9 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -1,22 +1,53 @@ #methods implemented: con, bil, bic, nn, con2 #target_grid has to be either a grid recognised by CDO or a NETCDF file # requires CDO_1.9.8 or newer versions when using nn method -Interpolation <- function(exp, target_grid, method, ...) { +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/create_interp_weights.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/interpolate_model_data.R') +Interpolation <- function(exp, method, target_grid = NULL, points = NULL) { require(s2dv) - + # Input data must be an s2dv_cube object if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") } - # Regrid with CDO - res <- CDORemap(data_array = exp$data, - lats = exp$lat, - lons = exp$lon, - grid = target_grid, - method = method, - ...) 
+ if (!is.null(points)) { + if (!inherits(points, 'list')) { + stop("Parameter 'points' must be a list of two elements containing the point ", + "latitudes and longitudes in the form 'list(c(latitudes), c(longitudes))'") + } + + # Check that the number of latitudes and longitudes match + stopifnot(length(unique(lengths(points))) == 1L) + } + + lonmin <- attr(exp$lon,"first_lon") + lonmax <- attr(exp$lon,"last_lon") + latmin <- attr(exp$lat,"first_lat") + latmax <- attr(exp$lat,"last_lat") + + # Map regrid with CDO + if (is.null(points)) { + res <- CDORemap(data_array = exp$data, + lats = exp$lat, + lons = exp$lon, + grid = target_grid, + method = method, + crop = c(lonmin, lonmax, latmin, latmax)) + + # Interpolate to point locations + } else { + # First create interpolation weights, depending on the chosen method + weights <- create_interp_weights(ncfile = exp$source_files[1], locids = 1:unique(lengths(points)), + lats = points[[1]], lons = points[[2]], method = method) + + # Select coarse-scale data to be interpolated + model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp$data) + + # Interpolate model data to point locations + res <- interpolate_data(model_data_gridpoints, weights) + } # Create an s2dv_cube object res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats)) @@ -26,3 +57,25 @@ Interpolation <- function(exp, target_grid, method, ...) { + +get_model_data <- function(weights.df, mdata) + + #---------------------------------- + # Compute interpolation weights. Get grid from first file + #---------------------------------- + rea.path <- grep(reanalyses[rea],reanalyses_paths,value=T) + ncfirst <- paste(rea.path,paste0(variable,"_",s1,"01.nc"),sep="/") + weights <- create_interp_weights(ncfile = ncfirst,locids = metadata$Name, lats = metadata$Lat, + lons = metadata$Lon, method = method) + + #---------------------------------- + # Load data at (i,j) locations + # sdates must be in format yyyymm + #---------------------------------- + sdates <- format(ymd(paste0(s1,"0101")) + months(0:11) + rep(years(0:(s2-s1)),each=12),"%Y%m") + nsdates <- length(sdates) + + files <- paste(rea.path,"$var$_$sdate$.nc",sep="/") + model_data <- get_model_data(weights,dat=files,var=variable,sdate=sdates,time='all',num_procs=4) + + diff --git a/R/Intlr.R b/R/Intlr.R index 613b525..98290ff 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -29,16 +29,10 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL # Interpolation #----------------------------------- # Interpolate coarse-scale data - lonmin <- attr(exp$lon,"first_lon") - lonmax <- attr(exp$lon,"last_lon") - latmin <- attr(exp$lat,"first_lat") - latmax <- attr(exp$lat,"last_lat") - exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method = int_method, - crop = c(lonmin, lonmax, latmin, latmax)) + exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method = int_method) # Interpolate obs to the same target grid to ensure the matching with exp coordinates - obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method = int_method, - crop = c(lonmin, lonmax, latmin, latmax)) + obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method = int_method) # Ensure that the interpolated exp and obs coordinates match .check_coords(lat1 = exp_interpolated$lat, lon1 = exp_interpolated$lon, -- GitLab From d53a33d4d73ca9d48c9bc457ef40037131bcff50 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Thu, 17 Mar 
2022 16:31:20 +0100 Subject: [PATCH 04/24] Added analogs function and corrected LR with 4nn --- R/Analogs.R | 100 ++++++++++ R/Interpolation.R | 484 +++++++++++++++++++++++++++++++++++++++++++--- R/Intlr.R | 117 ++++++----- R/Utils.R | 16 ++ 4 files changed, 625 insertions(+), 92 deletions(-) create mode 100644 R/Analogs.R diff --git a/R/Analogs.R b/R/Analogs.R new file mode 100644 index 0000000..808d661 --- /dev/null +++ b/R/Analogs.R @@ -0,0 +1,100 @@ + +# As it is now, selects only the best analog +Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", ncores = 1) { + + require(multiApply) + require(CSTools) + + #----------------------------------- + # Checkings + #----------------------------------- + # input exp and obs must be s2dv_cube objects + if (!inherits(exp,'s2dv_cube')) { + stop("**ERROR: Parameter 'exp' must be of the class 's2dv_cube'") + } + + # input exp and obs must be s2dv_cube objects + if (!inherits(obs,'s2dv_cube')) { + stop("**ERROR: Parameter 'obs' must be of the class 's2dv_cube'") + } + + # Select a function to apply to the analogs selected for a given observation + stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) + + # sdate must be the time dimension in the input data + #stopifnot('sdate' %in% names(dim(exp$data))) + #stopifnot('sdate' %in% names(dim(obs$data))) + + #----------------------------------- + # Interpolate high-res observations to the coarse grid + #----------------------------------- + grid_exp <- exp$source_files[1] + + # Since we are interpolated to a coarser grid, round bordering coordinates + attr(obs$lon,"first_lon") <- round(attr(obs$lon,"first_lon")) + attr(obs$lon,"last_lon") <- round(attr(obs$lon,"last_lon")) + attr(obs$lat,"first_lat") <- round(attr(obs$lat,"first_lat")) + attr(obs$lat,"last_lat") <- round(attr(obs$lat,"last_lat")) + + obs_interpolated <- Interpolation(exp = obs, target_grid = grid_exp, method_remap = "conservative") + lats <- obs$lat + lons <- obs$lon + + #----------------------------------- + # Reshape train and test + #----------------------------------- + res <- Apply(list(obs_interpolated$data, exp$data, obs$data), target_dims = list(c('sdate', 'lat', 'lon'), + c('sdate', 'lat', 'lon'), c('sdate', 'lat', 'lon')), fun = function(tr, te, ob) + .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, fun = fun_analog))$output1 + + res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats)) + + return(res_s2dv) +} + + +# For each element in test return the corresponding k-NN observations, or its mean if indicated in fun +.analogs <- function(train, test, obs_hres, k, fun = NULL) { + + require(FNN) + require(multiApply) + + # Get knn indices for each test element + knn.ind <- get_knn(train = train, test = test, k = k) + + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + names(dim(idx)) <- c("sdate", "analog"); names(dim(dist)) <- c("sdate", "analog") + + # Obtain the obs for the knn indices + analogs <- Apply(idx, margins = "analog", fun = function(an) { obs_hres[an, , ] })$output1 + + # Apply function for analogs + if(!is.null(fun)) { + + margins_analogs <- names(dim(analogs))[names(dim(analogs)) != "analog"] + + if (fun == "wmean") { + w <- 1 / dist + analogs <- Apply(list(analogs, dist), margins = list(margins_analogs, "sdate"), + fun = function(x, w) weighted.mean(x, w))$output1 + } else { + analogs <- Apply(analogs, margins = margins_analogs, fun = fun)$output1 + } + } + + return(analogs) +} + +# For each element in test, find the indices of the k nearest 
neighbors in train
+get_knn <- function(train, test, k) {
+
+  # Reformat train and test as an array with (time, points)
+  train <- aperm(Apply(train, target_dims = list(c('lat', 'lon')), fun=function(x) { as.vector(x) })$output1)
+  test <- aperm(Apply(test, target_dims = list(c('lat', 'lon')), fun=function(x) { as.vector(x) })$output1)
+
+  # For each row in test, find the k closest rows in train
+  knn.ind <- get.knnx(train, test, k)
+  return(knn.ind)
+}
+
+
diff --git a/R/Interpolation.R b/R/Interpolation.R
index 6e3b0c9..6bf4a3f 100644
--- a/R/Interpolation.R
+++ b/R/Interpolation.R
@@ -1,25 +1,50 @@
-#methods implemented: con, bil, bic, nn, con2
+#method_remap implemented: con, bil, bic, nn, con2
+#method_point_interp: nearest, bilinear, 9point, invdist4nn, NE, NW, SE, SW
 #target_grid has to be either a grid recognised by CDO or a NETCDF file
 # requires CDO_1.9.8 or newer versions when using nn method
-source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/create_interp_weights.R')
-source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/interpolate_model_data.R')
-Interpolation <- function(exp, method, target_grid = NULL, points = NULL) {
-
+Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
+                          method_point_interp = NULL)
+{
   require(s2dv)
 
   # Input data must be an s2dv_cube object
   if (!inherits(exp,'s2dv_cube')) {
-    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+    stop("**ERROR: Parameter 'exp' must be of the class 's2dv_cube'")
   }
 
+  # checkings for the case of interpolation to point locations
   if (!is.null(points)) {
     if (!inherits(points, 'list')) {
-      stop("Parameter 'points' must be a list of two elements containing the point ",
+      stop("**ERROR: Parameter 'points' must be a list of two elements containing the point ",
           "latitudes and longitudes in the form 'list(c(latitudes), c(longitudes))'")
     }
+
+    if (is.null(method_point_interp)) {
+      stop("**ERROR: Parameter 'method_point_interp' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    # Points must be a list of two elements
+    if (length(points) != 2) {
+      stop("**ERROR: 'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes in the form 'list(c(latitudes), c(longitudes))'")
+    }
 
     # Check that the number of latitudes and longitudes match
-    stopifnot(length(unique(lengths(points))) == 1L)
+    if (length(unique(lengths(points))) != 1L) {
+      stop("**ERROR: the number of latitudes and longitudes must match")
+    }
+  } else {
+    if (is.null(method_remap)) {
+      stop("**ERROR: Parameter 'method_remap' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are con, bil, bic, nn, con2")
+    }
+    if (is.null(target_grid)) {
+      stop("**ERROR: Parameter 'target_grid' must be a character vector indicating the ",
+           "target grid to be passed to CDO. It must be a grid recognised by CDO ",
+           "or a NetCDF file")
+    }
   }
 
   lonmin <- attr(exp$lon,"first_lon")
@@ -27,55 +52,452 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid =
   latmin <- attr(exp$lat,"first_lat")
   latmax <- attr(exp$lat,"last_lat")
 
+  #----------------------------------
   # Map regrid with CDO
+  #----------------------------------
   if (is.null(points)) {
     res <- CDORemap(data_array = exp$data,
                     lats = exp$lat,
                     lons = exp$lon,
                     grid = target_grid,
-                    method = method,
+                    method = method_remap,
                     crop = c(lonmin, lonmax, latmin, latmax))
 
+    # Create an s2dv_cube object
+    res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats))
+
+  #----------------------------------
   # Interpolate to point locations
+  #----------------------------------
   } else {
     # First create interpolation weights, depending on the chosen method
-    weights <- create_interp_weights(ncfile = exp$source_files[1], locids = 1:unique(lengths(points)),
-                                     lats = points[[1]], lons = points[[2]], method = method)
+    ncfile_exp <- exp$source_files[1]
+    weights <- create_interp_weights(ncfile = ncfile_exp, locids = 1:unique(lengths(points)),
+                                     lats = points[[1]], lons = points[[2]], method = method_point_interp)
 
     # Select coarse-scale data to be interpolated
     model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp$data)
 
     # Interpolate model data to point locations
     res <- interpolate_data(model_data_gridpoints, weights)
+
+    # Create an s2dv_cube object
+    res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[2]], lat = points[[1]]))
+
   }
-
-  # Create an s2dv_cube object
-  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats))
 
   return(res_s2dv)
 }
+
+#======================
+# Compute weights for interpolation at several (lat,lon) positions
+# We assume that grid boxes are centered in the grid point.
+#======================
+create_interp_weights <- function(ncfile, locids, lats, lons,
+                                  method = c("nearest", "bilinear", "9point", "invdist4nn", "NE",
+                                             "NW", "SE", "SW"))
+{
+  #----------------
+  # Read grid description and compute (i,j) of requested locations (including decimals)
+  #----------------
+  griddes <- get_griddes(ncfile)
+  if (is.null(griddes)) {
+    stop("**ERROR: 'griddes' not found in the NetCDF source files")
+  }
+  gridpoints <- latlon2ij(griddes, lats, lons)
+
+  #----------------
+  # Compute the weights according to the selected method
+  #----------------
+  if(method == "nearest") {
+    #----------------
+    # Round i and j to closest integer. Weight is always 1.
+    #----------------
+
+    #  |     |     |
+    # -+-----+-----+-
+    #  |    x|     |
+    #  |  a  |     |
+    #  |     |     |
+    # -+-----+-----+-
+    #  |     |     |
+
+    centeri <- round(gridpoints$i,0)
+    centeri[centeri == griddes$xsize+1] <- 1 # close longitudes
+
+    weights.df <- data.frame(locid = locids,
+                             lat = lats,
+                             lon = lons,
+                             rawi = gridpoints$i,
+                             rawj = gridpoints$j,
+                             i = centeri,
+                             j = round(gridpoints$j, 0),
+                             weight = 1)
+  } else if (method %in% c("bilinear","invdist4nn")) {
+    #----------------
+    # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x.
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
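+    # As a worked example of the weights below: a point exactly at the
+    # centre of the a-b-c-d box (pcti = pctj = 0.5) receives a bilinear
+    # weight of 0.25 from each of the four surrounding nodes.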
+    #----------------
+    #  |     |     |
+    #- +-----+-----+-
+    #  |     |     |
+    #  |  b  |  c  |
+    #  |     |     |
+    #- +-----+-----+-
+    #  |    x|     |
+    #  |  a  |  d  |
+    #  |     |     |
+    #- +-----+-----+-
+    #  |     |     |
+
+    lowi <- floor(gridpoints$i)
+    highi <- ceiling(gridpoints$i)
+    highi[highi == griddes$xsize+1] <- 1 # close the longitudes
+    lowj <- floor(gridpoints$j)
+    highj <- ceiling(gridpoints$j)
+    # Note: highi and lowi are the same if i is integer
+    # Note: highj and lowj are the same if j is integer
+
+    #----------------
+    # Get x position wrt ad and ab axes (from 0 to 1)
+    #----------------
+    pcti <- gridpoints$i - lowi
+    pctj <- gridpoints$j - lowj
+
+    #----------------
+    # Compute weights for a,b,c,d grid points
+    #----------------
+    if(method == "bilinear") {
+      wa = (1 - pcti) * (1 - pctj)
+      wb = (1 - pcti) * pctj
+      wc = pcti * pctj
+      wd = pcti * (1 - pctj)
+    } else if(method == "invdist4nn") {
+      #----------------
+      # Note: the distance is computed in the (i,j) space.
+      # Note2: this method does not guarantee a continuous interpolation.
+      # Use bilinear if that's desirable.
+      # When x is on the ab line, c and d would be used. In the limit of x
+      # being just left of ab other points would be used.
+      # Here we just dropped c and d coeffs when over ab. Same for ad line,
+      # b and c coeffs dropped. This prevents repeated nodes.
+      #----------------
+      ida = 1 / sqrt(pcti^2 + pctj^2)
+      idb = 1 / sqrt(pcti^2 + (1 - pctj)^2)
+      idc = 1 / sqrt((1-pcti)^2 + (1-pctj)^2)
+      idd = 1 / sqrt((1-pcti)^2 + pctj^2)
+      idb[pctj == 0] <- 0;
+      idc[pctj == 0] <- 0;
+      idc[pcti == 0] <- 0;
+      idd[pcti == 0] <- 0;
+
+      #----------------
+      # Normalize vector of inverse distances
+      #----------------
+      invdist <- cbind(ida, idb, idc, idd)
+      print(invdist)
+      w <- t(apply(invdist, c(1), function(x) { print(x); if(any(is.infinite(x))) {
+             x <- is.infinite(x) * 1 } ; x <- x/sum(x) }))
+      print(w)
+
+      wa = w[ , 1]
+      wb = w[ , 2]
+      wc = w[ , 3]
+      wd = w[ , 4]
+    }
+
+    #----------------
+    # Put info in dataframes and rbind them
+    #----------------
+    weightsa.df <- data.frame(locid = locids, lat = lats,lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = lowj, weight = wa)
+    weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = highj, weight = wb)
+    weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = highj, weight = wc)
+    weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = lowj, weight = wd)
+    weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df)
+  } else if(method == "9point") {
+    #----------------
+    # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
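+    # For interior points each of the nine nodes receives a weight of 1/9;
+    # at the first and last latitude rows a six-point average (1/6) is
+    # used instead, through w_lowj, w_centerj and w_highj below.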
+ #---------------- + # | | | | + #-+-----+-----+-----+- + # | | | | + # | c | f | i | + # | | | | + #-+-----+-----+-----+- + # | | x| | + # | b | e | h | + # | | | | + #-+-----+-----+-----+- + # | | | | + # | a | d | g | + # | | | | + #-+-----+-----+-----+- + # | | | | + + centeri <- round(gridpoints$i, 0) + centeri[centeri == griddes$xsize + 1] <- 1 + centerj <- round(gridpoints$j, 0) + lowi <- centeri - 1 + highi <- centeri + 1 + lowi[lowi == 0] <- griddes$xsize # close the longitudes + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- centerj - 1 + highj <- centerj + 1 + + #---------------- + # For the north and south pole do a 6-point average + #---------------- + w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) + w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) + w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) + weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) + weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) + weightsg.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) + weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) + weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, + weightsf.df, weightsg.df, weightsh.df, weightsi.df) + } else if(method %in% c("NE", "NW", "SW", "SE")) { + #---------------- + # Find if increasing i and j increases or decreases lat and lon + #---------------- + westtoeast <- (griddes$xinc > 0) + southtonorth <- T + if(griddes$gridtype == "gaussian") { + # We assume gaussian grid latitudes are ordered north to south + southtonorth <- F + } else { #lonlat + if(griddes$yinc < 0) {southtonorth <- F} + } + + #---------------- + # Get the (i,j) coordinates of the desired point (a,b,c or d) around x + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } + + 
if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) }
+    if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) }
+    if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) }
+    if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceiling(gridpoints$i) }
+
+    seli[seli == griddes$xsize + 1] <- 1 # close the longitudes
+
+    weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                             rawj = gridpoints$j, i = seli, j = selj, weight = 1)
+  } else {
+    stop(paste0("**ERROR: Method " ,method, " not implemented"))
+  }
+
+  #----------------
+  # Order by locid and remove lines with 0 weight
+  # This also removes some duplicates in the bilinear/invdist4nn methods when i
+  # or j is a whole number, or in the 9-point method when at the poles.
+  #----------------
+  weights.df <- weights.df[order(weights.df$locid), ]
+  weights.df <- weights.df[weights.df$weight != 0, ]
+
+  #----------------
+  # Add as attributes the method and the nc file used to compute the weights
+  #----------------
+  attributes(weights.df)$nc_file <- normalizePath(ncfile)
+  attributes(weights.df)$method <- method
+
+  return(weights.df)
+}
+
+#======================
+# Compute (i,j) from (lat,lon).
+# Works only for 'lonlat' and 'gaussian' grids.
+# Grids are supposed to cover whole globe.
+#======================
+latlon2ij <- function(griddes, lats, lons) {
+  #------------
+  # Check input params
+  #------------
+  if(length(lons) != length(lats)) {stop("**ERROR: input lat and lon have different lengths.")}
+  if(any(lats < -90) | any(lats > 90)) {stop("**ERROR: latitude out of valid range")}
+  if(round(griddes$xinc*griddes$xsize)!=360) {stop("**ERROR: grid is not global")}
+  # no need to resize lons to [0,360)
+
+  #------------
+  # Compute i (with decimals)
+  # i lies in [1,xsize+1)
+  #------------
+  gridpoints <- list()
+  gridpoints$i <- 1 + (((lons-griddes$xfirst) / griddes$xinc) %% griddes$xsize)
+  #------------
+  # Compute j (with decimals)
+  #------------
+  if(griddes$gridtype=='lonlat') {
+    gridpoints$j <- 1 + (lats-griddes$yfirst) / griddes$yinc
+  } else if(griddes$gridtype == 'gaussian') {
+    # We assume gaussian grid latitudes are ordered north to south
+    # findInterval can only work with monotonic ascending values so we revert twice
+    northj <- griddes$ysize-findInterval(lats, -griddes$yvals)
+    southj <- northj + 1
-get_model_data <- function(weights.df, mdata)
+    # Special case: We are north of the first lat
+    gridpoints$j[northj == 0] <- 1
-  #----------------------------------
-  # Compute interpolation weights. 
Get grid from first file - #---------------------------------- - rea.path <- grep(reanalyses[rea],reanalyses_paths,value=T) - ncfirst <- paste(rea.path,paste0(variable,"_",s1,"01.nc"),sep="/") - weights <- create_interp_weights(ncfile = ncfirst,locids = metadata$Name, lats = metadata$Lat, - lons = metadata$Lon, method = method) + # Special case: We are south of the last lat + gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize - #---------------------------------- - # Load data at (i,j) locations - # sdates must be in format yyyymm - #---------------------------------- - sdates <- format(ymd(paste0(s1,"0101")) + months(0:11) + rep(years(0:(s2-s1)),each=12),"%Y%m") - nsdates <- length(sdates) + # Generic case + ok_idx <- !(northj == 0 | southj == griddes$ysize+1) + gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - + lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) + } else { stop("**ERROR: unsupported grid") } + + return(gridpoints) +} + +#====================== +# Use cdo griddes to obtain grid information +#====================== +get_griddes <- function(ncfile) { + tmp <- system(paste0("cdo griddes ", ncfile, + " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) + arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) + griddes <- as.list(arr[,2]) + names(griddes) <- arr[,1] + + if(griddes$gridtype == "gaussian") { + griddes$yvals <- get_lats(ncfile) + } + + # Convert some fields to numeric. Ensures all fields are present. + for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) { + griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]])) + } + + return(griddes) +} + +#====================== +# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude". +#====================== +get_lats <- function(ncfile) { + + tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T) + + if(!is.null(attributes(tmp)$status)) { + tmp <- system(paste0('ncks -H -s "%f " -v lat ',ncfile),intern=T) + } + + lats <- as.numeric(strsplit(tmp[1],"\\s+",perl=T)[[1]]) + + return(lats) +} - files <- paste(rea.path,"$var$_$sdate$.nc",sep="/") - model_data <- get_model_data(weights,dat=files,var=variable,sdate=sdates,time='all',num_procs=4) +#====================== +# Load model data at all (i,j) pairs listed in the weights dataframe. +# Uses StartR. All ... parameters go to Start (i.e. specify dat, var, +# sdate, time, ensemble, num_procs, etc) +#====================== +get_model_data <- function(weights.df, mdata) { + + require(plyr) + + #----------------- + # Get data for all combinations of i and j. + # (inefficient, getting many unneded pairs). + # Avoid retrieving duplicates with unique() + #----------------- + is <- weights.df$i + js <- weights.df$j + + #----------------- + # Get indices of original is and js in unique(is),unique(js) that were requested + #----------------- + idxi <- match(is, unique(is)) + idxj <- match(js, unique(js)) + + #----------------- + # Subsample mdata to keep only the needed (i,j) pairs. 
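+  # (one 'gridpoint' entry is produced per row of weights.df, so duplicated
+  # (i,j) pairs are deliberately retained)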
+  #-----------------
+  if (is.na(match("longitude", names(dim(mdata))))) {
+    londim <- match("lon", names(dim(mdata)))
+  } else {
+    londim <- match("longitude", names(dim(mdata)))
+  }
+  if (is.na(match("latitude", names(dim(mdata))))) {
+    latdim <- match("lat", names(dim(mdata)))
+  } else {
+    latdim <- match("latitude", names(dim(mdata)))
+  }
+
+  # trick: exchange idxi and idxj
+  if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp }
+  keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)]
+
+  sub_mdata <- apply(mdata,keepdims, function(x) {
+                     laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) })
+  names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Return an array that contains as many gridpoints as (i,j) pairs were requested
+  #-----------------
+  return(sub_mdata)
+}
+
+#======================
+# Multiply the grid-point series by the weights,
+# to obtain the desired interpolations
+#======================
+interpolate_data <- function(model_data, weights.df) {
+  #-----------------
+  # Multiply each gridpoint matrix by its corresponding weight
+  #-----------------
+  gpdim <- match("gridpoint", names(dim(model_data)))
+  weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*")
+
+  #-----------------
+  # Sum all series that belong to same interpolation point
+  # Return an array that contains the requested locations and interpolation type
+  #-----------------
+  interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) })
+  names(dim(interp_data))[1] <- "location"
+  return(interp_data)
+}
diff --git a/R/Intlr.R b/R/Intlr.R
index 613b525..6c7e6d0 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -1,4 +1,5 @@
-Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL, loocv = FALSE, ncores = 1) {
+Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL,
+                  large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) {
 
   require(multiApply)
 
@@ -19,25 +20,29 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   stopifnot('sdate' %in% names(dim(exp$data)))
   stopifnot('sdate' %in% names(dim(obs$data)))
 
-  # the parametre 'predictors' must contain the same sdates as exp$data
+  # checkings for the parametre 'predictors'
   if (is.array(predictors)) {
+    # ensure the predictor variable name matches the parametre large_scale_predictor_dimname
+    stopifnot(large_scale_predictor_dimname %in% names(dim(predictors)))
     stopifnot('sdate' %in% names(dim(predictors)))
-    stopifnot(dim(predictors)['sdate'] == dim(exp$data)['sdate']) 
+    stopifnot(dim(predictors)['sdate'] == dim(exp$data)['sdate'])
   }
 
   #-----------------------------------
   # Interpolation
   #-----------------------------------
+  if (lr_method != '4nn') {
+    # Interpolate coarse-scale data
     exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method)
 
-  # Interpolate obs to the same target grid to ensure the matching with exp coordinates
     obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method)
 
     # Ensure that the interpolated exp and obs coordinates match
     .check_coords(lat1 = exp_interpolated$lat, lon1 = exp_interpolated$lon,
                   lat2 = obs_interpolated$lat, lon2 = obs_interpolated$lon)
+  }
 
   #-----------------------------------
   # Linear regressions
   #-----------------------------------
@@ -47,9 +52,13 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   if (lr_method == 'basic') {
     predictor <- exp_interpolated$data
     predictand <- obs_interpolated$data
+
     target_dims_predictor <- 'sdate'
     target_dims_predictand <- 'sdate'
-  }
+
+    lats <- obs_interpolated$lat
+    lons <- obs_interpolated$lon
+  }
 
   # (Multi) linear regression with large-scale predictors
   # Predictor: passed through the parameter 'predictors' by the user. Can be model or observations
@@ -58,22 +67,29 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   else if (lr_method == 'large-scale') {
     if (is.null(predictors)) {
       stop("The large-scale predictors must be passed through the parametre 'predictors'")
     }
-    predictand <- exp_interpolated$data
+    predictand <- obs_interpolated$data
     predictor <- predictors
 
-    var_dim_in_predictor <- names(dim(predictors))[names(dim(predictors)) != 'sdate']
-    target_dims_predictor <- c('sdate', var_dim_in_predictor)
+    target_dims_predictor <- c('sdate', large_scale_predictor_dimname)
     target_dims_predictand <- 'sdate'
+
+    lats <- obs_interpolated$lat
+    lons <- obs_interpolated$lon
   }
 
   # Multi-linear regression with the four nearest neighbours
   # Predictors: model data
   # Predictand: observations
   else if (lr_method == '4nn') {
-    predictor <- Apply(list(exp_interpolated$data), target_dims = list(c('lat','lon')), fun = find_4nn)$output1
-    predictand <- obs_interpolated$data
+    warning("Interpolating model and observation data is not needed for the '4nn' method.")
+    predictor <- find_nn(hres = obs, coar = exp, nn = 4)
+    predictand <- obs$data
+
     target_dims_predictor <- c('sdate','nn')
     target_dims_predictand <- 'sdate'
+
+    lats <- obs$lat
+    lons <- obs$lon
   }
 
   else {
@@ -92,7 +108,7 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   #-----------------------------------
   # Create an s2dv_cube object
   #-----------------------------------
-  res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat))
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats))
 
   return(res_s2dv)
 }
@@ -110,6 +126,7 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   # prediction
   res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
 
+  #return(lm1)
   return(res)
 
 }
@@ -154,53 +171,31 @@ pred_lm <- function(df, lm1, loocv) {
 }
 
 #-----------------------------------
-# Function to find the four nearest neighbours.
-# Note the case for the latitude and longitude borders
+# Function to find N nearest neighbours. 
+# 'hres' and 'coar' are 's2dv_cube' objects
 #-----------------------------------
-find_4nn <- function(object) {
+find_nn <- function(hres, coar, nn = 4) {
+
+  require(abind)
 
-  nearests <- InsertDim(object, posdim = 3, lendim = 4, name = 'nn')
+  lats_hres <- hres$lat
+  lons_hres <- hres$lon
+  lats_coar <- coar$lat
+  lons_coar <- coar$lon
 
-  for (i in seq(dim(object)['lon'])) {
-    lon_max_border <- FALSE
-    lon_min_border <- FALSE
-    if (((i + 1) > dim(object)['lon'])) {
-      lon_max_border <- TRUE
-    } else if (((i - 1) < 1)) {
-      lon_min_border <- TRUE
-    }
-    for (j in seq(dim(object)['lat'])) {
-      lat_max_border <- FALSE
-      lat_min_border <- FALSE
-      if (((j + 1) > dim(object)['lat'])) {
-        lat_max_border <- TRUE
-      } else if (((j - 1) < 1)) {
-        lat_min_border <- TRUE
-      }
-      if (lon_max_border) {
-        nearests[j, i, 2] <- NA
-        nearests[j, i, 4] <- object[j, i - 1]
-      } else if (lon_min_border) {
-        nearests[j, i, 2] <- object[j, i + 1]
-        nearests[j, i, 4] <- NA
-      } else {
-        nearests[j, i, 2] <- object[j, i + 1]
-        nearests[j, i, 4] <- object[j, i - 1]
-      }
-      if (lat_max_border) {
-        nearests[j, i, 1] <- NA
-        nearests[j, i, 3] <- object[j - 1, i]
-      } else if (lat_min_border) {
-        nearests[j, i, 1] <- object[j + 1, i]
-        nearests[j, i, 3] <- NA
-      } else {
-        nearests[j, i, 1] <- object[j + 1, i]
-        nearests[j, i, 3] <- object[j - 1, i]
-      }
-    }
-  }
+  # Indices of the nn closest coarse grid points, sorted from closest to furthest
+  idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
+  idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn]))
+
+  names(dim(idx_lat)) <- c('nn', 'lat')
+  names(dim(idx_lon)) <- c('nn', 'lon')
+
+  # Obtain the values of the nearest neighbours
+  nearest <- Apply(list(coar$data, idx_lat, idx_lon),
+                   target_dims = list(c('lat','lon'),'lat','lon'),
+                   fun = function(x, y, z) x[y, z])$output1
 
-  return(nearests)
+  return(nearest)
 }
diff --git a/R/Utils.R b/R/Utils.R
index bcb9521..8c63a22 100644
--- a/R/Utils.R
+++ b/R/Utils.R
@@ -4,7 +4,23 @@
   }
 }
 
+# Reorder dims to a reference array; dimensions that do not exist are created. Illustrative inputs, commented out so they are not run when the package loads:
+# arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181))
+# arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1))
+
 .reorder_dims <- function(arr_ref, arr_to_reorder) {
+
+  miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))]
+
+  if (length(miss_dims) != 0) {
+    for (m in seq(miss_dims)) {
+      arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1,
+                                  name = miss_dims[m])
+    }
+  }
+
+  # TODO: add code to reorder dimensions and put the non-common dimensions at the end
+  orddim <- match(names(dim(arr_ref)), names(dim(arr_to_reorder)))
   return(Reorder(data = arr_to_reorder, order = orddim))
 }
--
GitLab


From e459a48c0e9a56abab28db265cc331b1841aa092 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Fri, 29 Apr 2022 17:26:59 +0200
Subject: [PATCH 05/24] .
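# --- Editor's illustration (not part of the patch series) -----------------
# A minimal usage sketch of the .reorder_dims() helper defined in R/Utils.R
# above, assuming s2dv (InsertDim, Reorder) is attached; the array shapes
# are made up for the example.
library(s2dv)

ref <- array(NA, dim = c(dataset = 1, sdate = 8, member = 3, lat = 4, lon = 5))
x   <- array(rnorm(480), dim = c(dataset = 1, member = 3, sdate = 8, lat = 4, lon = 5))

# Dimensions of 'ref' missing in 'x' are appended with length 1, then the
# named dimensions of 'x' are permuted to follow the order of 'ref'
y <- .reorder_dims(arr_ref = ref, arr_to_reorder = x)
dim(y)  # dataset = 1, sdate = 8, member = 3, lat = 4, lon = 5
# As the TODO above notes, dimensions present only in 'x' are not yet moved
# to the end, so Reorder() may fail in that case.
# ---------------------------------------------------------------------------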
---
 R/Analogs.R       |  95 +++++++++++++++------
 R/Intbc.R         |  52 ++++++++++--
 R/Interpolation.R | 205 ++++++++++++++++++++++++++++++++++++----------
 R/Intlr.R         |  45 +++++++---
 R/Utils.R         |   8 +-
 5 files changed, 319 insertions(+), 86 deletions(-)

diff --git a/R/Analogs.R b/R/Analogs.R
index 808d661..ed5a389 100644
--- a/R/Analogs.R
+++ b/R/Analogs.R
@@ -1,6 +1,6 @@
 
-# As it is now, selects only the best analog
-Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", ncores = 1) {
+Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", data_dim = "data", lat_dim = "lat", lon_dim = "lon",
+                    sdate_dim = "sdate", time_dim = "time", ncores = 1) {
 
   require(multiApply)
   require(CSTools)
@@ -10,42 +10,77 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", ncores = 1) {
   #-----------------------------------
   # input exp and obs must be s2dv_cube objects
   if (!inherits(exp,'s2dv_cube')) {
-    stop("**ERROR: Parameter 'exp' must be of the class 's2dv_cube'")
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
   }
 
   # input exp and obs must be s2dv_cube objects
   if (!inherits(obs,'s2dv_cube')) {
-    stop("**ERROR: Parameter 'obs' must be of the class 's2dv_cube'")
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(nanalogs, 'numeric')) {
+    stop("Parameter 'nanalogs' must be of the class 'numeric'")
+  }
+
+  if (!inherits(fun_analog, 'character')) {
+    stop("Parameter 'fun_analog' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp[[data_dim]])))) | is.na(match(lon_dim, names(dim(obs[[data_dim]]))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter
+          'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp[[data_dim]])))) | is.na(match(lat_dim, names(dim(obs[[data_dim]]))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter
+          'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp[[data_dim]])))) | is.na(match(sdate_dim, names(dim(obs[[data_dim]]))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter
+          'sdate_dim'")
+  }
+
+  if (is.na(match(time_dim, names(dim(exp[[data_dim]])))) | is.na(match(time_dim, names(dim(obs[[data_dim]]))))) {
+    stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter
+          'time_dim'")
+  }
+
+  # Ensure we have enough data to interpolate from high-res to coarse grid
+  if ((attr(obs[[lat_dim]],"first_lat") > attr(exp[[lat_dim]],"first_lat")) |
+      (attr(obs[[lat_dim]],"last_lat") < attr(exp[[lat_dim]],"last_lat")) |
+      (attr(obs[[lon_dim]],"first_lon") > attr(exp[[lon_dim]],"first_lon")) |
+      (attr(obs[[lon_dim]],"last_lon") < attr(exp[[lon_dim]],"last_lon"))) {
+    stop("There are not enough data in 'obs'. Please add more latitudes or ",
+         "longitudes.")
   }
 
   # Select a function to apply to the analogs selected for a given observation
   stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
 
-  # sdate must be the time dimension in the input data
-  #stopifnot('sdate' %in% names(dim(exp$data)))
-  #stopifnot('sdate' %in% names(dim(obs$data)))
-
   #-----------------------------------
   # Interpolate high-res observations to the coarse grid
   #-----------------------------------
   grid_exp <- exp$source_files[1]
 
-  # Since we are interpolated to a coarser grid, round bordering coordinates
-  attr(obs$lon,"first_lon") <- round(attr(obs$lon,"first_lon"))
-  attr(obs$lon,"last_lon") <- round(attr(obs$lon,"last_lon"))
-  attr(obs$lat,"first_lat") <- round(attr(obs$lat,"first_lat"))
-  attr(obs$lat,"last_lat") <- round(attr(obs$lat,"last_lat"))
-
-  obs_interpolated <- Interpolation(exp = obs, target_grid = grid_exp, method_remap = "conservative")
-  lats <- obs$lat
-  lons <- obs$lon
+  lonmin <- attr(exp[[lon_dim]],"first_lon")
+  lonmax <- attr(exp[[lon_dim]],"last_lon")
+  latmin <- attr(exp[[lat_dim]],"first_lat")
+  latmax <- attr(exp[[lat_dim]],"last_lat")
+  obs_interpolated <- Interpolation(exp = obs, target_grid = grid_exp, method_remap = "conservative",
+                                    remap_region = c(lonmin, lonmax, latmin, latmax))
+  lats <- obs[[lat_dim]]
+  lons <- obs[[lon_dim]]
 
   #-----------------------------------
   # Reshape train and test
   #-----------------------------------
-  res <- Apply(list(obs_interpolated$data, exp$data, obs$data), target_dims = list(c('sdate', 'lat', 'lon'),
-               c('sdate', 'lat', 'lon'), c('sdate', 'lat', 'lon')), fun = function(tr, te, ob)
-               .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, fun = fun_analog))$output1
+  res <- Apply(list(obs_interpolated$data, exp[[data_dim]], obs[[data_dim]]),
+               target_dims = list(c(time_dim, sdate_dim, lat_dim, lon_dim),
+               c(time_dim, sdate_dim, lat_dim, lon_dim), c(time_data, sdate_dim, lat_dim, lon_dim)),
+               fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs,
+               fun = fun_analog))$output1
 
   res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats))
 
@@ -64,19 +99,19 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", ncores = 1) {
   dist <- knn.ind$nn.dist
   idx <- knn.ind$nn.index
 
-  names(dim(idx)) <- c("sdate", "analog"); names(dim(dist)) <- c("sdate", "analog")
+  names(dim(idx)) <- c(sdate_dim, "analog"); names(dim(dist)) <- c(sdate_dim, "analog")
 
   # Obtain the obs for the knn indices
   analogs <- Apply(idx, margins = "analog", fun = function(an) { obs_hres[an, , ] })$output1
 
   # Apply function for analogs
-  if(!is.null(fun)) {
+  if (!is.null(fun)) {
 
     margins_analogs <- names(dim(analogs))[names(dim(analogs)) != "analog"]
 
     if (fun == "wmean") {
       w <- 1 / dist
-      analogs <- Apply(list(analogs, dist), margins = list(margins_analogs, "sdate"),
+      analogs <- Apply(list(analogs, dist), margins = list(margins_analogs, sdate_dim),
                        fun = function(x, w) weighted.mean(x, w))$output1
     } else {
       analogs <- Apply(analogs, margins = margins_analogs, fun = fun)$output1
@@ -89,11 +124,19 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", ncores = 1) {
 
 # For each element in test, find the indices of the k nearest neigbhors in train
 get_knn <- function(train, test, k) {
   # Reformat train and test as an array with (time, points)
-  train <- aperm(Apply(train, target_dims = list(c('lat', 'lon')), fun=function(x) { as.vector(x) })$output1)
-  test <- aperm(Apply(test, target_dims = list(c('lat', 'lon')), 
fun=function(x) { as.vector(x) })$output1) + train <- aperm(Apply(train, target_dims = list(c(lat_dim, lon_dim)), output_dims = 'space', fun = function(x) { as.vector(x) })$output1) + test <- aperm(Apply(test, target_dims = list(c(lat_dim, lon_dim)), output_dims = 'space', fun = function(x) { as.vector(x) })$output1) # For each row in test, find the k closest rows in train - knn.ind <- get.knnx(train, test, k) + nsdates <- dim(train)[names(dim(train)) == sdate_dim] + knn.ind <- lapply(1:nsdates, function(j) { + te <- test[ j, , , drop = FALSE] + tr <- train[ -j, , ] + tr <- Apply(tr, target_dims = list(c(sdate_dim, time_dim)), output_dims = 't', fun = function(x) { as.vector(x) })$output1 + te <- Apply(te, target_dims = list(c(sdate_dim, time_dim)), output_dims = 't', fun = function(x) { as.vector(x) })$output1 + get.knnx(tr, te, k) + }) + return(knn.ind) } diff --git a/R/Intbc.R b/R/Intbc.R index dcd663f..f6472a4 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -1,19 +1,57 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { require(CSTools) + + # Input data must be an s2dv_cube object + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + if (!inherits(int_method, 'character')) { + stop("Parameter 'int_method' must be of the class 'character'") + } + + if (!inherits(bc_method, 'character')) { + stop("Parameter 'bc_method' must be of the class 'character'") + } stopifnot(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping')) stopifnot(c('member','sdate') %in% names(dim(exp$data))) - exp_interpolated <- interpolation(exp = exp, target_grid = target_grid, method = int_method) + #if ((attr(obs$lat,"first_lat") < attr(exp$lat,"first_lat")) | + # (attr(obs$lat,"last_lat") > attr(exp$lat,"last_lat")) | + # (attr(obs$lon,"first_lon") < attr(exp$lon,"first_lon")) | + # (attr(obs$lon,"last_lon") > attr(exp$lon,"last_lon"))) { + # stop("There are not enough data in 'exp'. Please to add more latitudes and ", + # "longitudes.") + #} + + lonmin <- attr(obs$lon,"first_lon") + lonmax <- attr(obs$lon,"last_lon") + latmin <- attr(obs$lat,"first_lat") + latmax <- attr(obs$lat,"last_lat") - # Interpolate obs to the same target grid to ensure the matching with exp coordinates - obs_interpolated <- interpolation(exp = obs, target_grid = target_grid, method = int_method) + exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method, + remap_region = c(lonmin, lonmax, latmin, latmax)) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs$lat, lon1 = exp_interpolated$lon, + lon2 = obs$lon)) { + obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method, + remap_region = c(lonmin, lonmax, latmin, latmax)) + } else { + obs_interpolated <- obs + } #.check_coords if (bc_method == 'sbc' | bc_method == 'simple_bias') { if (dim(obs_interpolated$data)['sdate'] == 1) { - warning('**WARNING: Simple Bias Correction should not be used with + warning('Simple Bias Correction should not be used with only one observation. Returning NA. 
') } res <- BiasCorrection(exp = exp_interpolated$data, @@ -21,13 +59,17 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { } else if (bc_method == 'cal' | bc_method == 'calibration') { if (dim(exp_interpolated$data)['member'] == 1) { - stop('**ERROR: Calibration must not be used with + stop('Calibration must not be used with only one ensemble member.') } res <- Calibration(exp = exp_interpolated$data, obs = obs_interpolated$data, cal.method = 'mse_min', ncores = ncores) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + if (any(is.na(exp_interpolated$data))) { + warning('Found NAs in "exp" data, either introduced with the initial object or ', + 'by the interpolation method. Quantile Mapping method is likely to fail.') + } res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_interpolated$data, sample_dims = 'sdate', method = 'QUANT', ncores = ncores) } diff --git a/R/Interpolation.R b/R/Interpolation.R index 6bf4a3f..39c6673 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -2,66 +2,161 @@ #method_point_interp: nearest, bilinear, 9point, invdist4nn, NE, NW, SE, SW #target_grid has to be either a grid recognised by CDO or a NETCDF file # requires CDO_1.9.8 or newer versions when using nn method +# Latitudes must range from -90 to 90 (or a subset of these values) and longitudes +# must range from -180 to 180 Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, - method_point_interp = NULL) + remap_region = NULL, method_point_interp = NULL) { require(s2dv) # Input data must be an s2dv_cube object if (!inherits(exp,'s2dv_cube')) { - stop("**ERROR: Parameter 'exp' must be of the class 's2dv_cube'") + stop("Parameter 'exp' must be of the class 's2dv_cube'") } + if (!is.null(method_remap)) { + if (!inherits(method_remap, 'character')) { + stop("Parameter 'method_remap' must be of the class 'character'") + } + } + + if (!is.null(method_point_interp)) { + if (!inherits(method_point_interp, 'character')) { + stop("Parameter 'method_point_interp' must be of the class 'character'") + } + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match("lon", names(dim(exp$data))))) { + stop("Longitude dimension in 'exp' must be named 'lon'") + } + + if (is.na(match("lat", names(dim(exp$data))))) { + stop("Latitude dimension in 'exp' must be named 'lat'") + } + + if (is.na(match("sdate", names(dim(exp$data))))) { + stop("Time dimension in 'exp' must be named 'sdate'") + } + + # Check for negative longitudes in the exp data + if (any(exp$lon < -180 | exp$lon > 180) ) { + stop("Out-of-range longitudes have been found in 'exp$lon'. Longitudes must range from ", + "-180 to 180") + } + + # Check for negative latitudes in the exp data + if (any(exp$lat < -90 | exp$lat > 90) ) { + stop("Out-of-range latitudes have been found in 'exp$lat'. 
Latitudes must range from ",
+         "-90 to 90")
+  }
+
   # checkings for the case of interpolation to point locations
   if (!is.null(points)) {
     if (!inherits(points, 'list')) {
-      stop("**ERROR: Parameter 'points' must be a list of two elements containing the point ",
-           "latitudes and longitudes in the form 'list(c(latitudes), c(longitudes))'")
+      stop("Parameter 'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes in the form 'points$lat', 'points$lon'")
     }
-
+
     if (is.null(method_point_interp)) {
-      stop("**ERROR: Parameter 'method_point_interp' must be a character vector indicating the ",
+      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) {
+      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
            "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
            "invdist4nn, NE, NW, SE, SW")
     }
 
     # Points must be a list of two elements
     if (length(points) != 2) {
-      stop("**ERROR: 'points' must be a lis of two elements containing the point ",
-           "latitudes and longitudes in the form 'list(c(latitudes), c(longitudes))'")
+      stop("'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes in the form 'points$lat', 'points$lon'")
+    }
+
+    # The names of the two elements must be 'lat' and 'lon'
+    if (any(!(c('lat', 'lon') %in% names(points)))) {
+      stop("The names of the elements in the list 'points' must be 'lat' and 'lon'")
     }
 
     # Check that the number of latitudes and longitudes match
     if (length(unique(lengths(points))) != 1L) {
-      stop("**ERROR: the number of latitudes and longitudes must match")
+      stop("The number of latitudes and longitudes must match")
+    }
+
+    # Check for negative longitudes in the point coordinates
+    if (any(points$lon < -180 | points$lon > 180) ) {
+      stop("Out-of-range longitudes have been found in 'points'. Longitudes must range from ",
+           "-180 to 180")
+    }
+
+    # Check for negative latitudes in the point coordinates
+    if (any(points$lat < -90 | points$lat > 90) ) {
+      stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ",
+           "-90 to 90")
     }
   } else {
     if (is.null(method_remap)) {
-      stop("**ERROR: Parameter 'method_remap' must be a character vector indicating the ",
+      stop("Parameter 'method_remap' must be a character vector indicating the ",
           "interpolation method. Accepted methods are con, bil, bic, nn, con2")
     }
+
     if (is.null(target_grid)) {
-      stop("**ERROR: Parameter 'target_grid' must be a character vector indicating the ",
+      stop("Parameter 'target_grid' must be a character vector indicating the ",
           "target grid to be passed to CDO. 
It must be a grid recognised by CDO ", "or a NetCDF file") } } - lonmin <- attr(exp$lon,"first_lon") - lonmax <- attr(exp$lon,"last_lon") - latmin <- attr(exp$lat,"first_lat") - latmax <- attr(exp$lat,"last_lat") + #---------------------------------- + # Limits of the region defined by the model data + #---------------------------------- + if (is.null(attr(exp$lon,"first_lon"))) { + lonmin <- exp$lon[1] + } else { + lonmin <- attr(exp$lon,"first_lon") + } + if (is.null(attr(exp$lon,"last_lon"))) { + lonmax <- exp$lon[length(exp$lon)] + } else { + lonmax <- attr(exp$lon,"last_lon") + } + if (is.null(attr(exp$lat,"first_lat"))) { + latmin <- exp$lat[1] + } else { + latmin <- attr(exp$lat,"first_lat") + } + if (is.null(attr(exp$lat,"last_lat"))) { + latmax <- exp$lat[length(exp$lat)] + } else { + latmax <- attr(exp$lat,"last_lat") + } + + # for the case when region limits are not passed by the user + if (is.null(remap_region)) { + remap_region <- c(lonmin, lonmax, latmin, latmax) + } + + # Ensure points to be within the region limits + if (!is.null(points)) { + if (any(points$lat > latmax) | any(points$lat < latmin) | + any(points$lon > lonmax) | any(points$lon < lonmin)) { + stop("At least one of the points lies outside the model region") + } + } #---------------------------------- # Map regrid with CDO #---------------------------------- if (is.null(points)) { - res <- CDORemap(data_array = exp$data, - lats = exp$lat, - lons = exp$lon, - grid = target_grid, - method = method_remap, - crop = c(lonmin, lonmax, latmin, latmax)) + res <- s2dv::CDORemap(data_array = exp$data, + lats = exp$lat, + lons = exp$lon, + grid = target_grid, + method = method_remap, + crop = remap_region) # Create an s2dv_cube object res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats)) @@ -72,8 +167,10 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = } else { # First create interpolation weights, depending on the chosen method ncfile_exp <- exp$source_files[1] - weights <- create_interp_weights(ncfile = ncfile_exp, locids = 1:unique(lengths(points)), - lats = points[[1]], lons = points[[2]], method = method_point_interp) + weights <- create_interp_weights(ncfile = ncfile_exp, locids = 1:unique(lengths(points)), + lats = points$lat, lons = points$lon, method = method_point_interp, + region = list(lat_min = latmin, lat_max = latmax, lon_min = lonmin, + lon_max = lonmax)) # Select coarse-scale data to be interpolated model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp$data) @@ -82,28 +179,43 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = res <- interpolate_data(model_data_gridpoints, weights) # Create an s2dv_cube object - res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[1]], lat = points[[2]])) + res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points$lon, lat = points$lat)) } return(res_s2dv) } - #====================== # Compute weights for interpolation at several (lat,lon) positions # We assume that grid boxes are centered in the grid point. 
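 # For a regular lonlat grid this reduces to the fractional indices computed
 # in latlon2ij() below; as a worked form of the same formulas
 # (editor's illustration):
 #   i = 1 + ((lon - xfirst) / xinc) mod xsize
 #   j = 1 + (lat - yfirst) / yinc
 # e.g. with xfirst = 0 and xinc = 0.5, a point at lon = 3.2 gets i = 7.4,
 # i.e. it falls between grid columns 7 and 8, closer to column 7.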
#====================== -create_interp_weights <- function(ncfile, locids, lats, lons, +create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, method = c("nearest", "bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW")) -{ #---------------- +{ + # crop the region to get the correct weights - save temporary file + system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min, + ',', region$lat_max, ' ', ncfile, ' ', ncfile, '_cropped')) + + #---------------- # Read grid description and compute (i,j) of requested locations (including decimals) #---------------- - griddes <- get_griddes(ncfile) - if (!is.null(griddes)) { - stop("**ERROR: 'griddes' not found in the NetCDF source files") + griddes <- get_griddes(paste0(ncfile,'_cropped')) + + # If latitudes are decreasingly ordered, revert them + if (griddes$yinc < 0) { + system(paste0('cdo invertlat ',ncfile,'_cropped ',ncfile,'_cropped2')) + griddes <- get_griddes(paste0(ncfile,'_cropped2')) + } + # remove temporary files + system(paste0('rm ',ncfile,'_cropped')) + system(paste0('rm ',ncfile,'_cropped2')) + + if (is.null(griddes)) { + stop("'griddes' not found in the NetCDF source files") } + gridpoints <- latlon2ij(griddes, lats, lons) #---------------- @@ -324,7 +436,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, rawj = gridpoints$j, i = seli, j = selj, weight = 1) } else { - stop(paste0("**ERROR: Method " ,method, " not implemented")) + stop(paste0("Method " ,method, " not implemented")) } #---------------- @@ -353,23 +465,24 @@ latlon2ij <- function(griddes, lats, lons) { #------------ # Check input params #------------ - if(length(lons) != length(lats)) {stop("**ERROR: input lat and lon have different lengths.")} - if(any(lats < -90) | any(lats > 90)) {stop("**ERROR: latitude out of valid range")} - if(round(griddes$xinc*griddes$xsize)!=360) {stop("**ERROR: grid is not global")} + if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} + if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} # no need to resize lons to [0,360) #------------ # Compute i (with decimals) # i lies in [1,xsize+1) + # %% gives the remainder #------------ gridpoints <- list() - gridpoints$i <- 1 + (((lons-griddes$xfirst) / griddes$xinc) %% griddes$xsize) - + gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) + #------------ # Compute j (with decimals) #------------ if(griddes$gridtype=='lonlat') { - gridpoints$j <- 1 + (lats-griddes$yfirst) / griddes$yinc + gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc } else if(griddes$gridtype == 'gaussian') { # We assume gaussian grid latitudes are ordered north to south # findInterval can only work with monotonic ascending values so we revert twice @@ -386,7 +499,7 @@ latlon2ij <- function(griddes, lats, lons) { ok_idx <- !(northj == 0 | southj == griddes$ysize+1) gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) - } else { stop("**ERROR: unsupported grid") } + } else { stop("Unsupported grid") } return(gridpoints) } @@ -437,11 +550,13 @@ get_lats <- function(ncfile) { get_model_data <- function(weights.df, mdata) { require(plyr) + require(multiApply) #----------------- # Get data for all 
combinations of i and j. # (inefficient, getting many unneded pairs). # Avoid retrieving duplicates with unique() + # These are the indices of the global grid #----------------- is <- weights.df$i js <- weights.df$j @@ -467,11 +582,17 @@ get_model_data <- function(weights.df, mdata) { } # trick: exchange idxi and idxj - if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp } - keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)] + #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp } + #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)] - sub_mdata <- apply(mdata,keepdims, function(x) { - laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) }) + #sub_mdata <- apply(mdata, keepdims, function(x) { + # laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) }) + #names(dim(sub_mdata))[1] <- "gridpoint" + + #----------------- + # Retrieve with multiApply + #----------------- + sub_mdata <- Apply(mdata, target_dims = list(c('lat', 'lon')), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1 names(dim(sub_mdata))[1] <- "gridpoint" #----------------- diff --git a/R/Intlr.R b/R/Intlr.R index 6c7e6d0..e155b88 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -16,6 +16,22 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL stop("Parameter 'obs' must be of the class 's2dv_cube'") } + if (!inherits(int_method, 'character')) { + stop("Parameter 'int_method' must be of the class 'character'") + } + + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + # sdate must be the time dimension in the input data stopifnot('sdate' %in% names(dim(exp$data))) stopifnot('sdate' %in% names(dim(obs$data))) @@ -31,16 +47,25 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL #----------------------------------- # Interpolation #----------------------------------- - if (method != '4nn') { - # Interpolate coarse-scale data - exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method) - - # Interpolate obs to the same target grid to ensure the matching with exp coordinates - obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method) - - # Ensure that the interpolated exp and obs coordinates match - .check_coords(lat1 = exp_interpolated$lat, lon1 = exp_interpolated$lon, - lat2 = obs_interpolated$lat, lon2 = obs_interpolated$lon) + if (lr_method != '4nn') { + + lonmin <- attr(obs$lon,"first_lon") + lonmax <- attr(obs$lon,"last_lon") + latmin <- attr(obs$lat,"first_lat") + latmax <- attr(obs$lat,"last_lat") + + exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method, + remap_region = c(lonmin, lonmax, latmin, latmax)) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs$lat, + lon1 = exp_interpolated$lon, lon2 = obs$lon)) { + obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method, + remap_region = c(lonmin, lonmax, latmin, latmax)) + } else { + obs_interpolated <- obs + } } 
#----------------------------------- diff --git a/R/Utils.R b/R/Utils.R index 8c63a22..addddc9 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,7 +1,9 @@ .check_coords <- function(lat1, lon1, lat2, lon2) { - if (!all(all((lat1 == lat2)) & all((lon1 == lon2)))) { - stop('**ERROR: Latitude and longitude values from exp and obs must match.') - } + match <- TRUE + if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { + match <- FALSE + } + return(match) } # reorder dims to a reference array. If they do not exist, they are created -- GitLab From f4cbeca740df37568536af938679610454d8bcc5 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 10 May 2022 12:16:15 +0200 Subject: [PATCH 06/24] Analogs without window --- R/Analogs.R | 99 +++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/R/Analogs.R b/R/Analogs.R index ed5a389..78a72c3 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -1,5 +1,5 @@ -Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", data_dim = "data", lat_dim = "lat", lon_dim = "lon", +Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", ncores = 1) { require(multiApply) @@ -27,22 +27,22 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", data_dim = "dat } # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names - if (is.na(match(lon_dim, names(dim(exp[[data_dim]])))) | is.na(match(lon_dim, names(dim(obs[[data_dim]]))))) { + if (is.na(match(lon_dim, names(dim(exp$data)))) | is.na(match(lon_dim, names(dim(obs$data))))) { stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter 'lon_dim'") } - if (is.na(match(lat_dim, names(dim(exp[[data_dim]])))) | is.na(match(lat_dim, names(dim(obs[[data_dim]]))))) { + if (is.na(match(lat_dim, names(dim(exp$data)))) | is.na(match(lat_dim, names(dim(obs$data))))) { stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter 'lat_dim'") } - if (is.na(match(sdate_dim, names(dim(exp[[data_dim]])))) | is.na(match(sdate_dim, names(dim(obs[[data_dim]]))))) { + if (is.na(match(sdate_dim, names(dim(exp$data)))) | is.na(match(sdate_dim, names(dim(obs$data))))) { stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter 'sdate_dim'") } - if (is.na(match(time_dim, names(dim(exp[[data_dim]])))) | is.na(match(time_dim, names(dim(obs[[data_dim]]))))) { + if (is.na(match(time_dim, names(dim(exp$data)))) | is.na(match(time_dim, names(dim(obs$data))))) { stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter 'time_dim'") } @@ -76,11 +76,17 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", data_dim = "dat #----------------------------------- # Reshape train and test #----------------------------------- - res <- Apply(list(obs_interpolated$data, exp[[data_dim]], obs[[data_dim]]), - target_dims = list(c(time_dim, sdate_dim, lat_dim, lon_dim), - c(time_dim, sdate_dim, lat_dim, lon_dim), c(time_data, sdate_dim, lat_dim, lon_dim)), + res <- Apply(list(obs_interpolated$data, exp$data, obs$data), + target_dims = c(sdate_dim, time_dim, lat_dim, lon_dim), fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, - fun = fun_analog))$output1 + fun_analog = fun_analog), ncores = ncores)$output1 + + #test <- exp$data[1,1,,1,,,] + #train <- obs_interpolated$data[1,1,,,,] + #obs_hres <- 
obs$data[1,1,,,,] + #test <- aperm(test,c(2,1,3,4)) + #train <- aperm(train,c(3,1,2,4)) + #obs_hres <- aperm(obs_hres,c(3,1,2,4)) res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats)) @@ -88,56 +94,51 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", data_dim = "dat } -# For each element in test return the corresponding k-NN observations, or its mean if indicated in fun -.analogs <- function(train, test, obs_hres, k, fun = NULL) { - +# For each element in test, find the indices of the k nearest neigbhors in train +.analogs <- function(train, test, obs_hres, k, fun_analog) { + # train, test, and obs_hres dim: 4 dimensions sdate, time, lat and lon (in this order) require(FNN) require(multiApply) - # Get knn indices for each test element - knn.ind <- get_knn(train = train, test = test, k = k) + # Number of lats/lons of the high-resolution data + space_dims_hres <- dim(obs_hres)[c(3,4)] + nsdates <- dim(train)[1] - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - names(dim(idx)) <- c(sdate_dim, "analog"); names(dim(dist)) <- c(sdate_dim, "analog") + # Reformat train and test as an array with (time, points) + train <- apply(train, c(1,2), as.vector); names(dim(train))[1] <- "space" + test <- apply(test, c(1,2), as.vector); names(dim(test))[1] <- "space" + obs_hres <- apply(obs_hres, c(1,2), as.vector); names(dim(obs_hres))[1] <- "space" - # Obtain the obs for the knn indices - analogs <- Apply(idx, margins = "analog", fun = function(an) { obs_hres[an, , ] })$output1 + # Here we do cross-validation: the start date considered is removed from the training + analogs <- lapply(1:nsdates, function(j) { + tr <- apply(train[ , -j, ], 1, as.vector); names(dim(tr))[1] <- "time" + te <- aperm(test[ , j, ]) + ob <- apply(obs_hres[ , -j, ], 1, as.vector); names(dim(ob))[1] <- "time" - # Apply function for analogs - if (!is.null(fun)) { - - margins_analogs <- names(dim(analogs))[names(dim(analogs)) != "analog"] + knn.ind <- get.knnx(tr, te, k) - if (fun == "wmean") { - w <- 1 / dist - analogs <- Apply(list(analogs, dist), margins = list(margins_analogs, sdate_dim), - fun = function(x, w) weighted.mean(x, w))$output1 - } else { - analogs <- Apply(analogs, margins = margins_analogs, fun = fun)$output1 + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + names(dim(idx)) <- c("time", "analog"); names(dim(dist)) <- c("time", "analog") + + analogs <- Apply(idx, margins = "analog", fun = function(an) { ob[ an, ] })$output1 + dim(analogs) <- c(dim(analogs)[1], space_dims_hres, analog = k) + + if (!is.null(fun_analog)) { + + margins_analogs <- names(dim(analogs))[names(dim(analogs)) != "analog"] + + if (fun_analog == "wmean") { + weight <- 1 / dist + analogs <- Apply(list(analogs, weight), margins = list(margins_analogs, "time"), + fun = function(x, w) weighted.mean(x, w))$output1 + } else { + analogs <- Apply(analogs, margins = margins_analogs, fun = fun_analog)$output1 + } } - } - - return(analogs) -} - -# For each element in test, find the indices of the k nearest neigbhors in train -get_knn <- function(train, test, k) { - # Reformat train and test as an array with (time, points) - train <- aperm(Apply(train, target_dims = list(c(lat_dim, lon_dim)), output_dims = 'space', fun = function(x) { as.vector(x) })$output1) - test <- aperm(Apply(test, target_dims = list(c(lat_dim, lon_dim)), output_dims = 'space', fun = function(x) { as.vector(x) })$output1) - - # For each row in test, find the k closest rows in train - nsdates <- dim(train)[names(dim(train)) == sdate_dim] - 
knn.ind <- lapply(1:nsdates, function(j) { - te <- test[ j, , , drop = FALSE] - tr <- train[ -j, , ] - tr <- Apply(tr, target_dims = list(c(sdate_dim, time_dim)), output_dims = 't', fun = function(x) { as.vector(x) })$output1 - te <- Apply(te, target_dims = list(c(sdate_dim, time_dim)), output_dims = 't', fun = function(x) { as.vector(x) })$output1 - get.knnx(tr, te, k) }) - return(knn.ind) + return(analogs) } -- GitLab From 5725b7ef47766fa34d88e809508a3f901378ef13 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Mon, 23 May 2022 17:44:52 +0200 Subject: [PATCH 07/24] Write documentation --- R/Analogs.R | 228 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 177 insertions(+), 51 deletions(-) diff --git a/R/Analogs.R b/R/Analogs.R index 78a72c3..9ae5775 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -1,10 +1,83 @@ - -Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", ncores = 1) { - - require(multiApply) - require(CSTools) - +#'@rdname Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with +#'similar conditions in the historical observations. The coarse scale and +#'observation data can be either global or regional. In the latter case, the +#'region is defined by the user. In principle, the coarse and observation data +#'should be of the same variable, although different variables can also be admitted. +#'The analogs function will find the N best analogs based in Minimum Euclidean +#'distance. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an 's2dv_cube' object containing the experimental field on the +#'large scale for which the analog is aimed. The element 'data' in the 's2dv_cube' +#'object must have, at least, the dimensions latitude, longitude, start date and time. +#'The object is expect to be already subset for the desired large scale region. +#'@param obs an 's2dv_cube' object containing the observational field with the +#'target high-resolution scale. The element 'data' in the 's2dv_cube' object must have, +#'at least, the dimensions latitude, longitude, start date and time. The object is +#'expect to be already subset for the desired large scale region. +#'@param nanalogs an integer indicating the number of analogs to be searched +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. 
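+#'For 'fun_analog = "wmean"', for instance, the analogs are combined with
+#'weights proportional to the inverse of their Euclidean distances to the
+#'target field; an illustrative sketch of that internal computation:
+#'\preformatted{
+#'  w <- 1 / dist
+#'  downscaled <- apply(analogs, c(1, 2), function(x) weighted.mean(x, w))
+#'}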
+#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". This is expected to have daily frequency. +#'@param grid_exp a character vector indicating the coarse grid to be passed to CDO. It +#'must be a grid recognised by CDO or a NetCDF file. This information can also be passed +#'via the element 'source_files' in exp. Default to NULL. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'@import multiApply +#'@import CSTools +#'@importFrom s2dv InsertDim CDORemap +#'@importFrom FNN get.knnx +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An 's2dv_cube' object containing the dowscaled values. If fun_analog is set to NULL, +#'the output array in 'data' also contains the dimension 'analog' with the best analog days. +#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'lon_exp <- 1:5 +#'lat_exp <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'lon_obs <- seq(0,6, 6/14) +#'lat_obs <- seq(0,6, 6/11) +#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp) +#'obs <- s2dv_cube(data = obs, lat = lat_obs, lon = lon_obs) +#'attr(exp$lon,"first_lon") <- 1 +#'attr(exp$lon,"last_lon") <- 5 +#'attr(exp$lat,"first_lat") <- 1 +#'attr(exp$lat,"last_lat") <- 4 +#'attr(obs$lon,"first_lon") <- 0 +#'attr(obs$lon,"last_lon") <- 6 +#'attr(obs$lat,"first_lat") <- 0 +#'attr(obs$lat,"last_lat") <- 6 +#'downscaled_field <- Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') +#'@export +Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", time_dim = "time", grid_exp = NULL, ncores = 1) { #----------------------------------- # Checkings #----------------------------------- @@ -26,25 +99,48 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat" stop("Parameter 'fun_analog' must be of the class 'character'") } + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(time_dim, 'character')) { + stop("Parameter 'time_dim' must be of the class 'character'") + } + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names if (is.na(match(lon_dim, names(dim(exp$data)))) | is.na(match(lon_dim, names(dim(obs$data))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter - 'lon_dim'") + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") } if (is.na(match(lat_dim, names(dim(exp$data)))) | is.na(match(lat_dim, names(dim(obs$data))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter - 'lat_dim'") + stop("Missing latitude 
dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
   }
 
   if (is.na(match(sdate_dim, names(dim(exp$data)))) | is.na(match(sdate_dim, names(dim(obs$data))))) {
-    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter
-          'sdate_dim'")
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
   }
 
   if (is.na(match(time_dim, names(dim(exp$data)))) | is.na(match(time_dim, names(dim(obs$data))))) {
-    stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter
-          'time_dim'")
+    stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'time_dim'")
+  }
+
+  if (is.null(exp$source_files) & is.null(grid_exp)) {
+    stop("The coarse grid could not be determined. This information can be passed within ",
+         "'exp' with the element '$source_files' or via the parameter 'grid_exp'. ",
+         "Parameter 'grid_exp' must be a character vector indicating the coarse grid to ",
+         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
   }
 
   # Ensure we have enough data to interpolate from high-res to coarse grid
@@ -59,11 +155,24 @@
   # Select a function to apply to the analogs selected for a given observation
   stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
 
+  # Create window if user does not have it in obs$data
+  if ( !("window" %in% names(dim(obs$data))) ) {
+    nsdates <- dim(obs$data)[names(dim(obs$data)) == sdate_dim]
+    ntimes <- dim(obs$data)[names(dim(obs$data)) == time_dim]
+    window <- Apply(list(obs$data), target_dims = list(c(time_dim, sdate_dim)),
+                    fun = as.vector, output_dims = 'window')$output1
+    obs$data <- InsertDim(obs$data, posdim = 1, lendim = nsdates * ntimes, name = "window")
+    obs$data <- Apply(list(obs$data, window), target_dims = 'window',
+                      fun = function(x,y) x <- y)$output1
+  }
+
   #-----------------------------------
   # Interpolate high-res observations to the coarse grid
   #-----------------------------------
-  grid_exp <- exp$source_files[1]
-
+  if (is.null(grid_exp)) {
+    grid_exp <- exp$source_files[1]
+  }
+
   lonmin <- attr(exp[[lon_dim]],"first_lon")
   lonmax <- attr(exp[[lon_dim]],"last_lon")
   latmin <- attr(exp[[lat_dim]],"first_lat")
@@ -73,20 +182,31 @@
   lats <- obs[[lat_dim]]
   lons <- obs[[lon_dim]]
 
+  # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to
+  # the same grid to force the matching
+  if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp[[lat_dim]], lon1 = obs_interpolated$lon,
+                     lon2 = exp[[lon_dim]])) {
+    exp_interpolated <- Interpolation(exp = exp, target_grid = grid_exp, method_remap = "conservative",
+                                      remap_region = c(lonmin, lonmax, latmin, latmax))
+  } else {
+    exp_interpolated <- exp
+  }
+
   #-----------------------------------
   # Reshape train and test
   #-----------------------------------
-  res <- Apply(list(obs_interpolated$data, exp$data, obs$data),
-               target_dims = c(sdate_dim, time_dim, lat_dim, lon_dim),
+  res <- Apply(list(obs_interpolated$data, exp_interpolated$data, obs$data),
+               target_dims = list(c("window", sdate_dim, time_dim, lat_dim, lon_dim),
+               c(sdate_dim, time_dim, lat_dim, lon_dim), c("window", sdate_dim, time_dim, lat_dim, lon_dim)),
               fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs,
                                                    fun_analog = fun_analog), ncores = 
ncores)$output1 #test <- exp$data[1,1,,1,,,] - #train <- obs_interpolated$data[1,1,,,,] - #obs_hres <- obs$data[1,1,,,,] + #train <- obs_interpolated$data[,1,1,,,,] + #obs_hres <- obs$data[,1,1,,,,] #test <- aperm(test,c(2,1,3,4)) - #train <- aperm(train,c(3,1,2,4)) - #obs_hres <- aperm(obs_hres,c(3,1,2,4)) + #train <- aperm(train,c(1,4,2,3,5)) + #obs_hres <- aperm(obs_hres,c(1,4,2,3,5)) res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats)) @@ -97,48 +217,54 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat" # For each element in test, find the indices of the k nearest neigbhors in train .analogs <- function(train, test, obs_hres, k, fun_analog) { # train, test, and obs_hres dim: 4 dimensions sdate, time, lat and lon (in this order) - require(FNN) - require(multiApply) # Number of lats/lons of the high-resolution data - space_dims_hres <- dim(obs_hres)[c(3,4)] - nsdates <- dim(train)[1] + space_dims_hres <- dim(obs_hres)[c(4,5)] + nsdates <- dim(train)[2] + ntimes <- dim(train)[3] # Reformat train and test as an array with (time, points) - train <- apply(train, c(1,2), as.vector); names(dim(train))[1] <- "space" + train <- apply(train, c(1,2,3), as.vector); names(dim(train))[1] <- "space" test <- apply(test, c(1,2), as.vector); names(dim(test))[1] <- "space" - obs_hres <- apply(obs_hres, c(1,2), as.vector); names(dim(obs_hres))[1] <- "space" + obs_hres <- apply(obs_hres, c(1,2,3), as.vector); names(dim(obs_hres))[1] <- "space" # Here we do cross-validation: the start date considered is removed from the training - analogs <- lapply(1:nsdates, function(j) { - tr <- apply(train[ , -j, ], 1, as.vector); names(dim(tr))[1] <- "time" - te <- aperm(test[ , j, ]) - ob <- apply(obs_hres[ , -j, ], 1, as.vector); names(dim(ob))[1] <- "time" + analogs_ls <- lapply(1:nsdates, function(s) { + lapply(1:ntimes, function(t) { + tr <- apply(train[ , , -s, t], 1, as.vector); names(dim(tr))[1] <- "time" + te <- test[ , s, t] + te <- InsertDim(data = te, posdim = 1, lendim = 1, name = 'time'); names(dim(te))[2] <- "space" + ob <- apply(obs_hres[ , , -s, t], 1, as.vector); names(dim(ob))[1] <- "time" - knn.ind <- get.knnx(tr, te, k) + knn.ind <- get.knnx(tr, te, k) - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - names(dim(idx)) <- c("time", "analog"); names(dim(dist)) <- c("time", "analog") - - analogs <- Apply(idx, margins = "analog", fun = function(an) { ob[ an, ] })$output1 - dim(analogs) <- c(dim(analogs)[1], space_dims_hres, analog = k) + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + names(dim(idx)) <- c("time", "analog"); names(dim(dist)) <- c("time", "analog") - if (!is.null(fun_analog)) { + analogs <- Apply(idx, margins = "analog", fun = function(an) { ob[ an, ] })$output1 + + dim(analogs) <- c(space_dims_hres, analogs = k) - margins_analogs <- names(dim(analogs))[names(dim(analogs)) != "analog"] - - if (fun_analog == "wmean") { - weight <- 1 / dist - analogs <- Apply(list(analogs, weight), margins = list(margins_analogs, "time"), - fun = function(x, w) weighted.mean(x, w))$output1 - } else { - analogs <- Apply(analogs, margins = margins_analogs, fun = fun_analog)$output1 + if (!is.null(fun_analog)) { + + if (fun_analog == "wmean") { + weight <- 1 / dist + analogs <- apply(analogs, c(1,2), function(x) weighted.mean(x, weight)) + } else { + analogs <- apply(analogs, c(1,2), fun_analog) + } } - } + }) }) + + if (is.null(fun_analog)) { + analogs_arr <- array(unlist(analogs_ls), dim = c(space_dims_hres, analogs = k, ntimes, nsdates)) + } else 
{ + analogs_arr <- array(unlist(analogs_ls), dim = c(space_dims_hres, ntimes, nsdates)) + } - return(analogs) + return(analogs_arr) } -- GitLab From d89eb3ed647078a49a80d0685fcec30750bd2feb Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 24 May 2022 17:19:30 +0200 Subject: [PATCH 08/24] Wrote documentation --- R/Interpolation.R | 140 ++++++++++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 48 deletions(-) diff --git a/R/Interpolation.R b/R/Interpolation.R index 39c6673..a23e60f 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -1,11 +1,55 @@ -#method_remap implemented: con, bil, bic, nn, con2 -#method_point_interp: nearest, bilinear, 9point, invdist4nn, NE, NW, SE, SW -#target_grid has to be either a grid recognised by CDO or a NETCDF file -# requires CDO_1.9.8 or newer versions when using nn method -# Latitudes must range from -90 to 90 (or a subset of these values) and longitudes -# must range from -180 to 180 -Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, - remap_region = NULL, method_point_interp = NULL) +#'@rdname Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp an 's2dv_cube' object containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The element 'data' in the 's2dv_cube' +#'object must have, at least, the dimensions latitude and longitude. The object is +#'expected to be already subset for the desired large scale region. Latitudes must range +#'from -90 to 90 (or a subset) and longitudes must range from -180 to 180. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#''lat' and 'lon'. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp. Default set to "lon". +#'@param remap_region a numeric vector indicating the limits of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. If set to +#'NULL (default), the function takes the minimum and maximum values of the latitudes and +#'longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest" +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". 
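+#'As an illustration of this pathway, interpolating to two point locations
+#'could look like the following sketch (the coordinates are arbitrary, and
+#''exp$source_files' must contain at least one NetCDF file):
+#'\preformatted{
+#'  points <- list(lat = c(40.5, 41.2), lon = c(1.3, 2.1))
+#'  down_points <- Interpolation(exp = exp, points = points,
+#'                               method_point_interp = "bilinear")
+#'}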
+#'@import multiApply +#'@importFrom s2dv CDORemap +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An 's2dv_cube' object containing the dowscaled values. +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lon_exp <- 1:5 +#'lat_exp <- 1:4 +#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp) +#'downscaling_remap <- Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, lat_dim = "lat", + lon_dim = "lon", remap_region = NULL, method_point_interp = NULL) { require(s2dv) @@ -26,28 +70,25 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = } } - # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names - if (is.na(match("lon", names(dim(exp$data))))) { - stop("Longitude dimension in 'exp' must be named 'lon'") + if (is.na(match(lon_dim, names(dim(exp$data))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") } - if (is.na(match("lat", names(dim(exp$data))))) { - stop("Latitude dimension in 'exp' must be named 'lat'") - } - - if (is.na(match("sdate", names(dim(exp$data))))) { - stop("Time dimension in 'exp' must be named 'sdate'") + if (is.na(match(lat_dim, names(dim(exp$data))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") } # Check for negative longitudes in the exp data - if (any(exp$lon < -180 | exp$lon > 180) ) { - stop("Out-of-range longitudes have been found in 'exp$lon'. Longitudes must range from ", + if (any(exp[[lon_dim]] < -180 | exp[[lon_dim]] > 180) ) { + stop("Out-of-range longitudes have been found in 'exp'. Longitudes must range from ", "-180 to 180") } # Check for negative latitudes in the exp data - if (any(exp$lat < -90 | exp$lat > 90) ) { - stop("Out-of-range latitudes have been found in 'exp$lat'. Latitudes must range from ", + if (any(exp[[lat_dim]] < -90 | exp[[lat_dim]] > 90) ) { + stop("Out-of-range latitudes have been found in 'exp'. Latitudes must range from ", "-90 to 90") } @@ -77,8 +118,9 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = } # The names of the two elements must be 'lat' and 'lon' - if (any(!(c('lat', 'lon') %in% names(points)))) { - stop("The names of the elements in the list 'points' must be 'lat' and 'lon'") + if (any(!(c(lat_dim, lon_dim) %in% names(points)))) { + stop("The names of the elements in the list 'points' must coincide with the parametres ", + "'lat_dim' and 'lon_dim'") } # Check that the number of latitudes and longitudes match @@ -87,16 +129,21 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = } # Check for negative longitudes in the point coordinates - if (any(points$lon < -180 | points$lon > 180) ) { + if (any(points[[lon_dim]] < -180 | points[[lon_dim]] > 180) ) { stop("Out-of-range longitudes have been found in 'points'. Longitudes must range from ", "-180 to 180") } # Check for negative latitudes in the point coordinates - if (any(points$lat < -90 | points$lat > 90) ) { + if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) { stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ", "-90 to 90") } + + if (is.null(exp$source_files)) { + stop("No source files found. 
This information must be passed within ", + "'exp' with the element '$source_files'. At least one source file is needed") + } } else { if (is.null(method_remap)) { stop("Parameter 'method_remap' must be a character vector indicating the ", @@ -113,25 +160,25 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = #---------------------------------- # Limits of the region defined by the model data #---------------------------------- - if (is.null(attr(exp$lon,"first_lon"))) { - lonmin <- exp$lon[1] + if (is.null(attr(exp[[lon_dim]],"first_lon"))) { + lonmin <- exp[[lon_dim]][1] } else { - lonmin <- attr(exp$lon,"first_lon") + lonmin <- attr(exp[[lon_dim]],"first_lon") } - if (is.null(attr(exp$lon,"last_lon"))) { - lonmax <- exp$lon[length(exp$lon)] + if (is.null(attr(exp[[lon_dim]],"last_lon"))) { + lonmax <- exp[[lon_dim]][length(exp[[lon_dim]])] } else { - lonmax <- attr(exp$lon,"last_lon") + lonmax <- attr(exp[[lon_dim]],"last_lon") } - if (is.null(attr(exp$lat,"first_lat"))) { - latmin <- exp$lat[1] + if (is.null(attr(exp[[lat_dim]],"first_lat"))) { + latmin <- exp[[lat_dim]][1] } else { - latmin <- attr(exp$lat,"first_lat") + latmin <- attr(exp[[lat_dim]],"first_lat") } - if (is.null(attr(exp$lat,"last_lat"))) { - latmax <- exp$lat[length(exp$lat)] + if (is.null(attr(exp[[lat_dim]],"last_lat"))) { + latmax <- exp[[lat_dim]][length(exp[[lat_dim]])] } else { - latmax <- attr(exp$lat,"last_lat") + latmax <- attr(exp[[lat_dim]],"last_lat") } # for the case when region limits are not passed by the user @@ -141,8 +188,8 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = # Ensure points to be within the region limits if (!is.null(points)) { - if (any(points$lat > latmax) | any(points$lat < latmin) | - any(points$lon > lonmax) | any(points$lon < lonmin)) { + if (any(points[[lat_dim]] > latmax) | any(points[[lat_dim]] < latmin) | + any(points[[lon_dim]] > lonmax) | any(points[[lon_dim]] < lonmin)) { stop("At least one of the points lies outside the model region") } } @@ -152,8 +199,8 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = #---------------------------------- if (is.null(points)) { res <- s2dv::CDORemap(data_array = exp$data, - lats = exp$lat, - lons = exp$lon, + lats = exp[[lat_dim]], + lons = exp[[lon_dim]], grid = target_grid, method = method_remap, crop = remap_region) @@ -168,9 +215,9 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = # First create interpolation weights, depending on the chosen method ncfile_exp <- exp$source_files[1] weights <- create_interp_weights(ncfile = ncfile_exp, locids = 1:unique(lengths(points)), - lats = points$lat, lons = points$lon, method = method_point_interp, - region = list(lat_min = latmin, lat_max = latmax, lon_min = lonmin, - lon_max = lonmax)) + lats = points[[lat_dim]], lons = points[[lon_dim]], + method = method_point_interp, region = list(lat_min = latmin, + lat_max = latmax, lon_min = lonmin, lon_max = lonmax)) # Select coarse-scale data to be interpolated model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp$data) @@ -179,7 +226,7 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = res <- interpolate_data(model_data_gridpoints, weights) # Create an s2dv_cube object - res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points$lon, lat = points$lat)) + res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]])) } @@ 
-549,9 +596,6 @@ get_lats <- function(ncfile) {
 
 #======================
 get_model_data <- function(weights.df, mdata) {
 
-  require(plyr)
-  require(multiApply)
-
   #-----------------
   # Get data for all combinations of i and j.
   # (inefficient, getting many unneded pairs).
-- 
GitLab


From c57d151366177ee2497487e9a9bb31f4f36cae1f Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Fri, 3 Jun 2022 17:57:29 +0200
Subject: [PATCH 09/24] Started adding CST_* to functions

---
 R/Intbc.R         | 178 ++++++++++++++++++++++++++++++++++++--------
 R/Interpolation.R | 126 ++++++++++++++++----------------
 R/Intlr.R         |  31 ++++----
 R/Utils.R         |   5 +-
 4 files changed, 229 insertions(+), 111 deletions(-)

diff --git a/R/Intbc.R b/R/Intbc.R
index f6472a4..f5ec8de 100644
--- a/R/Intbc.R
+++ b/R/Intbc.R
@@ -1,8 +1,7 @@
-Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) {
-
-  require(CSTools)
-
-  # Input data must be an s2dv_cube object
+CST_Intbc <- function(exp, obs, target_grid, int_method, bc_method, points = NULL, method_point_interp = NULL,
+                      lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                      remap_region = NULL, cal_method = "mse_min", ncores = 1)
+{
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
   }
@@ -11,6 +10,92 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) {
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
+  res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]],
+               obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], target_grid = target_grid,
+               int_method = int_method, bc_method = bc_method, points = points,
+               method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim,
+               sdate_dim = sdate_dim, member_dim = member_dim, remap_region = remap_region,
+               cal_method = cal_method, ncores = ncores)
+
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+}
+
+
+#'@rdname Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation followed by a bias
+#'adjustment. It is recommended that the observations are passed already in the target grid.
+#'Otherwise, the function will also perform an interpolation of the observed field into the
+#'target grid. The coarse scale and observation data can be either global or regional. In the
+#'latter case, the region is defined by the user. In principle, the coarse and observation data
+#'are intended to be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv_cube' object containing the experimental field on the coarse scale for
+#'which the downscaling is aimed. The element 'data' in the 's2dv_cube' object must have, at
+#'least, the dimensions latitude, longitude, start date and member. The object is expected to
+#'be already subset for the desired large scale region. Latitudes must range from -90 to 90
+#'(or a subset) and longitudes must range from -180 to 180.
+#'@param obs an 's2dv_cube' object containing the observational field. The element 'data' in
+#'the 's2dv_cube' object must have, at least, the dimensions latitude, longitude and start date.
+#'The object is expected to be already subset for the desired large scale region.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. 
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The +#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'@param points a list of two elements containing the point latitudes and longitudes of the +#'locations to downscale the model data. The list must contain the two elements 'lat' and 'lon'. +#'If the downscaling is to a point location, only regular grids are allowed for exp and obs. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import CSTools +#' +#'@seealso \code{\link[CSTools]{BiasCorrection}} +#'@seealso \code{\link[CSTools]{Calibration}} +#'@seealso \code{\link[CSTools]{QuantileMapping}} +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'lon_exp <- 1:5 +#'lat_exp <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'lon_obs <- seq(1,5, 4/14) +#'lat_obs <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp) +#'obs <- s2dv_cube(data = obs, lat = lat_obs, lon = lon_obs) +#'attr(exp$lon,"first_lon") <- 1 +#'attr(exp$lon,"last_lon") <- 5 +#'attr(exp$lat,"first_lat") <- 1 +#'attr(exp$lat,"last_lat") <- 4 +#'attr(obs$lon,"first_lon") <- 1 +#'attr(obs$lon,"last_lon") <- 5 +#'attr(obs$lat,"first_lat") <- 1 +#'attr(obs$lat,"last_lat") <- 4 +#'downscaled_field <- Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export +Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, int_method, bc_method, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + member_dim = "member", remap_region = NULL, cal_method = "mse_min", ncores = 1) { + if (!inherits(int_method, 'character')) { stop("Parameter 'int_method' must be of the class 'character'") } @@ -18,39 +103,70 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") } - - stopifnot(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping')) - stopifnot(c('member','sdate') %in% names(dim(exp$data))) - #if ((attr(obs$lat,"first_lat") < attr(exp$lat,"first_lat")) | - # (attr(obs$lat,"last_lat") > attr(exp$lat,"last_lat")) | - # (attr(obs$lon,"first_lon") < attr(exp$lon,"first_lon")) | - # (attr(obs$lon,"last_lon") > attr(exp$lon,"last_lon"))) { - # 
stop("There are not enough data in 'exp'. Please to add more latitudes and ", - # "longitudes.") - #} + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } - lonmin <- attr(obs$lon,"first_lon") - lonmax <- attr(obs$lon,"last_lon") - latmin <- attr(obs$lat,"first_lat") - latmax <- attr(obs$lat,"last_lat") + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } - exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method, - remap_region = c(lonmin, lonmax, latmin, latmax)) + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") + } + + if (!(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping'))) { + stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", + "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", + "'sbc', 'cal', 'qm' can also be used.") + } + + if (is.null(remap_region)) { + remap_region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, + method_point_interp = method_point_interp, remap_region = remap_region) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs$lat, lon1 = exp_interpolated$lon, - lon2 = obs$lon)) { - obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method, - remap_region = c(lonmin, lonmax, latmin, latmax)) + if (!.check_coords(lat1 = exp_lats, lat2 = obs_lats, lon1 = exp_lons, lon2 = obs_lons) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, + method_point_interp = method_point_interp, remap_region = remap_region) } else { obs_interpolated <- obs } #.check_coords if (bc_method == 'sbc' | bc_method == 'simple_bias') { - if (dim(obs_interpolated$data)['sdate'] == 1) { + if (dim(obs_interpolated$data)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation. Returning NA. 
') } @@ -58,12 +174,12 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { obs = obs_interpolated$data) } else if (bc_method == 'cal' | bc_method == 'calibration') { - if (dim(exp_interpolated$data)['member'] == 1) { + if (dim(exp_interpolated$data)[member_dim] == 1) { stop('Calibration must not be used with only one ensemble member.') } res <- Calibration(exp = exp_interpolated$data, obs = obs_interpolated$data, - cal.method = 'mse_min', ncores = ncores) + cal.method = cal_method, ncores = ncores) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { if (any(is.na(exp_interpolated$data))) { @@ -71,13 +187,13 @@ Intbc <- function(exp, obs, target_grid, int_method, bc_method, ncores = 1) { 'by the interpolation method. Quantile Mapping method is likely to fail.') } res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_interpolated$data, - sample_dims = 'sdate', method = 'QUANT', ncores = ncores) + sample_dims = sdate_dim, method = 'QUANT', ncores = ncores) } # Create an s2dv_cube object - res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)) + res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat) - return(res_s2dv) + return(res) } diff --git a/R/Interpolation.R b/R/Interpolation.R index a23e60f..e9cb1ca 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -1,3 +1,21 @@ +CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, + lat_dim = "lat", lon_dim = "lon", remap_region = NULL, + method_point_interp = NULL) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + res <- Interpolation(exp = exp$data, lats = exp[[lat_dim]], lons = exp[[lon_dim]], + source_file = exp$source_files[1], points = points, + method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, + lon_dim = lon_dim, remap_region = remap_region, method_point_interp = method_point_interp) + + res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) + + return(res_s2dv) +} + #'@rdname Interpolation #'@title Regrid or interpolate gridded data to a point location. #' @@ -16,7 +34,8 @@ #'from -90 to 90 (or a subset) and longitudes must range from -180 to 180. #'@param points a list of two elements containing the point latitudes and longitudes #'of the locations to downscale the model data. The list must contain the two elements -#''lat' and 'lon'. +#''lat' and 'lon'. If the downscaling is to a point location, only regular grids are +#'allowed for exp and obs. #'@param method_remap a character vector indicating the regridding method to be passed #'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is #'to be used, CDO_1.9.8 or newer version is required. @@ -31,9 +50,10 @@ #'NULL (default), the function takes the minimum and maximum values of the latitudes and #'longitudes. #'@param method_point_interp a character vector indicating the interpolation method to -#'interpolate model gridded data into the point locations. Accepted methods are "nearest" +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", #'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". 
-#'@import multiApply +#'@import multiApply +#'@import plyr #'@importFrom s2dv CDORemap #' #'@seealso \code{\link[s2dverification]{CDORemap}} @@ -48,16 +68,10 @@ #'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp) #'downscaling_remap <- Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') #'@export -Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, lat_dim = "lat", - lon_dim = "lon", remap_region = NULL, method_point_interp = NULL) +Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, + target_grid = NULL, lat_dim = "lat", lon_dim = "lon", remap_region = NULL, + method_point_interp = NULL) { - require(s2dv) - - # Input data must be an s2dv_cube object - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } - if (!is.null(method_remap)) { if (!inherits(method_remap, 'character')) { stop("Parameter 'method_remap' must be of the class 'character'") @@ -70,26 +84,22 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = } } - if (is.na(match(lon_dim, names(dim(exp$data))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'lon_dim'") + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'") } - if (is.na(match(lat_dim, names(dim(exp$data))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'lat_dim'") + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") } - # Check for negative longitudes in the exp data - if (any(exp[[lon_dim]] < -180 | exp[[lon_dim]] > 180) ) { - stop("Out-of-range longitudes have been found in 'exp'. Longitudes must range from ", - "-180 to 180") + # Check for negative longitudes + if (any(lons < -180 | lons > 180)) { + stop("Out-of-range longitudes have been found. Longitudes must range from -180 to 180") } # Check for negative latitudes in the exp data - if (any(exp[[lat_dim]] < -90 | exp[[lat_dim]] > 90) ) { - stop("Out-of-range latitudes have been found in 'exp'. Latitudes must range from ", - "-90 to 90") + if (any(lats < -90 | lats > 90) ) { + stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") } # checkings for the case of interpolation to point locations @@ -140,9 +150,8 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = "-90 to 90") } - if (is.null(exp$source_files)) { - stop("No source files found. This information must be passed within ", - "'exp' with the element '$source_files'. 
At least one source file is needed") + if (is.null(source_file)) { + stop("No source file found.") } } else { if (is.null(method_remap)) { @@ -160,36 +169,16 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = #---------------------------------- # Limits of the region defined by the model data #---------------------------------- - if (is.null(attr(exp[[lon_dim]],"first_lon"))) { - lonmin <- exp[[lon_dim]][1] - } else { - lonmin <- attr(exp[[lon_dim]],"first_lon") - } - if (is.null(attr(exp[[lon_dim]],"last_lon"))) { - lonmax <- exp[[lon_dim]][length(exp[[lon_dim]])] - } else { - lonmax <- attr(exp[[lon_dim]],"last_lon") - } - if (is.null(attr(exp[[lat_dim]],"first_lat"))) { - latmin <- exp[[lat_dim]][1] - } else { - latmin <- attr(exp[[lat_dim]],"first_lat") - } - if (is.null(attr(exp[[lat_dim]],"last_lat"))) { - latmax <- exp[[lat_dim]][length(exp[[lat_dim]])] - } else { - latmax <- attr(exp[[lat_dim]],"last_lat") - } - # for the case when region limits are not passed by the user + # remap_regions contains the following elements in order: lonmin, lonmax, latmin, latmax if (is.null(remap_region)) { - remap_region <- c(lonmin, lonmax, latmin, latmax) + remap_region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) } # Ensure points to be within the region limits if (!is.null(points)) { - if (any(points[[lat_dim]] > latmax) | any(points[[lat_dim]] < latmin) | - any(points[[lon_dim]] > lonmax) | any(points[[lon_dim]] < lonmin)) { + if (any(points[[lat_dim]] > remap_region[4]) | any(points[[lat_dim]] < remap_region[3]) | + any(points[[lon_dim]] > remap_region[2]) | any(points[[lon_dim]] < remap_region[1])) { stop("At least one of the points lies outside the model region") } } @@ -198,39 +187,44 @@ Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = # Map regrid with CDO #---------------------------------- if (is.null(points)) { - res <- s2dv::CDORemap(data_array = exp$data, - lats = exp[[lat_dim]], - lons = exp[[lon_dim]], + res <- s2dv::CDORemap(data_array = exp, + lats = lats, + lons = lons, grid = target_grid, method = method_remap, crop = remap_region) + # Return a list + res <- list(data = res$data_array, lon = res$lons, lat = res$lats) + # Create an s2dv_cube object - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats)) + #res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats)) #---------------------------------- # Interpolate to point locations #---------------------------------- } else { # First create interpolation weights, depending on the chosen method - ncfile_exp <- exp$source_files[1] - weights <- create_interp_weights(ncfile = ncfile_exp, locids = 1:unique(lengths(points)), + weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), lats = points[[lat_dim]], lons = points[[lon_dim]], - method = method_point_interp, region = list(lat_min = latmin, - lat_max = latmax, lon_min = lonmin, lon_max = lonmax)) + method = method_point_interp, region = list(lat_min = remap_region[3], + lat_max = remap_region[4], lon_min = remap_region[1], lon_max = remap_region[2])) # Select coarse-scale data to be interpolated - model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp$data) + model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp) # Interpolate model data to point locations res <- interpolate_data(model_data_gridpoints, weights) + # Return a list + res <- list(data = 
res, lon = points[[lon_dim]], lat = points[[lat_dim]]) + # Create an s2dv_cube object - res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]])) + #res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]])) } - return(res_s2dv) + return(res) } #====================== @@ -250,6 +244,12 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, #---------------- griddes <- get_griddes(paste0(ncfile,'_cropped')) + if (is.null(griddes$yinc)) { + system(paste0('rm ',ncfile,'_cropped')) + stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ", + "downscaling to point locations.") + } + # If latitudes are decreasingly ordered, revert them if (griddes$yinc < 0) { system(paste0('cdo invertlat ',ncfile,'_cropped ',ncfile,'_cropped2')) diff --git a/R/Intlr.R b/R/Intlr.R index e155b88..2b030dc 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -1,4 +1,5 @@ -Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL, +Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL, lat_dim = "lat", + lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { require(multiApply) @@ -49,18 +50,18 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL #----------------------------------- if (lr_method != '4nn') { - lonmin <- attr(obs$lon,"first_lon") - lonmax <- attr(obs$lon,"last_lon") - latmin <- attr(obs$lat,"first_lat") - latmax <- attr(obs$lat,"last_lat") + lonmin <- attr(obs[[lon_dim]],"first_lon") + lonmax <- attr(obs[[lon_dim]],"last_lon") + latmin <- attr(obs[[lat_dim]],"first_lat") + latmax <- attr(obs[[lat_dim]],"last_lat") exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method, remap_region = c(lonmin, lonmax, latmin, latmax)) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs$lat, - lon1 = exp_interpolated$lon, lon2 = obs$lon)) { + if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs[[lat_dim]], + lon1 = exp_interpolated$lon, lon2 = obs[[lon_dim]])) { obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method, remap_region = c(lonmin, lonmax, latmin, latmax)) } else { @@ -107,14 +108,14 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL # Predictand: observations else if (lr_method == '4nn') { warning("Interpolating model and observation data but not needed.") - predictor <- find_nn(hres = obs, coar = exp, nn = 4) + predictor <- find_nn(hres = obs, coar = exp, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4) predictand <- obs$data target_dims_predictor <- c('sdate','nn') target_dims_predictand <- 'sdate' - lats <- obs$lat - lons <- obs$lon + lats <- obs[[lat_dim]] + lons <- obs[[lon_dim]] } else { @@ -199,14 +200,14 @@ pred_lm <- function(df, lm1, loocv) { # Function to find N nearest neighbours. 
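 # A toy sketch of the distance-ranking idiom used in this function
 # (made-up values): order(abs(c(10, 20, 30) - 22))[1:2] returns 2 3,
 # i.e. the indices of the two closest coarse-grid latitudes to 22.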
# 'hres' and 'coar' are s2dv_objects #----------------------------------- -find_nn <- function(hres, coar, nn = 4) { +find_nn <- function(hres, coar, lat_dim, lon_dim, nn = 4) { require(abind) - lats_hres <- hres$lat - lons_hres <- hres$lon - lats_coar <- coar$lat - lons_coar <- coar$lon + lats_hres <- hres[[lat_dim]] + lons_hres <- hres[[lon_dim]] + lats_coar <- coar[[lat_dim]] + lons_coar <- coar[[lon_dim]] # Sort the distances from closest to furthest idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) diff --git a/R/Utils.R b/R/Utils.R index addddc9..4c72746 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -7,8 +7,9 @@ } # reorder dims to a reference array. If they do not exist, they are created -arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) -arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) +# example +#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) +#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) .reorder_dims <- function(arr_ref, arr_to_reorder) { -- GitLab From 8cf06fd404c5d9a13aa31df8ea79c645c339ce69 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 7 Jun 2022 17:43:02 +0200 Subject: [PATCH 10/24] examples using CST_* --- examples/analogs.R | 118 +++++++++++++++++++++++++++ examples/interpolation-bc.R | 154 +++++++++++++++++++++--------------- examples/interpolation-lr.R | 142 ++++++++++++++++++++------------- examples/interpolation.R | 131 +++++++++++++++++------------- 4 files changed, 368 insertions(+), 177 deletions(-) create mode 100644 examples/analogs.R diff --git a/examples/analogs.R b/examples/analogs.R new file mode 100644 index 0000000..7f27510 --- /dev/null +++ b/examples/analogs.R @@ -0,0 +1,118 @@ + +library(CSTools) +library(startR) +library(s2dv) +library(lubridate) +library(multiApply) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') + +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' +#target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' + +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 +extra <- 1 + +#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +#sdates <- format(seq(ymd("20000101"), ymd("20071201"), '1 month'), "%Y%m%d") +sdates_exp <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m%d") +#sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:4), each=3),"%Y%m") +sdates_obs <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m") + +#--------------------------- +# Observations +#--------------------------- +# dim(obs) <- c('sdate', 'smonth', ...) 
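+# A toy sketch of the sliding-window construction done further below (made-up
+# values; assumes 28-day months and a +/- 7-day window): for each target day i,
+# the 15 surrounding days are taken from the vectorised three-month series,
+# which is why the centre month is loaded together with one month on each side.
+#   toy <- seq_len(28 * 3)
+#   wlen <- 7
+#   win <- sapply(1:28, function(i) toy[(28 + i - wlen):(28 + i + wlen)])
+#   dim(win)   # 15 x 28, i.e. window x time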
+obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), lat = values(list(latmin - extra, latmax + extra)), + sdate = sdates_obs, lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin - extra, lonmax + extra)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) +obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon, + source_files = attr(obs, "Files")[1,1,]) +attr(obs$lon,"first_lon") <- obs$lon[1] +attr(obs$lon,"last_lon") <- obs$lon[length(obs$lon)] +attr(obs$lat,"first_lat") <- obs$lat[1] +attr(obs$lat,"last_lat") <- obs$lat[length(obs$lat)] + +# Create window to look for analogues +dim(obs$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 7, lon = 67) # correct +obs_new <- Apply(obs$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 +wlen <- 7 + +result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) +for (i in 1:28) { + result[,i,,,] <- obs_new[(28 + i - wlen):(28 + i + wlen),1,1,,,] +} + +obs$data <- result + +# > dim(obs$data) +# data var time lat smonth sdate lon +# 1 1 28 39 3 7 67 +# > dim(result) +# window time lat sdate lon +# 15 28 39 7 67 + +#--------------------------- +# Model +#--------------------------- +exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), member = indices(1:3), sdate = sdates_exp, + lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) +exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon, + source_files = attr(exp, "Files")[1,1,]) +attr(exp$lon,"first_lon") <- exp$lon[1] +attr(exp$lon,"last_lon") <- exp$lon[length(exp$lon)] +attr(exp$lat,"first_lat") <- exp$lat[1] +attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)] + + +# test examples +ana_mean <- Analogs(exp = exp, obs = obs, fun_analog = "mean", ncores = 4) +ana_wmean <- Analogs(exp = exp, obs = obs, fun_analog = "wmean", ncores = 4) +ana_max <- Analogs(exp = exp, obs = obs, fun_analog = "max", ncores = 4) +ana_min <- Analogs(exp = exp, obs = obs, fun_analog = "min", ncores = 4) +ana_median <- Analogs(exp = exp, obs = obs, fun_analog = "median", ncores = 4) + +# plot examples +s2dv::PlotEquiMap(ana_mean$data[,,1,1,1,1,1], lat = ana_mean$lat, lon = ana_mean$lon, filled.continents = F, + toptitle = 'Analogs mean', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_mean_2.png")) +s2dv::PlotEquiMap(ana_wmean$data[1,,,1,1,1,1], lat = ana_wmean$lat, lon = ana_wmean$lon, filled.continents = F, + toptitle = 'Analogs weighted mean', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_wmean.png")) +s2dv::PlotEquiMap(ana_max$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, filled.continents = F, + toptitle = 'Analogs max', brks = seq(275,288,1), height = 8, width = 10, + fileout = 
file.path(plotpath, "ip_ana_max.png")) +s2dv::PlotEquiMap(ana_median$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, filled.continents = F, + toptitle = 'Analogs median', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_median.png")) +s2dv::PlotEquiMap(ana_min$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, filled.continents = F, + toptitle = 'Analogs min', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_min.png")) + + + + + + + + + + diff --git a/examples/interpolation-bc.R b/examples/interpolation-bc.R index 5b3eac5..fe46cfc 100644 --- a/examples/interpolation-bc.R +++ b/examples/interpolation-bc.R @@ -2,71 +2,97 @@ library(startR) library(s2dv) library(CSTools) -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/R/functions.R') - -plotpath <- '/esarchive/scratch/jramon/downscaling/plots' -target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' - -lonmin <- -22 -lonmax <- 45 -latmin <- 27 -latmax <- 72 - -#obs2 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily/$var$-r1440x721cds/$var$_201702.nc', -# var = 'tasmax', time = indices(1:5), lat = values(list(latmin, latmax)), -# lat_reorder = Sort(decreasing = TRUE), lon = values(list(lonmin, lonmax)), -# lon_reorder = CircularSort(-180, 180), -# synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), -# lat = c('lat', 'latitude')), return_vars = list(lat = NULL, lon = NULL), -# num_procs = 1, retrieve = TRUE) - -#exp2 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily/$var$/$var$_20170201.nc', -# var = 'tasmax', time = indices(1:5), member = indices(1:3), -# lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = TRUE), -# lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), -# synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), -# lat = c('lat', 'latitude'), member = c('member','ensemble')), -# return_vars = list(lat = NULL, lon = NULL), -# num_procs = 1, retrieve = TRUE) -#names(dim(exp))[5] <- 'sdate' -#names(dim(obs))[5] <- 'sdate' - -#lat_obs <- as.numeric(attr(obs,'Variables')$dat$lat) -#lon_obs <- as.numeric(attr(obs,'Variables')$dat$lon) -#lat_exp <- as.numeric(attr(exp,'Variables')$dat$lat) -#lon_exp <- as.numeric(attr(exp,'Variables')$dat$lon) - -obs <- CST_Load(var = 'tasmax', - obs = 'era5', - sdates = c('20170201','20170301','20170401','20170501','20170601'), - leadtimemax = 1, - latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax, - output = 'lonlat') - -exp <- CST_Load(var = 'tasmax', - exp = 'system5c3s', - nmember = 3, - sdates = c('20170201','20170301','20170401','20170501','20170601'), - leadtimemax = 1, - latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax, - output = 'lonlat') - -int_methods <- c('con', 'bil', 'bic', 'nn', 'con2') -bc_methods <- c('sbc', 'cal', 'qm') - -for (i in seq(int_methods)) { - for (b in seq(bc_methods)) { - downscaled <- Intbc(exp = exp, obs = obs, target_grid = target_grid, - int_method = int_methods[i], bc_method = bc_methods[b], ncores = 4) - lats <- downscaled$lats - lons <- downscaled$lons - data <- downscaled$data_array - s2dv::PlotEquiMap(var = data[1,1,1,1,,], lat = lats, lon = lons, filled.continents = FALSE, - toptitle = paste0(int_methods[i], ' ', bc_methods[b]), - fileout = file.path(plotpath, paste0(int_methods[i],'-',bc_methods[b],'.png'))) - } -} +library(lubridate) 
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' +target_grid <- '/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc' +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 +sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') + +obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', + var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), + lat_reorder = Sort(decreasing = FALSE), lon = values(list(lonmin, lonmax)), + lon_reorder = CircularSort(-180, 180), sdate = format(ymd(sdates), "%Y%m"), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates, + lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +#------------------------------------ +# Downscaling with Intbc +#------------------------------------ +down_1_sbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'simple_bias', ncores = 4) + +down_1_cal <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'calibration', ncores = 4) + +down_1_qm <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'quantile_mapping', ncores = 4) + +#------------------------------------ +# Transform exp and obs into s2dv_objects +#------------------------------------ +obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon, + source_files = attr(obs, "Files")[1,1,]) +attr(obs$lon,"first_lon") <- obs$lon[1] +attr(obs$lon,"last_lon") <- obs$lon[length(obs$lon)] +attr(obs$lat,"first_lat") <- obs$lat[1] +attr(obs$lat,"last_lat") <- obs$lat[length(obs$lat)] + +exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon, + source_files = attr(exp, "Files")[1,1,]) +attr(exp$lon,"first_lon") <- exp$lon[1] +attr(exp$lon,"last_lon") <- exp$lon[length(exp$lon)] +attr(exp$lat,"first_lat") <- exp$lat[1] +attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)] + 
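+#------------------------------------
+# Quick sanity check (a sketch): Intbc returns a plain list with the elements
+# 'data', 'lon' and 'lat', so the downscaled fields can be inspected directly
+#------------------------------------
+stopifnot(all(c("data", "lon", "lat") %in% names(down_1_sbc)))
+dim(down_1_sbc$data)
+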
+#------------------------------------ +# Downscaling with CST_Intbc +#------------------------------------ +down_2_sbc <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'simple_bias', + ncores = 4) + +down_2_cal <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'calibration', + ncores = 4) + +down_2_qm <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'quantile_mapping', + ncores = 4) + +#------------------------------------ +# Downscaling to point locations +#------------------------------------ +points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8)) +down_points_sbc <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'simple_bias', ncores = 4) +down_points_cal <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'calibration', ncores = 4) +down_points_qm <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'quantile_mapping', ncores = 4) diff --git a/examples/interpolation-lr.R b/examples/interpolation-lr.R index 38bafba..c32f0b1 100644 --- a/examples/interpolation-lr.R +++ b/examples/interpolation-lr.R @@ -2,66 +2,96 @@ library(CSTools) library(startR) library(s2dv) +library(lubridate) source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') -plotpath <- '/esarchive/scratch/jramon/downscaling/plots' +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' -lonmin <- -22 -lonmax <- 45 -latmin <- 27 -latmax <- 72 - -sdates <- c('20170201','20170301','20170401','20170501','20170601','20170701','20170801','20170901') - -obs <- CST_Load(var = 'tasmax', obs = 'era5', sdates = sdates, leadtimemax = 1, - latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax, - output = 'lonlat') -obs$data <- drop(obs$data) - -exp <- CST_Load(var = 'tasmax', exp = 'system5c3s', nmember = 3, sdates = sdates, leadtimemax = 1, - latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax, - output = 'lonlat') - -#ind rdm ha de ser o bé un vector o bé un array amb una dimensió anomenada 'sdate' -ind_rdm <- array(NA, dim = c('sdate' = 8,'vars' = 2)) -ind_rdm[,1] <- rnorm(n=8,mean=0,sd=1) -ind_rdm[,2] <- rnorm(n=8,mean=0,sd=1) - -int_method <- 'bil' -lr_methods <- c('basic', 'large-scale', '4nn') - -for (i in seq(lr_methods)) { - - if (lr_methods[i] == 'large-scale') { - predictors <- ind_rdm - } else { - predictors <- NULL - } - downscaled <- Intlr(exp = exp, obs = obs, target_grid = target_grid, lr_method = lr_methods[i], - int_method = int_method, predictors = predictors, loocv = TRUE, ncores = 4) - lats <- downscaled$lat - lons <- downscaled$lon - data <- downscaled$data - s2dv::PlotEquiMap(var = data[1,1,1,1,,], lat = lats, lon = lons, filled.continents = FALSE, - toptitle = paste0(int_method, ' ', lr_methods[i]), brks = seq(250,310,5), - fileout = file.path(plotpath, paste0(int_method, ' ', lr_methods[i],'-lr.png'))) -} - -# Predictions -s2dv::PlotEquiMap(var = exp$data[1,1,1,1,,], lat = exp$lat, lon = exp$lon, filled.continents = FALSE, - toptitle = 
"Predictions Europe", brks = seq(250,310,5), - fileout = file.path(plotpath, "Predictions_Europe.png")) - -# Observations -s2dv::PlotEquiMap(var = obs$data[1,,], lat = obs$lat, lon = obs$lon, filled.continents = FALSE, - toptitle = "Observations Europe", brks = seq(250,310,5), - fileout = file.path(plotpath, "Observations_Europe.png")) - - - - - +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 + + +sdates <- format(ymd("19811201") + rep(years(0:36),each=1),"%Y%m%d") +#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') + +obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h_i1087/$var$_$sdate$.nc', + var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), + sdate = format(ymd(sdates), "%Y%m"), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates, + lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +#---------------------------------- +# Downscaling with Intlr +#---------------------------------- +ind_rdm <- array(NA, dim = c('sdate' = length(sdates), member = 3, 'vars' = 2)) +ind_rdm[,,1] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1) +ind_rdm[,,2] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1) + +down_1_bas <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "basic", + int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) + +down_1_lsc <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "large-scale", + int_method = "conservative", predictors = ind_rdm, loocv = TRUE, ncores = 4) + +down_1_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn", + int_method = "conservative", predictors = ind_rdm, loocv = TRUE, ncores = 4) + +#---------------------------------- +# Create s2dv objects +#---------------------------------- +obs1 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon, + source_files = attr(obs, "Files")[1,1,]) +attr(obs1$lon,"first_lon") <- obs1$lon[1] +attr(obs1$lon,"last_lon") <- obs1$lon[length(obs1$lon)] +attr(obs1$lat,"first_lat") <- obs1$lat[1] +attr(obs1$lat,"last_lat") <- 
obs1$lat[length(obs1$lat)]
+
+exp1 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon,
+                  source_files = attr(exp1, "Files")[1,1,])
+attr(exp1$lon,"first_lon") <- exp1$lon[1]
+attr(exp1$lon,"last_lon") <- exp1$lon[length(exp1$lon)]
+attr(exp1$lat,"first_lat") <- exp1$lat[1]
+attr(exp1$lat,"last_lat") <- exp1$lat[length(exp1$lat)]
+
+#----------------------------------
+# Downscaling with CST_Intlr
+#----------------------------------
+down_2_bas <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "basic",
+                        int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+
+down_2_lsc <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "large-scale",
+                        int_method = "bilinear", predictors = ind_rdm, loocv = TRUE, ncores = 4)
+
+down_2_nn <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "4nn",
+                       int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+
+#----------------------------------
+# Downscaling to point locations with CST_Intlr
+#----------------------------------
diff --git a/examples/interpolation.R b/examples/interpolation.R
index 157901b..367fb90 100644
--- a/examples/interpolation.R
+++ b/examples/interpolation.R
@@ -1,61 +1,78 @@
 library(CSTools)
+library(lubridate)
 library(startR)
 library(s2dv)
-source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/functions.R')
-
-lonmin <- -22
-lonmax <- 45
-latmin <- 27
-latmax <- 72
-
-target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax/tasmax_201702.nc'
-
-obs <- CST_Load(var = 'tasmax',
-                obs = 'era5',
-                sdates = '20170201',
-                leadtimemax = 1,
-                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
-                output = 'lonlat')
-
-exp <- CST_Load(var = 'tasmax',
-                exp = 'system5c3s',
-                nmember = 1,
-                sdates = '20170201',
-                leadtimemax = 1,
-                latmin = latmin, latmax = latmax, lonmin = lonmin, lonmax = lonmax,
-                output = 'lonlat')
-
-lon_exp <- exp$lon
-lat_exp <- exp$lat
-lon_obs <- obs$lon
-lat_obs <- obs$lat
-
-exp_con <- Interpolation(exp = exp, target_grid = target_grid, method = 'con', crop = c(lonmin,lonmax,latmin,latmax))
-lat_con <- exp_con$lats
-lon_con <- exp_con$lons
-exp_con <- exp_con$data_array
-exp_bil <- Interpolation(exp = exp, target_grid = target_grid, method = 'bil', crop = c(lonmin,lonmax,latmin,latmax))
-lat_bil <- exp_bil$lats
-lon_bil <- exp_bil$lons
-exp_bil <- exp_bil$data_array
-exp_bic <- Interpolation(exp = exp, target_grid = target_grid, method = 'bic', crop = c(lonmin,lonmax,latmin,latmax))
-lat_bic <- exp_bic$lats
-lon_bic <- exp_bic$lons
-exp_bic <- exp_bic$data_array
-exp_nn <- Interpolation(exp = exp, target_grid = target_grid, method = 'nn', crop = c(lonmin,lonmax,latmin,latmax))
-lat_nn <- exp_nn$lats
-lon_nn <- exp_nn$lons
-exp_nn <- exp_nn$data_array
-exp_con2 <- Interpolation(exp = exp, target_grid = target_grid, method = 'con2', crop = c(lonmin,lonmax,latmin,latmax))
-lat_con2 <- exp_con2$lats
-lon_con2 <- exp_con2$lons
-exp_con2 <- exp_con2$data_array
-
-s2dv::PlotEquiMap(var = obs, lat = lat_obs, lon = lon_obs, filled.continents = FALSE, toptitle = 'Observations')
-s2dv::PlotEquiMap(var = exp$data, lat = lat_exp, lon = lon_exp, filled.continents = FALSE, toptitle = 'Predictions')
-s2dv::PlotEquiMap(var = exp_con, lat = lat_con, lon = lon_con, filled.continents = FALSE, toptitle = 'Conservative')
-s2dv::PlotEquiMap(var = exp_bil, lat = lat_bil, lon = lon_bil, filled.continents = FALSE, toptitle = 'Bilinear')
-s2dv::PlotEquiMap(var = exp_bic, lat 
= lat_bic, lon = lon_bic, filled.continents = FALSE, toptitle = 'Bicubic')
-s2dv::PlotEquiMap(var = exp_nn, lat = lat_nn, lon = lon_nn, filled.continents = FALSE, toptitle = 'Nearest neighbour')
-s2dv::PlotEquiMap(var = exp_con2, lat = lat_con2, lon = lon_con2, filled.continents = FALSE, toptitle = '2nd order conservative')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+
+plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip'
+target_grid <- '/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc'
+
+#lonmin <- -22
+#lonmax <- 45
+#latmin <- 27
+#latmax <- 72
+lonmin <- -11.5
+lonmax <- 5.35
+latmin <- 35.1
+latmax <- 44.1
+
+sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201')
+
+exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+                      var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates,
+                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon,
+                  source_files = attr(exp1, "Files")[1,1,])
+attr(exp2$lon,"first_lon") <- exp2$lon[1]
+attr(exp2$lon,"last_lon") <- exp2$lon[length(exp2$lon)]
+attr(exp2$lat,"first_lat") <- exp2$lat[1]
+attr(exp2$lat,"last_lat") <- exp2$lat[length(exp2$lat)]
+
+#--------------------------------
+# Downscaling with Interpolation
+#--------------------------------
+exp1_con <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'con')
+exp1_bil <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'bil')
+exp1_bic <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'bic')
+exp1_nn <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                         lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'nn')
+exp1_con2 <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                           lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'con2')
+
+#--------------------------------
+# Downscaling with CST_Interpolation
+#--------------------------------
+exp2_con <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'con')
+exp2_bil <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'bil')
+exp2_bic <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'bic')
+exp2_nn <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'nn')
+exp2_con2 <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'con2')
+
+#--------------------------------
+# Downscaling to point locations
+#--------------------------------
+points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8))
+down_points <- CST_Interpolation(exp = exp2, points = points, method_point_interp = 'bilinear',
+                                 target_grid = target_grid)
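+
+# A quick check (sketch): with 'points', the returned object carries the
+# requested point coordinates in its 'lat' and 'lon' elements
+#down_points$lat; down_points$lon
+#dim(down_points$data)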
+
+#s2dv::PlotEquiMap(var = obs$data[1,1,1,,,1], lat = obs$lat, lon = obs$lon, filled.continents = FALSE, toptitle = 'Observations', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_obs.png"))
+#s2dv::PlotEquiMap(var = exp$data[1,1,1,1,1,,], lat = exp$lat, lon = exp$lon, filled.continents = FALSE, toptitle = 'Predictions', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_fcst.png"))
+#s2dv::PlotEquiMap(var = exp1_con$data[1,1,1,1,1,,], lat = exp1_con$lat, lon = exp1_con$lon, filled.continents = FALSE, toptitle = 'Conservative', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_con.png"))
+#s2dv::PlotEquiMap(var = exp1_bil$data[1,1,1,1,1,,], lat = exp1_bil$lat, lon = exp1_bil$lon, filled.continents = FALSE, toptitle = 'Bilinear', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_bil.png"))
+#s2dv::PlotEquiMap(var = exp1_bic$data[1,1,1,1,1,,], lat = exp1_bic$lat, lon = exp1_bic$lon, filled.continents = FALSE, toptitle = 'Bicubic', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_bic.png"))
+#s2dv::PlotEquiMap(var = exp1_nn$data[1,1,1,1,1,,], lat = exp1_nn$lat, lon = exp1_nn$lon, filled.continents = FALSE, toptitle = 'Nearest neighbour', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_nn.png"))
+#s2dv::PlotEquiMap(var = exp1_con2$data[1,1,1,1,1,,], lat = exp1_con2$lat, lon = exp1_con2$lon, filled.continents = FALSE, toptitle = '2nd order conservative', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_con2.png"))
+
+
+
+
-- 
GitLab


From 896fd72b0ee7e731e1499eaaeb4124bbd5fcb735 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Tue, 7 Jun 2022 17:43:32 +0200
Subject: [PATCH 11/24] created functions CST_*

---
 R/Intbc.R         |  20 +++---
 R/Interpolation.R |   2 +-
 R/Intlr.R         | 163 ++++++++++++++++++++++++++++++----------
 3 files changed, 117 insertions(+), 68 deletions(-)

diff --git a/R/Intbc.R b/R/Intbc.R
index f5ec8de..6f180f4 100644
--- a/R/Intbc.R
+++ b/R/Intbc.R
@@ -1,4 +1,4 @@
-CST_Intbc <- function(exp, obs, target_grid, int_method, bc_method, points = NULL, method_point_interp = NULL,
+CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL,
                       lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
                       remap_region = NULL, cal_method = "mse_min", ncores = 1)
 {
@@ -12,7 +12,7 @@ CST_Intbc <- function(exp, obs, target_grid, int_method, bc_method, points = NUL
 
   res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]],
                obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], target_grid = target_grid,
-               int_method = int_method, bc_method = bc_method, points = points,
+               int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1],
                method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim,
                sdate_dim = sdate_dim, member_dim = member_dim, remap_region = remap_region,
                cal_method = cal_method, ncores = ncores)
@@ -92,14 +92,10 @@ CST_Intbc <- function(exp, obs, target_grid, int_method, bc_method, points = NUL
 #'attr(obs$lat,"last_lat") <- 4
 #'downscaled_field <- Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
 #'@export
-Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, int_method, bc_method, 
+Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", remap_region = NULL, cal_method = "mse_min", ncores = 1) { + member_dim = "member", source_file = NULL, remap_region = NULL, cal_method = "mse_min", ncores = 1) { - if (!inherits(int_method, 'character')) { - stop("Parameter 'int_method' must be of the class 'character'") - } - if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") } @@ -145,20 +141,24 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", "'sbc', 'cal', 'qm' can also be used.") } + + if (!is.null(points) & is.null(source_file)) { + stop("No source file found. Source file must be provided in the parameter 'source_file'.") + } if (is.null(remap_region)) { remap_region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, + method_remap = int_method, points = points, source_file = source_file, method_point_interp = method_point_interp, remap_region = remap_region) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if (!.check_coords(lat1 = exp_lats, lat2 = obs_lats, lon1 = exp_lons, lon2 = obs_lons) | !is.null(points)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, + method_remap = int_method, points = points, source_file = source_file, method_point_interp = method_point_interp, remap_region = remap_region) } else { obs_interpolated <- obs diff --git a/R/Interpolation.R b/R/Interpolation.R index e9cb1ca..1bdf5b1 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -151,7 +151,7 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me } if (is.null(source_file)) { - stop("No source file found.") + stop("No source file found. 
Source file must be provided in the parameter 'source_file'.")
     }
   } else {
     if (is.null(method_remap)) {
diff --git a/R/Intlr.R b/R/Intlr.R
index 2b030dc..93a98f4 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -1,26 +1,42 @@
-Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NULL, lat_dim = "lat",
-                  lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
-                  large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) {
-
-  require(multiApply)
+CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_method = NULL, method_point_interp = NULL,
+                      source_file = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                      time_dim = "time", remap_region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE,
+                      ncores = 1) {
 
-  #-----------------------------------
-  # Checkings
-  #-----------------------------------
-  # input exp and obs must be s2dv_cube objects
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
   }
-
-  # input exp and obs must be s2dv_cube objects
+
   if (!inherits(obs,'s2dv_cube')) {
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
-  if (!inherits(int_method, 'character')) {
-    stop("Parameter 'int_method' must be of the class 'character'")
-  }
-
+  res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]],
+               obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], points = points, source_file = source_file,
+               target_grid = target_grid, lr_method = lr_method, int_method = int_method, method_point_interp = method_point_interp,
+               predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim,
+               remap_region = remap_region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv,
+               ncores = ncores)
+
+  #-----------------------------------
+  # Create an s2dv_cube object
+  #-----------------------------------
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+}
+
+# TODO: implement point interpolation
+
+Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, lr_method, points = NULL,
+                  int_method = NULL, method_point_interp = NULL, source_file = NULL, predictors = NULL, lat_dim = "lat",
+                  lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", remap_region = NULL,
+                  large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) {
+
+  require(multiApply)
+
+  #-----------------------------------
+  # Checks
+  #-----------------------------------
   if (!inherits(lr_method, 'character')) {
     stop("Parameter 'lr_method' must be of the class 'character'")
   }
@@ -33,16 +49,48 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
     stop("Parameter 'loocv' must be set to TRUE or FALSE")
   }
 
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(large_scale_predictor_dimname, 'character')) {
+    stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
   # sdate must be the time dimension in the input data
-  stopifnot('sdate' %in% names(dim(exp$data)))
-  stopifnot('sdate' %in% names(dim(obs$data)))
+  stopifnot(sdate_dim %in% names(dim(exp)))
+  stopifnot(sdate_dim %in% names(dim(obs)))
 
   # checks for the parameter 'predictors'
   if (is.array(predictors)) {
     # ensure the predictor variable name matches the parameter large_scale_predictor_dimname
     stopifnot(large_scale_predictor_dimname %in% names(dim(predictors)))
-    stopifnot('sdate' %in% names(dim(predictors)))
-    stopifnot(dim(predictors)['sdate'] == dim(exp$data)['sdate'])
+    stopifnot(sdate_dim %in% names(dim(predictors)))
+    stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim])
   }
 
@@ -50,20 +98,19 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   #-----------------------------------
   if (lr_method != '4nn') {
 
-    lonmin <- attr(obs[[lon_dim]],"first_lon")
-    lonmax <- attr(obs[[lon_dim]],"last_lon")
-    latmin <- attr(obs[[lat_dim]],"first_lat")
-    latmax <- attr(obs[[lat_dim]],"last_lat")
+    if (is.null(remap_region)) {
+      remap_region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+    }
 
-    exp_interpolated <- Interpolation(exp = exp, target_grid = target_grid, method_remap = int_method,
-                                      remap_region = c(lonmin, lonmax, latmin, latmax))
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                      method_remap = int_method, remap_region = remap_region)
 
     # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
     # the same grid to force the matching
-    if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs[[lat_dim]],
-                       lon1 = exp_interpolated$lon, lon2 = obs[[lon_dim]])) {
-      obs_interpolated <- Interpolation(exp = obs, target_grid = target_grid, method_remap = int_method,
-                                        remap_region = c(lonmin, lonmax, latmin, latmax))
+    if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                       lon1 = exp_interpolated$lon, lon2 = obs_lons)) {
+      obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                        method_remap = int_method, remap_region = remap_region)
     } else {
       obs_interpolated <- obs
     }
@@ -79,8 +126,8 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
     predictor <- exp_interpolated$data
     predictand <- obs_interpolated$data
 
-    target_dims_predictor <- 'sdate'
-    target_dims_predictand <- 'sdate'
+    target_dims_predictor <- sdate_dim
+    target_dims_predictand <- sdate_dim
 
     lats <- obs_interpolated$lat
     lons <- obs_interpolated$lon
@@ -96,8 +143,8 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
     predictand <- obs_interpolated$data
     predictor <- predictors
 
-    target_dims_predictor <- c('sdate', large_scale_predictor_dimname)
-    target_dims_predictand <- 'sdate'
+    target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname)
+    target_dims_predictand <- sdate_dim
 
     lats <- obs_interpolated$lat
     lons <- obs_interpolated$lon
@@ -108,14 +155,15 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   # Predictand: observations
   else if (lr_method == '4nn') {
     warning("Model and observation data are not interpolated, since the '4nn' method does not require it.")
-    predictor <- find_nn(hres = obs, coar = exp, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4)
-    predictand <- obs$data
+    predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats,
+                         lons_coar = exp_lons, nn = 4)
+    predictand <- obs
 
-    target_dims_predictor <- c('sdate','nn')
-    target_dims_predictand <- 'sdate'
+    target_dims_predictor <- c(sdate_dim,'nn')
+    target_dims_predictand <- sdate_dim
 
-    lats <- obs[[lat_dim]]
-    lons <- obs[[lon_dim]]
+    lats <- obs_lats
+    lons <- obs_lons
   }
 
   else {
@@ -126,17 +174,17 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
   res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand),
                fun = .intlr, loocv = loocv, ncores = ncores)$output1
 
-  names(dim(res))[1] <- 'sdate'
+  names(dim(res))[1] <- sdate_dim
 
   # Reorder dimensions to match those of the input model data
-  res <- .reorder_dims(arr_ref = exp$data, arr_to_reorder = res)
+  res <- .reorder_dims(arr_ref = exp, arr_to_reorder = res)
 
   #-----------------------------------
   # Return a list of three elements
   #-----------------------------------
-  res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats))
+  res <- list(data = res, lon = lons, lat = lats)
 
-  return(res_s2dv)
+  return(res)
 }
 
@@ -144,15 +192,21 @@ Intlr <- function(exp, obs, target_grid, int_method, lr_method, predictors = NUL
 #-----------------------------------
 .intlr <- function(x, y, loocv) {
 
-  tmp_df <- data.frame(x = x, y = y)
+  tmp_df <- data.frame(x = x, y = y)
 
-  # training
-  lm1 <- train_lm(df = tmp_df, loocv = loocv)
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
 
-  # prediction
-  res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lm(df = tmp_df, loocv = loocv)
 
-  #return(lm1)
+    # prediction
+    res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
   return(res)
 }
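Editor's note: the following minimal sketch is not part of the patch; it only illustrates the leave-one-out cross-validation logic that train_lm() and pred_lm() implement together when loocv = TRUE. The data frame and its values are invented for illustration.

# Each start date j is predicted by a regression fitted on all remaining start dates
df <- data.frame(x = rnorm(10), y = rnorm(10))
loocv_preds <- sapply(1:nrow(df), function(j) {
  fit <- lm(y ~ ., data = df[-j, ])    # train without start date j
  predict(fit, newdata = df[j, ])      # predict the left-out start date
})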
@@ -198,16 +252,11 @@ pred_lm <- function(df, lm1, loocv) {
 #-----------------------------------
 # Function to find N nearest neighbours.
-# 'hres' and 'coar' are s2dv_objects
+# 'coar' is an array with named dimensions
 #-----------------------------------
-find_nn <- function(hres, coar, lat_dim, lon_dim, nn = 4) {
+find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, nn = 4) {
 
   require(abind)
-
-  lats_hres <- hres[[lat_dim]]
-  lons_hres <- hres[[lon_dim]]
-  lats_coar <- coar[[lat_dim]]
-  lons_coar <- coar[[lon_dim]]
 
   # Sort the distances from closest to furthest
   idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
@@ -217,7 +266,7 @@ find_nn <- function(hres, coar, lat_dim, lon_dim, nn = 4) {
   names(dim(idx_lon)) <- c('nn', 'lon')
 
   # obtain the values of the nearest neighbours
-  nearest <- Apply(list(coar$data, idx_lat, idx_lon),
+  nearest <- Apply(list(coar, idx_lat, idx_lon),
                    target_dims = list(c('lat','lon'),'lat','lon'),
                    fun = function(x, y, z) x[y, z])$output1
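Editor's note: a minimal standalone sketch, not part of the patch, of how find_nn() locates, for every high-resolution coordinate, the indices of the nn closest coarse-grid coordinates. The toy vectors are invented for illustration.

lats_coar <- seq(0, 6, 2)       # toy coarse latitudes (4 points)
lats_hres <- seq(0, 6, 0.5)     # toy high-resolution latitudes
# column j holds the 4 coarse-latitude indices nearest to lats_hres[j]
idx_lat <- sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:4])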
-- 
GitLab


From 730130a3bddf9da333842fa3299bd25331977089 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Wed, 15 Jun 2022 17:59:19 +0200
Subject: [PATCH 12/24] Finished documentation

---
 R/Analogs.R       | 321 +++++++++++++++++++++++++++++-----------------
 R/Intbc.R         | 173 +++++++++++++++++++------
 R/Interpolation.R | 154 ++++++++++++++--------
 R/Intlr.R         | 304 ++++++++++++++++++++++++++++++++++++-------
 4 files changed, 696 insertions(+), 256 deletions(-)

diff --git a/R/Analogs.R b/R/Analogs.R
index 9ae5775..53c308c 100644
--- a/R/Analogs.R
+++ b/R/Analogs.R
@@ -1,3 +1,96 @@
+#'@rdname CST_Analogs
+#'@title Downscaling using Analogs based on large scale fields.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using Analogs. To compute
+#'the analogs given a coarse-scale field, the function looks for days with
+#'similar conditions in the historical observations. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the
+#'region is defined by the user. In principle, the coarse and observation data
+#'should be of the same variable, although different variables can also be admitted.
+#'The analogs function will find the N best analogs based on the minimum Euclidean
+#'distance.
+#'
+#'The search of analogs must be done in the longest dataset possible, but might
+#'require high-memory computational resources. This is important since it is
+#'necessary to have a good representation of the possible states of the field in
+#'the past, and therefore, to get better analogs. The function can also look for
+#'analogs within a window of D days, but it is the user who has to define that window.
+#'Otherwise, the function will look for analogs in the whole dataset. This function
+#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
+#'and decadal predictions) but can admit climate projections or reanalyses. It does
+#'not have constraints of specific region or variables to downscale.
+#'@param exp an 's2dv' object with named dimensions containing the experimental field on
+#'the coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and time. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object with named dimensions containing the observational field.
+#'The object must have, at least, the dimensions latitude, longitude and start date.
+#'The object is expected to be already subset for the desired region.
+#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'@param nanalogs an integer indicating the number of analogs to be searched.
+#'@param fun_analog a function to be applied over the found analogs. Only these options
+#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default),
+#'the function returns the found analogs.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''data' in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''data' in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the
+#'element 'data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
+#'@examples
+#'exp <- rnorm(15000)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(27000)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
+#'obs_lons <- seq(0,6, 6/14)
+#'obs_lats <- seq(0,6, 6/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
+#'@export
+CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon",
+                        sdate_dim = "sdate", time_dim = "time", region = NULL, ncores = 1) {
+
+  # input exp and obs must be s2dv_cube objects
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  # input exp and obs must be s2dv_cube objects
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]],
+                 obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], grid_exp = grid_exp,
+                 nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim,
+                 sdate_dim = sdate_dim, time_dim = time_dim, region = region, ncores = ncores)
+
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+
+}
+
 #'@rdname Analogs
 #'@title Downscaling using Analogs based on large scale fields.
 #'
@@ -22,14 +115,25 @@
 #'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
 #'and decadal predictions) but can admit climate projections or reanalyses.
It does #'not have constrains of specific region or variables to downscale. -#'@param exp an 's2dv_cube' object containing the experimental field on the -#'large scale for which the analog is aimed. The element 'data' in the 's2dv_cube' -#'object must have, at least, the dimensions latitude, longitude, start date and time. -#'The object is expect to be already subset for the desired large scale region. -#'@param obs an 's2dv_cube' object containing the observational field with the -#'target high-resolution scale. The element 'data' in the 's2dv_cube' object must have, -#'at least, the dimensions latitude, longitude, start date and time. The object is -#'expect to be already subset for the desired large scale region. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and time. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param grid_exp a character vector with a path to an example file of the exp data. #'@param nanalogs an integer indicating the number of analogs to be searched #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), @@ -41,10 +145,12 @@ #'@param sdate_dim a character vector indicating the start date dimension name in the #'element 'data' in exp and obs. Default set to "sdate". #'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". This is expected to have daily frequency. -#'@param grid_exp a character vector indicating the coarse grid to be passed to CDO. It -#'must be a grid recognised by CDO or a NetCDF file. This information can also be passed -#'via the element 'source_files' in exp. Default to NULL. +#''data' in exp and obs. Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. #'@param ncores an integer indicating the number of cores to use in parallel computation. 
#'@import multiApply
 #'@import CSTools
 #'@importFrom FNN get.knnx
 #'
 #'@seealso \code{\link[s2dverification]{CDORemap}}
 #'
-#'@return An 's2dv_cube' object containing the dowscaled values. If fun_analog is set to NULL,
-#'the output array in 'data' also contains the dimension 'analog' with the best analog days.
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
 #'@examples
 #'exp <- rnorm(15000)
 #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
-#'lon_exp <- 1:5
-#'lat_exp <- 1:4
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
 #'obs <- rnorm(27000)
 #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
-#'lon_obs <- seq(0,6, 6/14)
-#'lat_obs <- seq(0,6, 6/11)
-#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp)
-#'obs <- s2dv_cube(data = obs, lat = lat_obs, lon = lon_obs)
-#'attr(exp$lon,"first_lon") <- 1
-#'attr(exp$lon,"last_lon") <- 5
-#'attr(exp$lat,"first_lat") <- 1
-#'attr(exp$lat,"last_lat") <- 4
-#'attr(obs$lon,"first_lon") <- 0
-#'attr(obs$lon,"last_lon") <- 6
-#'attr(obs$lat,"first_lat") <- 0
-#'attr(obs$lat,"last_lat") <- 6
-#'downscaled_field <- Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
+#'obs_lons <- seq(0,6, 6/14)
+#'obs_lats <- seq(0,6, 6/11)
+#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180')
 #'@export
-Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat", lon_dim = "lon",
-                    sdate_dim = "sdate", time_dim = "time", grid_exp = NULL, ncores = 1) {
+Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3,
+                    fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                    time_dim = "time", region = NULL, ncores = 1) {
 
   #-----------------------------------
   # Checks
   #-----------------------------------
-  # input exp and obs must be s2dv_cube objects
-  if (!inherits(exp,'s2dv_cube')) {
-    stop("Parameter 'exp' must be of the class 's2dv_cube'")
-  }
-
-  # input exp and obs must be s2dv_cube objects
-  if (!inherits(obs,'s2dv_cube')) {
-    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  if (!inherits(grid_exp, 'character')) {
+    stop("Parameter 'grid_exp' must be of class 'character'. It can be either a path ",
+         "to a NetCDF file from which to read the target grid (a single grid must be ",
+         "defined in such file) or a character vector indicating the coarse grid to ",
+         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
   }
 
   if (!inherits(nanalogs, 'numeric')) {
     stop("Parameter 'nanalogs' must be of the class 'numeric'")
   }
 
-  if (!inherits(fun_analog, 'character')) {
-    stop("Parameter 'fun_analog' must be of the class 'character'")
-  }
-
   if (!inherits(lat_dim, 'character')) {
     stop("Parameter 'lat_dim' must be of the class 'character'")
   }
@@ -116,78 +209,67 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat"
   }
 
   # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
-  if (is.na(match(lon_dim, names(dim(exp$data)))) | is.na(match(lon_dim, names(dim(obs$data))))) {
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
     stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
          "'lon_dim'")
   }
 
-  if (is.na(match(lat_dim, names(dim(exp$data)))) | is.na(match(lat_dim, names(dim(obs$data))))) {
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
     stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
          "'lat_dim'")
   }
 
-  if (is.na(match(sdate_dim, names(dim(exp$data)))) | is.na(match(sdate_dim, names(dim(obs$data))))) {
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
     stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
          "'sdate_dim'")
   }
 
-  if (is.na(match(time_dim, names(dim(exp$data)))) | is.na(match(time_dim, names(dim(obs$data))))) {
+  if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) {
     stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ",
          "'time_dim'")
   }
 
-  if (is.null(exp$source_files) & is.null(grid_exp)) {
-    stop("I do not know which the coarse grid is. This information can be passed within ",
-         "'exp' with the element '$source_files' or via the parameter 'grid_exp'. ",
-         "Parameter 'grid_exp' must be a character vector indicating the coarse grid to ",
-         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
-  }
-
   # Ensure we have enough data to interpolate from high-res to coarse grid
-  if ((attr(obs[[lat_dim]],"first_lat") > attr(exp[[lat_dim]],"first_lat")) |
-      (attr(obs[[lat_dim]],"last_lat") < attr(exp[[lat_dim]],"last_lat")) |
-      (attr(obs[[lon_dim]],"first_lon") > attr(exp[[lon_dim]],"first_lon")) |
-      (attr(obs[[lon_dim]],"last_lon") < attr(exp[[lon_dim]],"last_lon"))) {
+  if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) |
+      (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) {
 
   stop("There are not enough data in 'obs'. Please add more latitudes and/or ",
        "longitudes.")
   }
 
   # Select a function to apply to the analogs selected for a given observation
-  stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
+  if (!is.null(fun_analog)) {
+    stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
+  }
 
-  # Create window if user does not have it in obs$data
-  if ( !("window" %in% names(dim(obs$data))) ) {
-    nsdates <- dim(obs$data)[names(dim(obs$data)) == sdate_dim]
-    ntimes <- dim(obs$data)[names(dim(obs$data)) == time_dim]
-    window <- Apply(list(obs$data), target_dims = list(c(time_dim, sdate_dim)),
+  # Create window if user does not have it in obs
+  if ( !("window" %in% names(dim(obs))) ) {
+    nsdates <- dim(obs)[names(dim(obs)) == sdate_dim]
+    ntimes <- dim(obs)[names(dim(obs)) == time_dim]
+    window <- Apply(list(obs), target_dims = list(c(time_dim, sdate_dim)),
                     fun = as.vector, output_dims = 'window')$output1
-    obs$data <- InsertDim(obs$data, posdim = 1, lendim = nsdates * ntimes, name = "window")
-    obs$data <- Apply(list(obs$data, window), target_dims = 'window',
-                      fun = function(x,y) x <- y)$output1
+    obs <- InsertDim(obs, posdim = 1, lendim = nsdates * ntimes, name = "window")
+    obs <- Apply(list(obs, window), target_dims = 'window',
+                 fun = function(x,y) x <- y)$output1
   }
 
   #-----------------------------------
   # Interpolate high-res observations to the coarse grid
   #-----------------------------------
-  if (is.null(grid_exp)) {
-    grid_exp <- exp$source_files[1]
-  }
-
-  lonmin <- attr(exp[[lon_dim]],"first_lon")
-  lonmax <- attr(exp[[lon_dim]],"last_lon")
-  latmin <- attr(exp[[lat_dim]],"first_lat")
-  latmax <- attr(exp[[lat_dim]],"last_lat")
-  obs_interpolated <- Interpolation(exp = obs, target_grid = grid_exp, method_remap = "conservative",
-                                    remap_region = c(lonmin, lonmax, latmin, latmax))
-  lats <- obs[[lat_dim]]
-  lons <- obs[[lon_dim]]
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided.
Assuming the four borders of the ",
            "downscaling region are defined by the first and last elements of the parameters 'exp_lats' and 'exp_lons'.")
+    region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)])
+  }
+
+  obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = grid_exp,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", region = region)
 
   # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to
   # the same grid to force the matching
-  if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp[[lat_dim]], lon1 = obs_interpolated$lon,
-                     lon2 = exp[[lon_dim]])) {
-    exp_interpolated <- Interpolation(exp = exp, target_grid = grid_exp, method_remap = "conservative",
-                                      remap_region = c(lonmin, lonmax, latmin, latmax))
+  if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) {
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp,
+                                      lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative",
+                                      region = region)$data
   } else {
     exp_interpolated <- exp
   }
@@ -195,25 +277,24 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat"
   #-----------------------------------
   # Reshape train and test
   #-----------------------------------
-  res <- Apply(list(obs_interpolated$data, exp_interpolated$data, obs$data),
+  res <- Apply(list(obs_interpolated$data, exp_interpolated, obs),
               target_dims = list(c("window", sdate_dim, time_dim, lat_dim, lon_dim),
               c(sdate_dim, time_dim, lat_dim, lon_dim), c("window", sdate_dim, time_dim, lat_dim, lon_dim)),
               fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs,
               fun_analog = fun_analog), ncores = ncores)$output1
 
-  #test <- exp$data[1,1,,1,,,]
+  #test <- exp_interpolated[1,1,,1,,,]
   #train <- obs_interpolated$data[,1,1,,,,]
-  #obs_hres <- obs$data[,1,1,,,,]
+  #obs_hres <- obs[,1,1,,,,]
   #test <- aperm(test,c(2,1,3,4))
   #train <- aperm(train,c(1,4,2,3,5))
   #obs_hres <- aperm(obs_hres,c(1,4,2,3,5))
 
-  res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = lons, lat = lats))
+  res <- list(data = res, lon = obs_lons, lat = obs_lats)
 
-  return(res_s2dv)
+  return(res)
 }
-
 # For each element in test, find the indices of the k nearest neighbours in train
 .analogs <- function(train, test, obs_hres, k, fun_analog) {
 
   # train, test, and obs_hres dim: 4 dimensions sdate, time, lat and lon (in this order)
@@ -227,44 +308,48 @@ Analogs <- function(exp, obs, nanalogs = 3, fun_analog = "mean", lat_dim = "lat"
   train <- apply(train, c(1,2,3), as.vector); names(dim(train))[1] <- "space"
   test <- apply(test, c(1,2), as.vector); names(dim(test))[1] <- "space"
   obs_hres <- apply(obs_hres, c(1,2,3), as.vector); names(dim(obs_hres))[1] <- "space"
-
+
   # Here we do cross-validation: the start date considered is removed from the training
-  analogs_ls <- lapply(1:nsdates, function(s) {
-    lapply(1:ntimes, function(t) {
+  analogs_arr <- array(NA, dim = c(analogs = k, space_dims_hres, ntimes, nsdates))
+  dist_arr <- array(NA, dim = c(analogs = k, ntimes, nsdates))
+  for (s in 1:nsdates) {
+    for (t in 1:ntimes) {
       tr <- apply(train[ , , -s, t], 1, as.vector); names(dim(tr))[1] <- "time"
       te <- test[ , s, t]
-      te <- InsertDim(data = te, posdim = 1, lendim = 1, name = 'time'); names(dim(te))[2] <- "space"
       ob <- apply(obs_hres[ , , -s, t], 1, as.vector); names(dim(ob))[1] <- "time"
 
-      knn.ind <- get.knnx(tr, te, k)
-
+      # Find NA's and remove them
+      # Assume that NA's are only in the spatial dimension, not in the temporal one
+      idx_na_tr <- is.na(tr[1 , ])
+      idx_na_te <- is.na(te)
+      idx_na <- idx_na_tr | idx_na_te
+      tr_wo_na <- tr[ , !idx_na]
+      te_wo_na <- te[!idx_na]
+      te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = 'time')
+      names(dim(te_wo_na))[2] <- "space"
+
+      knn.ind <- get.knnx(tr_wo_na, te_wo_na, k)
+
       dist <- knn.ind$nn.dist
       idx <- knn.ind$nn.index
-      names(dim(idx)) <- c("time", "analog"); names(dim(dist)) <- c("time", "analog")
-
-      analogs <- Apply(idx, margins = "analog", fun = function(an) { ob[ an, ] })$output1
-
-      dim(analogs) <- c(space_dims_hres, analogs = k)
-
-      if (!is.null(fun_analog)) {
-
-        if (fun_analog == "wmean") {
-          weight <- 1 / dist
-          analogs <- apply(analogs, c(1,2), function(x) weighted.mean(x, weight))
-        } else {
-          analogs <- apply(analogs, c(1,2), fun_analog)
-        }
-      }
-    })
-  })
-
-  if (is.null(fun_analog)) {
-    analogs_arr <- array(unlist(analogs_ls), dim = c(space_dims_hres, analogs = k, ntimes, nsdates))
-  } else {
-    analogs_arr <- array(unlist(analogs_ls), dim = c(space_dims_hres, ntimes, nsdates))
+
+      analogs <- ob[ idx, ]
+      dim(analogs) <- c(analogs = k, space_dims_hres)
+
+      analogs_arr[ , , , t, s] <- analogs
+      # Keep the distances of each start date and time step; they are needed
+      # to compute the weights of the 'wmean' option below
+      dist_arr[ , t, s] <- dist
+    }
+  }
+
+  # Apply functions to analogs
+  if (!is.null(fun_analog)) {
+    if (fun_analog == "wmean") {
+      # The distances, and thus the weights, differ for each start date and
+      # time step, so the weighted mean must be computed cell by cell
+      res <- array(NA, dim = c(space_dims_hres, ntimes, nsdates))
+      for (s in 1:nsdates) {
+        for (t in 1:ntimes) {
+          weight <- 1 / dist_arr[ , t, s]
+          res[ , , t, s] <- apply(analogs_arr[ , , , t, s], c(2,3), function(x) weighted.mean(x, weight))
+        }
+      }
+      analogs_arr <- res
+    } else {
+      analogs_arr <- apply(analogs_arr, c(2,3,4,5), fun_analog)
+    }
   }
 
   return(analogs_arr)
 }
-
+
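Editor's note: a minimal standalone sketch, not part of the patch, of the nearest-neighbour search that .analogs() delegates to FNN::get.knnx(). The toy matrices are invented for illustration.

library(FNN)
train <- matrix(rnorm(100 * 20), nrow = 100)  # 100 candidate days x 20 grid points
test  <- matrix(rnorm(20), nrow = 1)          # the day to downscale, flattened in space
knn <- get.knnx(train, test, k = 3)
knn$nn.index   # indices of the 3 best analog days in 'train'
knn$nn.dist    # their Euclidean distances (inverted to weights for 'wmean')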
diff --git a/R/Intbc.R b/R/Intbc.R
index 6f180f4..2732b48 100644
--- a/R/Intbc.R
+++ b/R/Intbc.R
@@ -1,6 +1,76 @@
+#'@rdname CST_Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a later bias
+#'adjustment. It is recommended that the observations are passed already in the target grid.
+#'Otherwise, the function will also perform an interpolation of the observed field into the
+#'target grid. The coarse scale and observation data can be either global or regional. In the
+#'latter case, the region is defined by the user. In principle, the coarse and observation data
+#'are intended to be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param bc_method a character vector indicating the bias adjustment method to be applied after
+#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The
+#'abbreviations 'sbc', 'cal', 'qm' can also be used.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8
+#'or newer version is required.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_point_interp a character vector indicating the interpolation method to interpolate
+#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point",
+#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param cal_method a character vector with the calibration method to be used. See
+#'\code{\link[CSTools]{Calibration}}
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export + CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - remap_region = NULL, cal_method = "mse_min", ncores = 1) + region = NULL, cal_method = "mse_min", ncores = 1) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -14,7 +84,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], target_grid = target_grid, int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, remap_region = remap_region, + sdate_dim = sdate_dim, member_dim = member_dim, region = region, cal_method = cal_method, ncores = ncores) res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) @@ -22,7 +92,6 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point return(res_s2dv) } - #'@rdname Intbc #'@title Downscaling using interpolation and bias adjustment. #' @@ -35,26 +104,40 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'latter case, the region is defined by the user. In principle, the coarse and observation data #'are intended to be of the same variable, although different variables can also be admitted. #' -#'@param exp an 's2dv_cube' object containing the experimental field on the coarse scale for -#'which the downscaling is aimed. The element 'data' in the 's2dv_cube' object must have, at -#'least, the dimensions latitude, longitude, start date and member. The object is expected to -#'be already subset for the desired large scale region. Latitudes must range from -90 to 90 -#'(or a subset) and longitudes must range from -180 to 180. -#'@param obs an 's2dv_cube' object containing the observational field. The element 'data' in -#'the 's2dv_cube' object must have, at least, the dimensions latitude, longitude and start date. -#'The object is expect to be already subset for the desired large scale region. -#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. -#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 -#'or newer version is required. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. 
+#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. #'@param bc_method a character vector indicating the bias adjustment method to be applied after #'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The #'abbreviations 'sbc', 'cal', 'qm' can also be used. -#'@param points a list of two elements containing the point latitudes and longitudes of the -#'locations to downscale the model data. The list must contain the two elements 'lat' and 'lon'. -#'If the downscaling is to a point location, only regular grids are allowed for exp and obs. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. #'@param method_point_interp a character vector indicating the interpolation method to interpolate #'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", -#'"invdist4nn", "NE", "NW", "SE", "SW". +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. #'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' #'in exp and obs. Default set to "lat". #'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' @@ -63,6 +146,15 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. 
+#'@param cal_method a character vector with the calibration method to be used. See
+#'\code{\link[CSTools]{Calibration}}
 #'@param ncores an integer indicating the number of cores to use in parallel computation.
 #'
 #'@import CSTools
@@ -71,30 +163,23 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point
 #'@seealso \code{\link[CSTools]{Calibration}}
 #'@seealso \code{\link[CSTools]{QuantileMapping}}
 #'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
 #'@examples
 #'exp <- rnorm(500)
 #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
-#'lon_exp <- 1:5
-#'lat_exp <- 1:4
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
 #'obs <- rnorm(900)
 #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
-#'lon_obs <- seq(1,5, 4/14)
-#'lat_obs <- seq(1,4, 3/11)
-#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp)
-#'obs <- s2dv_cube(data = obs, lat = lat_obs, lon = lon_obs)
-#'attr(exp$lon,"first_lon") <- 1
-#'attr(exp$lon,"last_lon") <- 5
-#'attr(exp$lat,"first_lat") <- 1
-#'attr(exp$lat,"last_lat") <- 4
-#'attr(obs$lon,"first_lon") <- 1
-#'attr(obs$lon,"last_lon") <- 5
-#'attr(obs$lat,"first_lat") <- 1
-#'attr(obs$lat,"last_lat") <- 4
-#'downscaled_field <- Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
+#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
 #'@export
 Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL,
                   points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
-                  member_dim = "member", source_file = NULL, remap_region = NULL, cal_method = "mse_min", ncores = 1) {
+                  member_dim = "member", source_file = NULL, region = NULL, cal_method = "mse_min", ncores = 1) {
 
   if (!inherits(bc_method, 'character')) {
     stop("Parameter 'bc_method' must be of the class 'character'")
@@ -146,25 +231,33 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
     stop("No source file found. Source file must be provided in the parameter 'source_file'.")
   }
 
-  if (is.null(remap_region)) {
-    remap_region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
+            "downscaling region are defined by the first and last elements of the parameters 'obs_lats' and 'obs_lons'.")
+    region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
   }
 
   exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
                                     method_remap = int_method, points = points, source_file = source_file,
-                                    method_point_interp = method_point_interp, remap_region = remap_region)
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                    region = region)
 
   # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
   # the same grid to force the matching
   if (!.check_coords(lat1 = exp_lats, lat2 = obs_lats, lon1 = exp_lons, lon2 = obs_lons) | !is.null(points)) {
     obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
                                       method_remap = int_method, points = points, source_file = source_file,
-                                      method_point_interp = method_point_interp, remap_region = remap_region)
+                                      lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                      region = region)
   } else {
     obs_interpolated <- obs
   }
 
-  #.check_coords
   if (bc_method == 'sbc' | bc_method == 'simple_bias') {
     if (dim(obs_interpolated$data)[sdate_dim] == 1) {
       warning('Simple Bias Correction should not be used with
@@ -190,7 +283,7 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
                              sample_dims = sdate_dim, method = 'QUANT', ncores = ncores)
   }
 
-  # Create an s2dv_cube object
+  # Return a list of three elements
   res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
 
   return(res)
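Editor's note: a minimal standalone sketch, not part of the patch, of the dispatch Intbc() performs after the interpolation step. The helper name apply_bc() is invented for illustration; the three CSTools calls mirror those in the function body above.

apply_bc <- function(e, o, bc_method, cal_method = "mse_min", sdate_dim = "sdate", ncores = 1) {
  if (bc_method %in% c('sbc', 'simple_bias')) {
    CSTools::BiasCorrection(exp = e, obs = o)
  } else if (bc_method %in% c('cal', 'calibration')) {
    CSTools::Calibration(exp = e, obs = o, cal.method = cal_method, ncores = ncores)
  } else {  # 'qm' or 'quantile_mapping'
    CSTools::QuantileMapping(exp = e, obs = o, sample_dims = sdate_dim,
                             method = 'QUANT', ncores = ncores)
  }
}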
diff --git a/R/Interpolation.R b/R/Interpolation.R
index 1bdf5b1..891e64c 100644
--- a/R/Interpolation.R
+++ b/R/Interpolation.R
@@ -1,5 +1,57 @@
+#'@rdname CST_Interpolation
+#'@title Regrid or interpolate gridded data to a point location.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function interpolates gridded model data from one grid to
+#'another (regrid) or interpolates gridded model data to a set of point locations.
+#'The gridded model data can be either global or regional. In the latter case, the
+#'region is defined by the user. It does not have constraints of specific region or
+#'variables to downscale.
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The field data is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_remap a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'
+#'@seealso \code{\link[s2dverification]{CDORemap}}
+#'
+#'@return An 's2dv' object containing the downscaled field.
+#'
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+#'lons <- 1:5
+#'lats <- 1:4
+#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons)
+#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640')
+#'@export
 CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
-                              lat_dim = "lat", lon_dim = "lon", remap_region = NULL,
+                              lat_dim = "lat", lon_dim = "lon", region = NULL,
                               method_point_interp = NULL) {
 
   if (!inherits(exp,'s2dv_cube')) {
@@ -9,7 +61,7 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr
   res <- Interpolation(exp = exp$data, lats = exp[[lat_dim]], lons = exp[[lon_dim]],
                        source_file = exp$source_files[1], points = points,
                        method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim,
-                       lon_dim = lon_dim, remap_region = remap_region, method_point_interp = method_point_interp)
+                       lon_dim = lon_dim, region = region, method_point_interp = method_point_interp)
 
   res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
 
@@ -27,49 +79,60 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr
 #'The gridded model data can be either global or regional. In the latter case, the
 #'region is defined by the user. It does not have constraints of specific region or
 #'variables to downscale.
-#'@param exp an 's2dv_cube' object containing the experimental field on the
-#'coarse scale for which the downscaling is aimed. The element 'data' in the 's2dv_cube'
-#'object must have, at least, the dimensions latitude and longitude. The object is
-#'expected to be already subset for the desired large scale region. Latitudes must range
-#'from -90 to 90 (or a subset) and longitudes must range from -180 to 180.
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The object is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian.
To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param lats a numeric vector containing the latitude values. Latitudes must range from
+#'-90 to 90.
+#'@param lons a numeric vector containing the longitude values. Longitudes can range from
+#'-180 to 180 or from 0 to 360.
 #'@param points a list of two elements containing the point latitudes and longitudes
 #'of the locations to downscale the model data. The list must contain the two elements
-#''lat' and 'lon'. If the downscaling is to a point location, only regular grids are
-#'allowed for exp and obs.
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param source_file a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
 #'@param method_remap a character vector indicating the regridding method to be passed
 #'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
 #'to be used, CDO_1.9.8 or newer version is required.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
-#'@param lat_dim a character vector indicating the latitude dimension name in the element
-#''data' in exp. Default set to "lat".
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
 #'@param lon_dim a character vector indicating the longitude dimension name in the element
-#''data' in exp. Default set to "lon".
-#'@param remap_region a numeric vector indicating the limits of the region defined in exp.
-#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. If set to
-#'NULL (default), the function takes the minimum and maximum values of the latitudes and
-#'longitudes.
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
 #'@param method_point_interp a character vector indicating the interpolation method to
 #'interpolate model gridded data into the point locations. Accepted methods are "nearest",
-#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling
+#'is to a point location.
 #'@import multiApply
 #'@import plyr
 #'@importFrom s2dv CDORemap
 #'
 #'@seealso \code{\link[s2dverification]{CDORemap}}
 #'
-#'@return An 's2dv_cube' object containing the dowscaled values.
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
#' #'@examples #'exp <- rnorm(500) #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) -#'lon_exp <- 1:5 -#'lat_exp <- 1:4 -#'exp <- s2dv_cube(data = exp, lat = lat_exp, lon = lon_exp) -#'downscaling_remap <- Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') +#'lons <- 1:5 +#'lats <- 1:4 +#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640') #'@export Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, - target_grid = NULL, lat_dim = "lat", lon_dim = "lon", remap_region = NULL, + target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, method_point_interp = NULL) { if (!is.null(method_remap)) { @@ -92,11 +155,6 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") } - # Check for negative longitudes - if (any(lons < -180 | lons > 180)) { - stop("Out-of-range longitudes have been found. Longitudes must range from -180 to 180") - } - # Check for negative latitudes in the exp data if (any(lats < -90 | lats > 90) ) { stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") @@ -106,7 +164,7 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me if (!is.null(points)) { if (!inherits(points, 'list')) { stop("Parameter 'points' must be a list of two elements containing the point ", - "latitudes and longitudes in the form 'points$lat', 'points$lon'") + "latitudes and longitudes.") } if (is.null(method_point_interp)) { @@ -138,12 +196,6 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me stop("The number of latitudes and longitudes must match") } - # Check for negative longitudes in the point coordinates - if (any(points[[lon_dim]] < -180 | points[[lon_dim]] > 180) ) { - stop("Out-of-range longitudes have been found in 'points'. Longitudes must range from ", - "-180 to 180") - } - # Check for negative latitudes in the point coordinates if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) { stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ", @@ -160,9 +212,10 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me } if (is.null(target_grid)) { - stop("Parameter 'target_grid' must be a character vector indicating the ", - "target grid to be passed to CDO. It must be a grid recognised by CDO ", - "or a NetCDF file") + stop("Parameter 'target_grid' can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") } } @@ -170,15 +223,17 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # Limits of the region defined by the model data #---------------------------------- # for the case when region limits are not passed by the user - # remap_regions contains the following elements in order: lonmin, lonmax, latmin, latmax - if (is.null(remap_region)) { - remap_region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) + # regions contains the following elements in order: lonmin, lonmax, latmin, latmax + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'lats' and 'lons'.") + region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) } # Ensure points to be within the region limits if (!is.null(points)) { - if (any(points[[lat_dim]] > remap_region[4]) | any(points[[lat_dim]] < remap_region[3]) | - any(points[[lon_dim]] > remap_region[2]) | any(points[[lon_dim]] < remap_region[1])) { + if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) | + any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) { stop("At least one of the points lies outside the model region") } } @@ -192,14 +247,11 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me lons = lons, grid = target_grid, method = method_remap, - crop = remap_region) + crop = region) # Return a list res <- list(data = res$data_array, lon = res$lons, lat = res$lats) - # Create an s2dv_cube object - #res_s2dv <- suppressWarnings(s2dv_cube(data = res$data_array, lon = res$lons, lat = res$lats)) - #---------------------------------- # Interpolate to point locations #---------------------------------- @@ -207,8 +259,8 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # First create interpolation weights, depending on the chosen method weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), lats = points[[lat_dim]], lons = points[[lon_dim]], - method = method_point_interp, region = list(lat_min = remap_region[3], - lat_max = remap_region[4], lon_min = remap_region[1], lon_max = remap_region[2])) + method = method_point_interp, region = list(lat_min = region[3], + lat_max = region[4], lon_min = region[1], lon_max = region[2])) # Select coarse-scale data to be interpolated model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp) @@ -218,10 +270,6 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # Return a list res <- list(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]]) - - # Create an s2dv_cube object - #res_s2dv <- suppressWarnings(s2dv_cube(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]])) - } return(res) diff --git a/R/Intlr.R b/R/Intlr.R index 93a98f4..95ef082 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -1,6 +1,99 @@ +#'@rdname CST_Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. 
Data can be in one or two contiguous regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv_cube' object containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. The regression equation is then applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection
+#'indices) as predictors and high-resolution observations as predictands. This equation is
+#'then applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. Only needed if the
+#'downscaling is to a point location; in that case, only regular grids are allowed
+#'for exp and obs.
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param source_file_exp a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param source_file_obs a character vector with a path to an example file of the obs data.
+#'Only needed if the downscaling is to a point location.
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least, the start date dimension and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
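A minimal sketch of the shape expected for 'predictors' in the 'large-scale' method, assuming two hypothetical teleconnection indices and five start dates ('vars' is the default 'large_scale_predictor_dimname'):

    # Two large-scale predictors for five start dates; the values are
    # random placeholders, not real indices
    predictors <- array(rnorm(10), dim = c(sdate = 5, vars = 2))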
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Defaults to FALSE.
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'
+#'@return A list of three elements: 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
+#'@export
 CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_method = NULL, method_point_interp = NULL,
-                  source_file = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
-                  time_dim = "time", large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) {
+                  source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat",
+                  lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
+                  large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) {
 
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
@@ -11,10 +104,11 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho
   }
 
   res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]],
-               obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], points = points, source_file = source_file,
-               target_grid = target_grid, lr_method = lr_method, int_method = int_method, method_point_interp = method_point_interp,
+               obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], points = points, source_file_exp = exp$source_files[1],
+               source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, int_method = int_method,
+               method_point_interp = method_point_interp,
               predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim,
-               remap_region = remap_region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv,
+               region = region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv,
                ncores = ncores)
 
   #-----------------------------------
@@ -25,14 +119,109 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho
   return(res_s2dv)
 }
 
-#TO DO: Implement point interpolation
-
+#'@rdname Intlr
+#'@title Downscaling using interpolation and linear regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a linear
+#'regression. Different methodologies that employ linear regressions are available. See
+#'parameter 'lr_method' for more information. It is recommended that the observations
+#'are passed already in the target grid. Otherwise, the function will also perform an
+#'interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to
+#'be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude and start date. The object is expected to be
+#'already subset for the desired region. Data can be in one or two contiguous regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. The regression equation is then applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection
+#'indices) as predictors and high-resolution observations as predictands. This equation is
+#'then applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. Only needed if the
+#'downscaling is to a point location; in that case, only regular grids are allowed
+#'for exp and obs.
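A minimal sketch of the 'points' structure just described, assuming two hypothetical locations and the default 'lat_dim' and 'lon_dim' names:

    # The element names must match 'lat_dim' and 'lon_dim'
    points <- list(lat = c(40.2, 41.6), lon = c(1.1, 2.3))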
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param source_file_exp a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param source_file_obs a character vector with a path to an example file of the obs data.
+#'Only needed if the downscaling is to a point location.
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least, the start date dimension and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Defaults to FALSE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'
+#'@return A list of three elements: 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
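The 'loocv' option refits the regression once per start date, each time leaving that date out of the training sample and predicting it with the remaining ones. A standalone sketch of the idea in base R, not the package's internal code:

    # Leave-one-out cross-validation for a simple linear regression
    df <- data.frame(x = rnorm(5), y = rnorm(5))
    pred <- sapply(seq_len(nrow(df)), function(j) {
      fit <- lm(y ~ ., data = df[-j, ])   # train without start date j
      predict(fit, newdata = df[j, ])     # predict the held-out start date
    })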
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, lr_method, points = NULL, - int_method = NULL, method_point_interp = NULL, source_file = NULL, predictors = NULL, lat_dim = "lat", - lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", remap_region = NULL, - large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { - - require(multiApply) + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { #----------------------------------- # Checkings @@ -81,6 +270,16 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "'sdate_dim'") } + if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) { + stop("No source files found. Source files for exp and obs must be provided in the parameters ", + "'source_file_exp' and 'source_file_obs', respectively.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + # sdate must be the time dimension in the input data stopifnot(sdate_dim %in% names(dim(exp))) stopifnot(sdate_dim %in% names(dim(obs))) @@ -98,21 +297,31 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, #----------------------------------- if (lr_method != '4nn') { - if (is.null(remap_region)) { - remap_region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, remap_region = remap_region) + points = points, method_point_interp = method_point_interp, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, method_remap = int_method, region = region) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) { + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, remap_region = remap_region) + points = points, method_point_interp = method_point_interp, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_remap = int_method, region = region) + + lats <- obs_interpolated$lat + lons <- obs_interpolated$lon + obs_interpolated <- obs_interpolated$data } else { obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons } } @@ -124,13 +333,10 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, # Predictand: observations if (lr_method == 'basic') { predictor <- exp_interpolated$data - predictand <- obs_interpolated$data + predictand <- obs_interpolated target_dims_predictor <- sdate_dim target_dims_predictand <- sdate_dim - - lats <- obs_interpolated$lat - lons <- obs_interpolated$lon } # (Multi) linear regression with large-scale predictors @@ -140,30 +346,44 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (is.null(predictors)) { stop("The large-scale predictors must be passed through the parametre 'predictors'") } - predictand <- obs_interpolated$data + predictand <- obs_interpolated predictor <- predictors target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) target_dims_predictand <- sdate_dim - - lats <- obs_interpolated$lat - lons <- obs_interpolated$lon } # Multi-linear regression with the four nearest neighbours # Predictors: model data # Predictand: observations else if (lr_method == '4nn') { - warning("Interpolating model and observation data but not needed.") predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, - lons_coar = exp_lons, nn = 4) - predictand <- obs + lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4) + + if (is.null(points)) { + predictand <- obs + lats <- obs_lats + lons <- obs_lons + } + # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, interpolate to point locations + else { + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, source_file = source_file_obs, + method_remap = int_method, region = region) + + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, source_file = source_file_obs, + 
method_remap = int_method, region = region) + + lats <- predictor$lat + lons <- predictor$lon + predictor <- predictor$data + predictand <- predictand$data + } + target_dims_predictor <- c(sdate_dim,'nn') target_dims_predictand <- sdate_dim - - lats <- obs_lats - lons <- obs_lons } else { @@ -176,12 +396,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, names(dim(res))[1] <- sdate_dim - # Reorder dimensions to match those of the input model data - res <- .reorder_dims(arr_ref = exp, arr_to_reorder = res) - - #----------------------------------- - # Create an s2dv_cube object - #----------------------------------- + # Return a list of three elements res <- list(data = res, lon = lons, lat = lats) return(res) @@ -194,6 +409,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, tmp_df <- data.frame(x = x, y = y) + # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { n <- nrow(tmp_df) @@ -252,22 +468,20 @@ pred_lm <- function(df, lm1, loocv) { #----------------------------------- # Function to find N nearest neighbours. -# 'coar' is an array with names dimensions +# 'coar' is an array with named dimensions #----------------------------------- -find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, nn = 4) { +find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) { - require(abind) - # Sort the distances from closest to furthest idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) - names(dim(idx_lat)) <- c('nn', 'lat') - names(dim(idx_lon)) <- c('nn', 'lon') + names(dim(idx_lat)) <- c('nn', lat_dim) + names(dim(idx_lon)) <- c('nn', lon_dim) # obtain the values of the nearest neighbours nearest <- Apply(list(coar, idx_lat, idx_lon), - target_dims = list(c('lat','lon'),'lat','lon'), + target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), fun = function(x, y, z) x[y, z])$output1 return(nearest) -- GitLab From 798010ea407ca43bec28d369d2bcf943448f8320 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 5 Jul 2022 18:01:38 +0200 Subject: [PATCH 13/24] Created function to generate windows for analogs --- R/Analogs.R | 195 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 127 insertions(+), 68 deletions(-) diff --git a/R/Analogs.R b/R/Analogs.R index 53c308c..d22a1d6 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -177,7 +177,8 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'@export Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", region = NULL, ncores = 1) { + time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = TRUE, + ncores = 1) { #----------------------------------- # Checkings #----------------------------------- @@ -230,12 +231,12 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, } # Ensure we have enough data to interpolate from high-res to coarse grid - if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) | - (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) { + #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) 
| + # (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) { - stop("There are not enough data in 'obs'. Please to add more latitudes or ", - "longitudes.") - } + # stop("There are not enough data in 'obs'. Please to add more latitudes or ", + # "longitudes.") + #} # Select a function to apply to the analogs selected for a given observation if (!is.null(fun_analog)) { @@ -244,13 +245,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # Create window if user does not have it in obs if ( !("window" %in% names(dim(obs))) ) { - nsdates <- dim(obs)[names(dim(obs)) == sdate_dim] - ntimes <- dim(obs)[names(dim(obs)) == time_dim] - window <- Apply(list(obs), target_dims = list(c(time_dim, sdate_dim)), - fun = as.vector, output_dims = 'window')$output1 - obs <- InsertDim(obs, posdim = 1, lendim = nsdates * ntimes, name = "window") - obs <- Apply(list(obs, window), target_dims = 'window', - fun = function(x,y) x <- y)$output1 + obs <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) } #----------------------------------- @@ -277,79 +272,143 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, #----------------------------------- # Reshape train and test #----------------------------------- - res <- Apply(list(obs_interpolated$data, exp_interpolated, obs), - target_dims = list(c("window", sdate_dim, time_dim, lat_dim, lon_dim), - c(sdate_dim, time_dim, lat_dim, lon_dim), c("window", sdate_dim, time_dim, lat_dim, lon_dim)), - fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, - fun_analog = fun_analog), ncores = ncores)$output1 + res.data <- Apply(list(obs_interpolated$data, exp_interpolated, obs), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog), ncores = ncores)$output1 + + # Return the indices of the best analogs + if (return_indices) { + res.ind <- Apply(list(obs_interpolated$data, exp_interpolated, obs), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 + + res <- list(data = res.data, ind = res.ind, lon = obs_lons, lat = obs_lats) + } else { + res <- list(data = res.data, lon = obs_lons, lat = obs_lats) + } - #test <- exp_interpolated[1,1,,1,,,] - #train <- obs_interpolated$data[,1,1,,,,] - #obs_hres <- obs[,1,1,,,,] + # tests Jaume + #test <- exp_interpolated[1,1,1,1,1,,] + #train <- obs_interpolated$data[,1,1,1,,1,] + #obs_hres <- obs[,1,1,1,,1,] #test <- aperm(test,c(2,1,3,4)) #train <- aperm(train,c(1,4,2,3,5)) #obs_hres <- aperm(obs_hres,c(1,4,2,3,5)) - res <- list(data = res, lon = obs_lons, lat = obs_lats) - + # tests Alba + #train <- obs_interpolated$data[,1,1,,,,] + #train <- InsertDim(train, posdim = 3, lendim = 1, name = "time") + #test <- exp_interpolated[1,1,,1,,,] + #test <- InsertDim(test, posdim = 2, lendim = 1, name = "time") + #obs_hres <- obs[,1,1,,,,] + #obs_hres <- InsertDim(obs_hres, posdim = 3, lendim = 1, name = "time") + return(res) } # For each element in test, find the indices of the k nearest neigbhors in train -.analogs <- function(train, test, obs_hres, k, fun_analog) { - # 
train, test, and obs_hres dim: 4 dimensions sdate, time, lat and lon (in this order) - +.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) + # test dim: 2 dimensions lat and lon (in this order) # Number of lats/lons of the high-resolution data - space_dims_hres <- dim(obs_hres)[c(4,5)] - nsdates <- dim(train)[2] - ntimes <- dim(train)[3] + space_dims_hres <- dim(obs_hres)[c(2,3)] # Reformat train and test as an array with (time, points) - train <- apply(train, c(1,2,3), as.vector); names(dim(train))[1] <- "space" - test <- apply(test, c(1,2), as.vector); names(dim(test))[1] <- "space" - obs_hres <- apply(obs_hres, c(1,2,3), as.vector); names(dim(obs_hres))[1] <- "space" + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + test <- as.vector(test) + obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" + + # Identify and remove NA's + idx_na_tr <- is.na(train[ , 1]) + idx_na_te <- is.na(test) + idx_na <- idx_na_tr | idx_na_te + tr_wo_na <- t(train[!idx_na , ]) + te_wo_na <- test[!idx_na] + te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") - # Here we do cross-validation: the start date considered is removed from the training - analogs_arr <- array(NA, dim = c(analogs = k, space_dims_hres, ntimes, nsdates)) - for (s in 1:nsdates) { - for (t in 1:ntimes) { - tr <- apply(train[ , , -s, t], 1, as.vector); names(dim(tr))[1] <- "time" - te <- test[ , s, t] - ob <- apply(obs_hres[ , , -s, t], 1, as.vector); names(dim(ob))[1] <- "time" - - # Find NA's and remove them - # Assume that NA's are only in the spatial dimension, not in the temporal - idx_na_tr <- is.na(tr[1 , ]) - idx_na_te <- is.na(te) - idx_na <- idx_na_tr | idx_na_te - tr_wo_na <- tr[ , !idx_na] - te_wo_na <- te[!idx_na] - te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = 'time') - names(dim(te_wo_na))[2] <- "space" - - knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) - - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - - analogs <- ob[ idx, ] - dim(analogs) <- c(analogs = k, space_dims_hres) - - analogs_arr[ , , , t, s] <- analogs - } + knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) + + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + + # Either return only the indices or the analogs + if (return_indices) { + res <- as.numeric(idx) + } else { + res <- obs_hres[ , idx] + dim(res) <- c(space_dims_hres, analogs = k) + + if (!is.null(fun_analog)) { + if (fun_analog == "wmean") { + weight <- 1 / dist + res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else { + res <- apply(res, c(1,2), fun_analog) + } + } } - # Apply functions to analogs - if (!is.null(fun_analog)) { - if (fun_analog == "wmean") { - weight <- 1 / dist - analogs_arr <- apply(analogs_arr, c(2,3,4,5), function(x) weighted.mean(x, weight)) + return(res) +} + +# Add the dimension window to an array that contains, at least, the start date and time +# dimensions +# object has at least dimensions sdate and time +generate_window <- function(obj, sdate_dim, time_dim, size = NULL, loocv = FALSE) { + + rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] + ntimes <- dim(obj)[names(dim(obj)) == time_dim] + rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim] + + # Generate a window containing all the data + if (is.null(size)) { + + # Generate window removing one start date each time (leave-one-out cross-validation) + if (loocv) { + obj_window <- Apply(obj, target_dims = 
c(time_dim, sdate_dim), + fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), + output_dims = c('window', 'sdate'))$output1 + } else { - analogs_arr <- apply(analogs_arr, c(2,3,4,5), fun_analog) + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = 'window')$output1 } } + # Generate window of the size specified by the user. Only applied with CV + else { + # For an accurate generation of the window, it is mandatory to add some "extra" data. + if (!("smonth" %in% names(dim(obj)))) { + stop("Missing 'smonth' dimension") + } + + # Concatenate data from previous, target and posterior months + obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), + fun = as.vector, output_dims = "time")$output1 + + if (loocv) { + obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), + fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), + output_dims = 'window')$output1 + names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c("time", "sdate") + } else { + obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), + output_dims = c('window', 'time'))$output1 - return(analogs_arr) + } + } + + return(obj_window) } - + + + + + + + -- GitLab From d057973640ff96b12335e8df97ea2edbde67c0e4 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Fri, 8 Jul 2022 15:55:27 +0200 Subject: [PATCH 14/24] Added check to quantile mapping with NAs --- R/Intbc.R | 60 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/R/Intbc.R b/R/Intbc.R index 2732b48..575c20f 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -49,8 +49,8 @@ #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. -#'@param cal_method a character vector with the calibration method to be used. See -#'\code{\link[CSTools]{Calibration}} +#'@param cal_method a character vector with the calibration method to be used. Only used when +#''bc_method' is set to 'calibration'. See \code{\link[CSTools]{Calibration}}. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. @@ -153,8 +153,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. -#'@param cal_method a character vector with the calibration method to be used. See -#'\code{\link[CSTools]{Calibration}} +#'@param cal_method a character vector with the calibration method to be used. Only used when +#''bc_method' is set to 'calibration'. Default to 'mse_min'. See \code{\link[CSTools]{Calibration}}. +#'@param qm_method a character vector with the quantile mapping method to be used. Only used +#'when 'bc_method' is set to 'quantile_mapping'. Default to 'QUANT'. See +#'\code{\link[CSTools]{QuantileMapping}}. 
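A hedged usage sketch of how these two arguments pair with 'bc_method' (the grid, data and seed are illustrative, and CDO must be available for the remapping step):

    # 'qm_method' only takes effect with the quantile mapping correction,
    # 'cal_method' only with the calibration correction
    set.seed(1)
    exp <- s2dv_cube(data = array(rnorm(500), c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)),
                     lat = 1:4, lon = 1:5)
    obs <- s2dv_cube(data = array(rnorm(900), c(lat = 12, lon = 15, sdate = 5, time = 1)),
                     lat = seq(1, 4, 3/11), lon = seq(1, 5, 4/14))
    down <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r640x320', int_method = 'bil',
                      bc_method = 'qm', qm_method = 'QUANT')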
#'@param ncores an integer indicating the number of cores to use in parallel computation. #' #'@import CSTools @@ -179,7 +182,8 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'@export Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", source_file = NULL, region = NULL, cal_method = "mse_min", ncores = 1) { + member_dim = "member", source_file = NULL, region = NULL, cal_method = "mse_min", + qm_method = "QUANT", ncores = 1) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -249,38 +253,56 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = exp_lats, lat2 = obs_lats, lon1 = exp_lons, lon2 = obs_lons) | !is.null(points)) { + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region) + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data } else { - obs_interpolated <- obs + obs_ref <- obs } if (bc_method == 'sbc' | bc_method == 'simple_bias') { - if (dim(obs_interpolated$data)[sdate_dim] == 1) { + if (dim(obs_ref)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation. Returning NA. ') } - res <- BiasCorrection(exp = exp_interpolated$data, - obs = obs_interpolated$data) + # BiasCorrection only accepts the dimension names "member" and "sdate" for exp and + # "sdate" for obs + if (member_dim != 'member') { + names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + which(names(dim(exp_interpolated$data)) == member_dim), 'member') + } + + if (sdate_dim != 'sdate') { + names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') + names(dim(obs_ref)) <- replace(names(dim(obs_ref)), + which(names(dim(obs_ref)) == sdate_dim), 'sdate') + } + + res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref) } else if (bc_method == 'cal' | bc_method == 'calibration') { if (dim(exp_interpolated$data)[member_dim] == 1) { stop('Calibration must not be used with only one ensemble member.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_interpolated$data, - cal.method = cal_method, ncores = ncores) + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, cal.method = cal_method, + memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - if (any(is.na(exp_interpolated$data))) { - warning('Found NAs in "exp" data, either introduced with the initial object or ', - 'by the interpolation method. 
Quantile Mapping method is likely to fail.') + if (any(is.na(exp_interpolated$data)) | any(is.na(obs_ref))) { + stop('Found NAs in "exp" and/or "obs" data, either introduced with the initial ', + 'object or by the interpolation method. Quantile Mapping method cannot be used. ', + 'Please select another method or try to add more data outside the borders of the ', + 'region to avoid producing NAs after interpolating.') } - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_interpolated$data, - sample_dims = sdate_dim, method = 'QUANT', ncores = ncores) + + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, + sample_dims = sdate_dim, method = qm_method, ncores = ncores) } # Return a list of three elements -- GitLab From 753d521f1f5916c16a0d8f60a1ba7ff9dad9e99f Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Mon, 11 Jul 2022 17:22:59 +0200 Subject: [PATCH 15/24] Added code to return indices --- R/Analogs.R | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/R/Analogs.R b/R/Analogs.R index d22a1d6..924e4a6 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -177,7 +177,7 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'@export Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = TRUE, + time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = FALSE, ncores = 1) { #----------------------------------- # Checkings @@ -243,6 +243,12 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) } + # Correct indices later if cross-validation + loocv_correction <- FALSE + if ( !("window" %in% names(dim(obs))) & loocv_window) { + loocv_correction <- TRUE + } + # Create window if user does not have it in obs if ( !("window" %in% names(dim(obs))) ) { obs <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) @@ -258,7 +264,8 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, } obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = grid_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", region = region) + lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", + region = region) # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to # the same grid to force the matching if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { @@ -285,15 +292,24 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, c("window", lat_dim, lon_dim)), fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 - + + # If cross-validation has been applied, correct the indices + if (loocv_correction) { + nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] + ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] + res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) + sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), + output_dims = c("ind", 
sdate_dim))$output1 + } res <- list(data = res.data, ind = res.ind, lon = obs_lons, lat = obs_lats) - } else { + } + else { res <- list(data = res.data, lon = obs_lons, lat = obs_lats) } # tests Jaume #test <- exp_interpolated[1,1,1,1,1,,] - #train <- obs_interpolated$data[,1,1,1,,1,] + #train <- obs_interpolated$data[,1,1,1,,] #obs_hres <- obs[,1,1,1,,1,] #test <- aperm(test,c(2,1,3,4)) #train <- aperm(train,c(1,4,2,3,5)) @@ -358,7 +374,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # Add the dimension window to an array that contains, at least, the start date and time # dimensions # object has at least dimensions sdate and time -generate_window <- function(obj, sdate_dim, time_dim, size = NULL, loocv = FALSE) { +generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] ntimes <- dim(obj)[names(dim(obj)) == time_dim] @@ -369,13 +385,13 @@ generate_window <- function(obj, sdate_dim, time_dim, size = NULL, loocv = FALSE # Generate window removing one start date each time (leave-one-out cross-validation) if (loocv) { - obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), - fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), - output_dims = c('window', 'sdate'))$output1 - + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), + output_dims = c('window', 'sdate'))$output1 + # Generate window without cross-validation } else { - obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), - fun = as.vector, output_dims = 'window')$output1 + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = 'window')$output1 } } # Generate window of the size specified by the user. Only applied with CV @@ -384,7 +400,7 @@ generate_window <- function(obj, sdate_dim, time_dim, size = NULL, loocv = FALSE if (!("smonth" %in% names(dim(obj)))) { stop("Missing 'smonth' dimension") } - + # Concatenate data from previous, target and posterior months obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 -- GitLab From 6f0471f3c0073344eb8d18612f510cd7fb0fc05f Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 20 Jul 2022 12:32:18 +0200 Subject: [PATCH 16/24] Corrected lat/dim names in point interpolation --- R/Interpolation.R | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/R/Interpolation.R b/R/Interpolation.R index 891e64c..9357228 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -18,7 +18,7 @@ #'@param points a list of two elements containing the point latitudes and longitudes #'of the locations to downscale the model data. The list must contain the two elements #'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed in the +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the #'downscaling is to a point location. #'@param method_remap a character vector indicating the regridding method to be passed #'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". 
If "nn" method is @@ -58,7 +58,16 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr stop("Parameter 'exp' must be of the class 's2dv_cube'") } - res <- Interpolation(exp = exp$data, lats = exp[[lat_dim]], lons = exp[[lon_dim]], + #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) { + # stop("The name of the latitude/longitude elements in 'exp' must match the parametres ", + # "'lat_dim' and 'lon_dim'") + #} + + if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) { + stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") + } + + res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon, source_file = exp$source_files[1], points = points, method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) @@ -404,7 +413,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, #---------------- invdist <- cbind(ida, idb, idc, idd) print(invdist) - w <- t(apply(invdistc(1),function(x) { print(x); if(any(is.infinite(x))) { + w <- t(apply(invdist, 1, function(x) { print(x); if(any(is.infinite(x))) { x <- is.infinite(x) * 1 } ; x <- x/sum(x) })) print(w) @@ -562,6 +571,10 @@ latlon2ij <- function(griddes, lats, lons) { #------------ if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + if((griddes$xfirst > 180) & (any(lons < 0))) { + stop("Please use the same convention for the latitudes in the source file and the ", + "longitude values in 'points'.") + } #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} # no need to resize lons to [0,360) @@ -684,7 +697,7 @@ get_model_data <- function(weights.df, mdata) { #----------------- # Retrieve with multiApply #----------------- - sub_mdata <- Apply(mdata, target_dims = list(c('lat', 'lon')), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1 + sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1 names(dim(sub_mdata))[1] <- "gridpoint" #----------------- -- GitLab From 222bfada24272a9e19f0bdd393a92acdf894a6f4 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 20 Jul 2022 17:01:15 +0200 Subject: [PATCH 17/24] Corrected issue with point interpolation --- R/Interpolation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Interpolation.R b/R/Interpolation.R index 9357228..8511784 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -722,7 +722,7 @@ interpolate_data <- function(model_data, weights.df) { # Return an array that contains the requested locations and interpolation type #----------------- interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) - names(dim(interp_data))[1] <- "location" + #names(dim(interp_data))[1] <- "location" return(interp_data) } -- GitLab From 467655e70c9dd0a56808a86779b6ac9f0a4c7b60 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 27 Jul 2022 15:37:24 +0200 Subject: [PATCH 18/24] testthat for Interpolation, Intbc and Intlr --- tests/testthat/test_CST_Intbc.R | 63 +++++++++++++++++++ tests/testthat/test_CST_Interpolation.R | 82 +++++++++++++++++++++++++ tests/testthat/test_CST_Intlr.R | 68 ++++++++++++++++++++ 3 files changed, 213 
insertions(+) create mode 100644 tests/testthat/test_CST_Intbc.R create mode 100644 tests/testthat/test_CST_Interpolation.R create mode 100644 tests/testthat/test_CST_Intlr.R diff --git a/tests/testthat/test_CST_Intbc.R b/tests/testthat/test_CST_Intbc.R new file mode 100644 index 0000000..122c50b --- /dev/null +++ b/tests/testthat/test_CST_Intbc.R @@ -0,0 +1,63 @@ +set.seed(1) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +require(testthat) +require(CSTools) +require(multiApply) +require(s2dv) + +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_Intbc(exp = 1), + "Parameter 'exp' must be of the class 's2dv_cube'") + + exp <- rnorm(500) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + exp_lons <- 1:5 + exp_lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)) + + expect_error(CST_Intbc(exp = exp), "argument \"obs\" is missing, with no default") + + obs <- rnorm(900) + dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1) + obs_lons <- seq(1,5, 4/14) + obs_lats <- seq(1,4, 3/11) + obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) + + expect_error( + CST_Intbc(exp = exp, obs = obs, bc_method = 'cal'), + paste0("Parameter 'method_remap' must be a character vector indicating the ", + "interpolation method. Accepted methods are con, bil, bic, nn, con2")) + + obs2 <- rnorm(180) + dim(obs2) <- c(lat = 12, lon = 15, sdate = 1) + obs2 <- s2dv_cube(data = obs2, lat = obs_lats, lon = obs_lons) + + expect_warning( + CST_Intbc(exp = exp, obs = obs2, int_method = 'bil', bc_method = 'sbc', target_grid = 'r1280x640'), + 'Simple Bias Correction should not be used with only one observation. 
Returning NA.') + + exp2 <- rnorm(100) + dim(exp2) <- c(member = 1, lat = 4, lon = 5, sdate = 5, time = 1) + exp2 <- suppressWarnings(s2dv_cube(data = exp2, lat = exp_lats, lon = exp_lons)) + + expect_error( + CST_Intbc(exp = exp2, obs = obs, int_method = 'bil', bc_method = 'cal', target_grid = 'r1280x640'), + 'Calibration must not be used with only one ensemble member.') + + d1 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'sbc', target_grid = 'r640x320') + expect_equal(round(d1$data[, 1, 1, 1, 1], 2), c(-0.40, 0.08, -0.23, 0.38, -0.03)) + + d2 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'cal', target_grid = 'r640x320') + expect_equal(round(d2$data[, 1, 1, 1, 1], 2), c(-0.47, -0.03, -0.31, 0.24, -0.13)) + + d3 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'qm', target_grid = 'r640x320') + expect_equal(round(d3$data[, 1, 1, 1, 1], 2), c(0.00, 0.00, 0.00, 0.33, 0.00)) + }) + + + + diff --git a/tests/testthat/test_CST_Interpolation.R b/tests/testthat/test_CST_Interpolation.R new file mode 100644 index 0000000..ae9dcc5 --- /dev/null +++ b/tests/testthat/test_CST_Interpolation.R @@ -0,0 +1,82 @@ +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +require(testthat) +require(CSTools) + +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_Interpolation(exp = 1), + "Parameter 'exp' must be of the class 's2dv_cube'") + + exp <- rnorm(500) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + lons <- 1:5 + lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons)) + + expect_error( + CST_Interpolation(exp = exp, lon_dim = 'longitude'), + paste0("The name of the latitude/longitude dimensions in 'exp$data' must ", + "match the parametres 'lat_dim' and 'lon_dim'"), fixed = TRUE) # the '$' needs adding fixed = TRUE + + expect_error( + CST_Interpolation(exp = exp), + paste0("Parameter 'method_remap' must be a character vector indicating the ", + "interpolation method. Accepted methods are con, bil, bic, nn, con2")) + + expect_error( + CST_Interpolation(exp = exp, method_remap = 'bil'), + paste0("Parameter 'target_grid' can be either a path to another NetCDF file which to ", + "read the target grid from (a single grid must be defined in such file) or a character ", + "vector indicating the coarse grid to be passed to CDO, and it must be a grid recognised ", + "by CDO or a NetCDF file."), fixed = TRUE) + + expect_warning( + CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640', region = NULL), + paste0("The borders of the downscaling region have not been provided. 
Assuming the ", + "four borders of the downscaling region are defined by the first and last elements ", + "of the parametres 'lats' and 'lons'")) + + d <- CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640') + + expect_equal(length(d), 8) + expect_equal(class(d), "s2dv_cube") + expect_equal(as.numeric(dim(d$data)[names(dim(d$data)) == 'lat']), length(d$lat)) + expect_equal(as.numeric(dim(d$data)[names(dim(d$data)) == 'lon']), length(d$lon)) + + expect_error( + CST_Interpolation(exp = exp, points = 1), + paste0("Parameter 'points' must be a list of two elements containing the point latitudes ", + "and longitudes.")) + + expect_error( + CST_Interpolation(exp = exp, points = list(latitudes = c(1,0), longitudes = c(-1,1)), + method_point_interp = 'bilinear', lat_dim = 'lat', lon_dim = 'lon'), + paste0("The names of the elements in the list 'points' must coincide with the parametres ", + "'lat_dim' and 'lon_dim'")) + + exp$source_files[1] <- 'null.nc' + expect_error( + CST_Interpolation(exp = exp, points = list(lat = c(1,0), lon = c(-1,1)), + method_point_interp = 'bilinear'), + "At least one of the points lies outside the model region") + + exp <- rep(letters[1:5], 100) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + lons <- 1:5 + lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons)) + + expect_error( + CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640', region = c(1, 5, 1, 4)), + "Parameter 'data_array' must be a numeric array.") + + exp <- array(1:6, c(lat = 3, lon = 2)) + lons <- 1:2 + lats <- 1:3 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons)) + + d2 <- CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r640x320')$data + expect_equal(d2, array(c(2, 2, 3, 3, 4, 5), dim = c(lat = 3, lon = 2))) + }) + diff --git a/tests/testthat/test_CST_Intlr.R b/tests/testthat/test_CST_Intlr.R new file mode 100644 index 0000000..decdb08 --- /dev/null +++ b/tests/testthat/test_CST_Intlr.R @@ -0,0 +1,68 @@ +set.seed(1) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +require(testthat) +require(CSTools) +require(multiApply) +require(s2dv) + +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_Intlr(exp = 1), + "Parameter 'exp' must be of the class 's2dv_cube'") + + exp <- rnorm(500) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + exp_lons <- 1:5 + exp_lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)) + + expect_error(CST_Intlr(exp = exp), "argument \"obs\" is missing, with no default") + + obs <- rnorm(900) + dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1) + obs_lons <- seq(1,5, 4/14) + obs_lats <- seq(1,4, 3/11) + obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = 1), + "Parameter 'lr_method' must be of the class 'character'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "basic"), + paste0("Parameter 'int_method' must be a character vector indicating the interpolation ", + "method. 
Accepted methods are con, bil, bic, nn, con2")) + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4)), + "The large-scale predictors must be passed through the parametre 'predictors'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4), predictors = 1), + "Parameter 'predictors' must be of the class 'array'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, int_method = "bil", target_grid = 'r1280x640', region = c(1, 5, 1, 4), + lr_method = "a"), + "a method is not implemented yet") + + d1 <- CST_Intlr(exp = exp, obs = obs, lr_method = "basic", int_method = "bil", target_grid = 'r1280x640') + expect_equal(round(d1$data[, 1, 1, 1, 1], 2), c(-0.70, -0.20, 0.56, 0.87, 0.60)) + + d2 <- CST_Intlr(exp = exp, obs = obs, lr_method = "4nn") + expect_equal(round(d2$data[, 1, 1, 1, 1], 2), c(0.08, 1.08, 0.80, 0.47, -0.52)) + + ind_rdm <- array(rnorm(30), dim = c(sdate = 5, member = 3, vars = 2)) + d3 <- CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4), predictors = ind_rdm) + expect_equal(round(d3$data[, 1, 1, 1, 1], 2), c(-0.26, -0.92, 0.70, 0.74, 0.86)) + }) + + + + -- GitLab From 9f9ed7d61927f9d0884429f11aac1802fca36f6c Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Fri, 29 Jul 2022 09:19:41 +0200 Subject: [PATCH 19/24] . --- R/Analogs.R | 45 +++++++-- R/Intbc.R | 92 ++++++++--------- R/Intlr.R | 81 +++++++++------ examples/analogs.R | 124 ++++++++++++++++------- examples/interpolation-bc.R | 24 +++-- examples/interpolation-lr.R | 45 +++++++-- examples/interpolation.R | 19 ++-- tests/QuantileMapping_new.R | 159 ++++++++++++++++++++++++++++++ tests/testthat/test_CST_Analogs.R | 44 +++++++++ 9 files changed, 487 insertions(+), 146 deletions(-) create mode 100644 tests/QuantileMapping_new.R create mode 100644 tests/testthat/test_CST_Analogs.R diff --git a/R/Analogs.R b/R/Analogs.R index 924e4a6..ccd3bf2 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -31,6 +31,10 @@ #'@param obs an 's2dv' object with named dimensions containing the observational field. #'The object must have, at least, the dimensions latitude, longitude and start date. #'The object is expected to be already subset for the desired region. +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file. #'@param nanalogs an integer indicating the number of analogs to be searched #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), @@ -48,6 +52,11 @@ #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. 
It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #' #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the @@ -68,7 +77,8 @@ #'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') #'@export CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", region = NULL, ncores = 1) { + sdate_dim = "sdate", time_dim = "time", region = NULL, return_indices = FALSE, + loocv_window = TRUE, ncores = 1) { # input exp and obs must be s2dv_cube objects if (!inherits(exp,'s2dv_cube')) { @@ -80,10 +90,11 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]], - obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], grid_exp = grid_exp, + res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp, nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, time_dim = time_dim, region = region, ncores = ncores) + sdate_dim = sdate_dim, time_dim = time_dim, region = region, return_indices = return_indices, + loocv_window = loocv_window, ncores = ncores) res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) @@ -123,8 +134,11 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'the borders of the region should be specified in the parameter 'region'. See parameter #''region'. #'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. +#'must have, at least, the dimensions latitude, longitude, start date and time. The object +#'is expected to be already subset for the desired region. Optionally, 'obs' can have the +#'dimension 'window', containing the sampled fields into which the function will look for +#'the analogs. See function 'generate_window()'. Otherwise, the function will look for +#'analogs using all the possible fields contained in obs. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -133,8 +147,11 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'range from -90 to 90. #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes #'can range from -180 to 180 or from 0 to 360. -#'@param grid_exp a character vector with a path to an example file of the exp data. -#'@param nanalogs an integer indicating the number of analogs to be searched +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file. 
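+#'For illustration only (these values are assumptions, not defaults): 'grid_exp' could
+#'be e.g. 'r360x180', or a path such as '/path/to/exp_example.nc'.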
+#'@param nanalogs an integer indicating the number of analogs to be searched. #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), #'the function returns the found analogs. @@ -151,6 +168,13 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. The indices refer to the position of the +#'element in the vector time * start_date. If 'obs' contain the dimension 'window', it will +#'refer to the position of the element in the dimension 'window'. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'@import multiApply #'@import CSTools @@ -177,7 +201,7 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'@export Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = FALSE, + time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) { #----------------------------------- # Checkings @@ -259,7 +283,8 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, #----------------------------------- if (is.null(region)) { warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and 'exp_lons'.") + "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and ", + "'exp_lons'.") region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) } diff --git a/R/Intbc.R b/R/Intbc.R index 575c20f..ce50c40 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -49,8 +49,6 @@ #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. -#'@param cal_method a character vector with the calibration method to be used. Only used when -#''bc_method' is set to 'calibration'. See \code{\link[CSTools]{Calibration}}. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. 
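# A minimal sketch, not part of the patch: with 'cal_method' and 'qm_method' dropped from
# the signature, method-specific options are assumed to be forwarded to the underlying
# correction functions through the '...' of Intbc(). The data objects below are hypothetical:
#   res <- Intbc(exp = exp_data, obs = obs_data, exp_lats = lats, exp_lons = lons,
#                obs_lats = lats, obs_lons = lons, target_grid = 'r1280x640',
#                int_method = 'bil', bc_method = 'calibration', cal.method = 'mse_min')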
@@ -70,7 +68,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - region = NULL, cal_method = "mse_min", ncores = 1) + region = NULL, ncores = 1) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -80,12 +78,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]], - obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], target_grid = target_grid, + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, region = region, - cal_method = cal_method, ncores = ncores) + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) @@ -153,11 +150,6 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. -#'@param cal_method a character vector with the calibration method to be used. Only used when -#''bc_method' is set to 'calibration'. Default to 'mse_min'. See \code{\link[CSTools]{Calibration}}. -#'@param qm_method a character vector with the quantile mapping method to be used. Only used -#'when 'bc_method' is set to 'quantile_mapping'. Default to 'QUANT'. See -#'\code{\link[CSTools]{QuantileMapping}}. #'@param ncores an integer indicating the number of cores to use in parallel computation. #' #'@import CSTools @@ -182,8 +174,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'@export Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", source_file = NULL, region = NULL, cal_method = "mse_min", - qm_method = "QUANT", ncores = 1) { + time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -225,7 +216,8 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") } - if (!(bc_method %in% c('sbc', 'cal', 'qm', 'simple_bias', 'calibration', 'quantile_mapping'))) { + if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration', + 'quantile_mapping', 'dynamical_bias'))) { stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. 
The abbreviations ", "'sbc', 'cal', 'qm' can also be used.") @@ -241,8 +233,9 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parametres 'obs_lats' and 'obs_lons'.") + warning("The borders of the downscaling region have not been provided. Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } @@ -264,46 +257,53 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, obs_ref <- obs } + # Some functions only accept the dimension names "member" and "sdate" for exp and + # "sdate" for obs + if (member_dim != 'member') { + names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + which(names(dim(exp_interpolated$data)) == member_dim), 'member') + } + + if (sdate_dim != 'sdate') { + names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') + names(dim(obs_ref)) <- replace(names(dim(obs_ref)), + which(names(dim(obs_ref)) == sdate_dim), 'sdate') + } + if (bc_method == 'sbc' | bc_method == 'simple_bias') { if (dim(obs_ref)[sdate_dim] == 1) { - warning('Simple Bias Correction should not be used with - only one observation. Returning NA. ') + warning('Simple Bias Correction should not be used with only one observation. Returning NA.') } - # BiasCorrection only accepts the dimension names "member" and "sdate" for exp and - # "sdate" for obs - if (member_dim != 'member') { - names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), - which(names(dim(exp_interpolated$data)) == member_dim), 'member') - } - - if (sdate_dim != 'sdate') { - names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), - which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') - names(dim(obs_ref)) <- replace(names(dim(obs_ref)), - which(names(dim(obs_ref)) == sdate_dim), 'sdate') - } - res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref) + res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...) } else if (bc_method == 'cal' | bc_method == 'calibration') { if (dim(exp_interpolated$data)[member_dim] == 1) { - stop('Calibration must not be used with - only one ensemble member.') + stop('Calibration must not be used with only one ensemble member.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, cal.method = cal_method, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores) + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = 'member', + sdate_dim = 'sdate', ...) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - if (any(is.na(exp_interpolated$data)) | any(is.na(obs_ref))) { - stop('Found NAs in "exp" and/or "obs" data, either introduced with the initial ', - 'object or by the interpolation method. Quantile Mapping method cannot be used. 
', - 'Please select another method or try to add more data outside the borders of the ', - 'region to avoid producing NAs after interpolating.') - } - - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, - sample_dims = sdate_dim, method = qm_method, ncores = ncores) + source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/tests/QuantileMapping_new.R') + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, sample_dims = sdate_dim, + na.rm = TRUE, ...) } + else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + # the temporal dimension must be only one dimension called "time" + if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { + exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = "time")$output1 + } + if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { + obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, + output_dims = "time")$output1 + } + + res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...) + } + # Return a list of three elements res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat) diff --git a/R/Intlr.R b/R/Intlr.R index 95ef082..5295205 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -22,8 +22,6 @@ #'@param obs an 's2dv object' containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. #'@param lr_method a character vector indicating the linear regression method to be applied #'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' #'method fits a linear regression using high resolution observations as predictands and the @@ -34,6 +32,8 @@ #'applied to the interpolated model values. Finally, the '4nn' method uses a linear #'regression with the four nearest neighbours as predictors and high-resolution observations #'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. #'@param points a list of two elements containing the point latitudes and longitudes #'of the locations to downscale the model data. The list must contain the two elements #'named as indicated in the parameters 'lat_dim' and 'lon_dim'. 
If the downscaling is @@ -90,9 +90,9 @@ #'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) #'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') #'@export -CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_method = NULL, method_point_interp = NULL, - source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat", - lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", +CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, + method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) { if (!inherits(exp,'s2dv_cube')) { @@ -103,11 +103,11 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp[[lat_dim]], exp_lons = exp[[lon_dim]], - obs_lats = obs[[lat_dim]], obs_lons = obs[[lon_dim]], points = points, source_file_exp = exp$source_files[1], - source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, int_method = int_method, - method_point_interp = method_point_interp, - predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, + res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, points = points, source_file_exp = exp$source_files[1], + source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, + int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, region = region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, ncores = ncores) @@ -151,8 +151,6 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho #'range from -90 to 90. #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes #'can range from -180 to 180 or from 0 to 360. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. #'@param lr_method a character vector indicating the linear regression method to be applied #'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' #'method fits a linear regression using high resolution observations as predictands and the @@ -163,6 +161,8 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho #'applied to the interpolated model values. Finally, the '4nn' method uses a linear #'regression with the four nearest neighbours as predictors and high-resolution observations #'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. #'@param points a list of two elements containing the point latitudes and longitudes #'of the locations to downscale the model data. The list must contain the two elements #'named as indicated in the parameters 'lat_dim' and 'lon_dim'. 
If the downscaling is @@ -218,7 +218,7 @@ CST_Intlr <- function(exp, obs, target_grid, lr_method, points = NULL, int_metho #'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, #'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') #'@export -Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, lr_method, points = NULL, +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { @@ -285,35 +285,47 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, stopifnot(sdate_dim %in% names(dim(obs))) # checkings for the parametre 'predictors' - if (is.array(predictors)) { - # ensure the predictor variable name matches the parametre large_scale_predictor_dimname - stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) - stopifnot(sdate_dim %in% names(dim(predictors))) - stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) + if (!is.null(predictors)) { + if (!is.array(predictors)) { + stop("Parameter 'predictors' must be of the class 'array'") + } else { + # ensure the predictor variable name matches the parametre large_scale_predictor_dimname + stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) + stopifnot(sdate_dim %in% names(dim(predictors))) + stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) + } } #----------------------------------- # Interpolation #----------------------------------- if (lr_method != '4nn') { - + + if (is.null(int_method)) { + stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", + "Accepted methods are con, bil, bic, nn, con2") + } + if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parametres 'obs_lats' and 'obs_lons'.") + warning("The borders of the downscaling region have not been provided. 
Assuming the ", + "four borders of the downscaling region are defined by the first and last ", + "elements of the parametres 'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = int_method, region = region) + points = points, method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = int_method, region = region) + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) lats <- obs_interpolated$lat lons <- obs_interpolated$lon @@ -346,6 +358,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (is.null(predictors)) { stop("The large-scale predictors must be passed through the parametre 'predictors'") } + predictand <- obs_interpolated predictor <- predictors @@ -361,20 +374,24 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4) if (is.null(points)) { + if (!is.null(target_grid)) { + warning("Interpolating to the 'obs' grid") + } predictand <- obs lats <- obs_lats lons <- obs_lons } - # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, interpolate to point locations + # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, + # interpolate to point locations else { - predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, source_file = source_file_obs, - method_remap = int_method, region = region) + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region) - predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, source_file = source_file_obs, - method_remap = int_method, region = region) + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region) lats <- predictor$lat lons <- predictor$lon diff --git a/examples/analogs.R b/examples/analogs.R index 7f27510..7886cff 100644 --- a/examples/analogs.R +++ b/examples/analogs.R @@ -4,6 +4,7 @@ library(startR) library(s2dv) library(lubridate) 
library(multiApply) +library(FNN) source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') @@ -24,38 +25,36 @@ extra <- 1 #sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') #sdates <- format(seq(ymd("20000101"), ymd("20071201"), '1 month'), "%Y%m%d") sdates_exp <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m%d") -#sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:4), each=3),"%Y%m") -sdates_obs <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m") +sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:2), each=3),"%Y%m") +#sdates_obs <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m") #--------------------------- # Observations #--------------------------- # dim(obs) <- c('sdate', 'smonth', ...) -obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', - var = 'tas', time = indices(1:28), lat = values(list(latmin - extra, latmax + extra)), - sdate = sdates_obs, lat_reorder = Sort(decreasing = FALSE), - lon = values(list(lonmin - extra, lonmax + extra)), lon_reorder = CircularSort(-180, 180), - synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), - lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'), +obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), latitude = values(list(latmin - extra, latmax + extra)), + sdate = sdates_obs, latitude_reorder = Sort(decreasing = FALSE), + longitude = values(list(lonmin - extra, lonmax + extra)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), return_vars = list(latitude = 'dat', longitude = 'dat'), num_procs = 1, retrieve = TRUE) -obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon, - source_files = attr(obs, "Files")[1,1,]) -attr(obs$lon,"first_lon") <- obs$lon[1] -attr(obs$lon,"last_lon") <- obs$lon[length(obs$lon)] -attr(obs$lat,"first_lat") <- obs$lat[1] -attr(obs$lat,"last_lat") <- obs$lat[length(obs$lat)] -# Create window to look for analogues -dim(obs$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 7, lon = 67) # correct -obs_new <- Apply(obs$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 -wlen <- 7 +# Create 's2dv_cube' object +obs2 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon, + source_files = attr(obs1, "Files")[1,1,]) -result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) -for (i in 1:28) { - result[,i,,,] <- obs_new[(28 + i - wlen):(28 + i + wlen),1,1,,,] -} +# Create window to look for analogues +#dim(obs2$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 5, lon = 67) # correct +#obs_new <- Apply(obs2$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 +#wlen <- 7 -obs$data <- result +#result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) +#for (i in 1:28) { +# result[,i,,,] <- obs_new[(28 + i - wlen):(28 + i + wlen),1,1,,,] +#} +#obs2$data <- result # > 
dim(obs$data) # data var time lat smonth sdate lon @@ -67,21 +66,76 @@ obs$data <- result #--------------------------- # Model #--------------------------- -exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', var = 'tas', time = indices(1:28), member = indices(1:3), sdate = sdates_exp, - lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), - lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), - synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), - lat = c('lat', 'latitude'), member = c('member','ensemble')), - return_vars = list(lat = 'dat', lon = 'dat'), + latitude = values(list(latmin, latmax)), latitude_reorder = Sort(decreasing = FALSE), + longitude = values(list(lonmin, lonmax)), longitude_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(latitude = 'dat', longitude = 'dat'), num_procs = 1, retrieve = TRUE) -exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon, - source_files = attr(exp, "Files")[1,1,]) -attr(exp$lon,"first_lon") <- exp$lon[1] -attr(exp$lon,"last_lon") <- exp$lon[length(exp$lon)] -attr(exp$lat,"first_lat") <- exp$lat[1] -attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)] +# Create 's2dv_cube' object +exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon, + source_files = attr(exp1, "Files")[1,1,]) + +#--------------------------- +# Analogs with no 'window' dimension in the initial object and with cv +#--------------------------- +ana_1 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, ncores = 4) + +#--------------------------- +# Analogs with no 'window' dimension in the initial object and without cv +#--------------------------- +ana_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = FALSE, ncores = 4) + +#--------------------------- +# Analogs with 'window' dimension in the initial object and with cv +#--------------------------- +dim(obs1) <- c(data = 1, var = 1, time = 28, latitude = 39, smonth = 3, sdate = 3, longitude = 67) +obs_window <- generate_window(obj = obs1, sdate_dim = 'sdate', time_dim = 'time', size = 7, loocv = TRUE) +ana_3 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', ncores = 4) + +#--------------------------- +# Analogs with cv and returning the indices +#--------------------------- +ana_4 <- Analogs(exp = exp1, obs = obs1, exp_lats = 
attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, + return_indices = TRUE, ncores = 4) + +#--------------------------- +# Analogs with 'window' dimension and returning the indices +#--------------------------- +ana_5 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', return_indices = TRUE, ncores = 4) + +#--------------------------- +# Analogs with NA's in obs +#--------------------------- +obs1[,,,10:20,,] <- NA +ana_mean_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, source_file_exp = attr(exp1, "Files")[1,1,1], + fun_analog = 'mean', ncores = 4,lat_dim='latitude',lon_dim='longitude') + + +#--------------------------- +# Analogs with CST_Analogs() +#--------------------------- +ana_mean <- CST_Analogs(exp = exp2, obs = obs2, fun_analog = "mean", ncores = 4) # test examples ana_mean <- Analogs(exp = exp, obs = obs, fun_analog = "mean", ncores = 4) diff --git a/examples/interpolation-bc.R b/examples/interpolation-bc.R index fe46cfc..f1b44b2 100644 --- a/examples/interpolation-bc.R +++ b/examples/interpolation-bc.R @@ -3,6 +3,7 @@ library(startR) library(s2dv) library(CSTools) library(lubridate) +library(multiApply) source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') @@ -19,7 +20,9 @@ lonmax <- 5.35 latmin <- 35.1 latmax <- 44.1 -sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +sdates <- format(ymd("20000201") + rep(years(0:20), each=1),"%Y%m%d") + obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), @@ -56,6 +59,13 @@ down_1_qm <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$ obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', bc_method = 'quantile_mapping', ncores = 4) +down_1_dbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'dynamical_bias', quanti = 0.6, ncores = 4) +#Error in (function (logdista, quanti) : +# Parameter 'quanti' is too high for the length of the data provided. 
+ #------------------------------------ # Transform exp and obs into s2dv_objects #------------------------------------ @@ -76,14 +86,14 @@ attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)] #------------------------------------ # Downscaling with CST_Intbc #------------------------------------ -down_2_sbc <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'simple_bias', - ncores = 4) +down_2_sbc <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'simple_bias', ncores = 4) -down_2_cal <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'calibration', - ncores = 4) +down_2_cal <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'calibration', ncores = 4) -down_2_qm <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', bc_method = 'quantile_mapping', - ncores = 4) +down_2_qm <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'quantile_mapping', ncores = 4) #------------------------------------ # Downscaling to point locations diff --git a/examples/interpolation-lr.R b/examples/interpolation-lr.R index c32f0b1..89dcea8 100644 --- a/examples/interpolation-lr.R +++ b/examples/interpolation-lr.R @@ -20,10 +20,10 @@ latmin <- 35.1 latmax <- 44.1 -sdates <- format(ymd("19811201") + rep(years(0:36),each=1),"%Y%m%d") -#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +#sdates <- format(ymd("19811201") + rep(years(0:36),each=1),"%Y%m%d") +sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') -obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h_i1087/$var$_$sdate$.nc', +obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), sdate = format(ymd(sdates), "%Y%m"), lat_reorder = Sort(decreasing = FALSE), lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), @@ -62,6 +62,32 @@ down_1_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$da obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn", int_method = "conservative", predictors = ind_rdm, loocv = TRUE, ncores = 4) +#---------------------------------- +# Downscaling to point locations with CST_Intlr +# Note: load first NCO module +#---------------------------------- +points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8)) +down_2_bas <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "basic", + points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = NULL, + source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1], + loocv = TRUE, ncores = 4) + +down_2_lsc <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "large-scale", + points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = ind_rdm, + 
source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1], + loocv = TRUE, ncores = 4) + +down_2_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn", + points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = NULL, + source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1], + loocv = TRUE, ncores = 4) + #---------------------------------- # Create s2dv objects #---------------------------------- @@ -82,16 +108,15 @@ attr(exp1$lat,"last_lat") <- exp1$lat[length(exp1$lat)] #---------------------------------- # Downscaling with CST_Intlr #---------------------------------- -down_2_bas <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "basic", +down_3_bas <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "basic", int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) -down_2_lsc <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "large-scale", - int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) +down_3_lsc <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "large-scale", + int_method = "bilinear", predictors = ind_rdm, loocv = TRUE, ncores = 4) -down_2_nn <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "4nn", +down_3_nn <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "4nn", int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) -#---------------------------------- -# Downscaling to point locations with CST_Intlr -#---------------------------------- + + diff --git a/examples/interpolation.R b/examples/interpolation.R index 367fb90..96c446b 100644 --- a/examples/interpolation.R +++ b/examples/interpolation.R @@ -11,6 +11,10 @@ target_grid <- '/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc' #lonmax <- 45 #latmin <- 27 #latmax <- 72 +#lonmin <- -11.5 +#lonmax <- 5.35 +#latmin <- 35.1 +#latmax <- 44.1 lonmin <- -11.5 lonmax <- 5.35 latmin <- 35.1 @@ -18,10 +22,11 @@ latmax <- 44.1 sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') -exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates, lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), - lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + lon = values(list(lonmin, lonmax)), + lon_reorder = CircularSort(-180, 180), synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude'), member = c('member','ensemble')), return_vars = list(lat = 'dat', lon = 'dat'), @@ -35,8 +40,9 @@ attr(exp2$lat,"last_lat") <- exp2$lat[length(exp2$lat)] #-------------------------------- # Downscaling with Interpolation +# If lon_reorder = CircularSort(0, 360) is set, please add the parameter remap_region = c(lonmin,lonmax,latmin,latmax) #-------------------------------- -exp1_con <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat, +exp1_con <- Interpolation(exp = exp1, lats = 
attr(exp1, "Variables")$dat1$lat, lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'con') exp1_bil <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat, lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'bil') @@ -60,9 +66,10 @@ exp2_con2 <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_rem # Downscaling to point locations #-------------------------------- points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8)) -down_points_sbc <- CST_Interpolation(exp = exp, points = points, method_point_interp = 'bilinear', - target_grid = target_grid, int_method = 'con', bc_method = 'simple_bias', - ncores = 4) +down_points_sbc <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat, lons = attr(exp1, "Variables")$dat1$lon, + points = points, method_point_interp = 'bilinear', remap_region = c(lonmin,lonmax,latmin,latmax), + source_file = attr(exp1, "Files")[1,1,1]) + #s2dv::PlotEquiMap(var = obs$data[1,1,1,,,1], lat = obs$lat, lon = obs$lon, filled.continents = FALSE, toptitle = 'Observations', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_obs.png")) #s2dv::PlotEquiMap(var = exp$data[1,1,1,1,1,,], lat = exp$lat, lon = exp$lon, filled.continents = FALSE, toptitle = 'Predictions', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_fcst.png")) diff --git a/tests/QuantileMapping_new.R b/tests/QuantileMapping_new.R new file mode 100644 index 0000000..45a8bec --- /dev/null +++ b/tests/QuantileMapping_new.R @@ -0,0 +1,159 @@ +#'Quaintiles Mapping for seasonal or decadal forecast data +#' +#'@description This function is a wrapper from fitQmap and doQmap from package 'qmap'to be applied in CSTools objects of class 's2dv_cube'. +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#'@param exp an object of class \code{s2dv_cube} +#'@param obs an object of class \code{s2dv_cube} +#'@parma exp_cor an object of class \code{s2dv_cube} in which the quantile mapping correction should be applied. If it is not specified, the correction is applied in object 'exp'. +#'@param sdate_dim +#'@param memb_dim +#'@param window_dim +#'@param method a character string indicating the method to be used: 'PTF','DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used. +#' +#'@return an oject of class \code{s2dv_cube} containing the experimental data after applyingthe quantile mapping correction. +#') <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +#'@import qmap +#'@import multiApply +#' +#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +#'@examples +#'exp$data <- 1 : c(1 * 10 * 20 * 60 * 6 * 7) +#'dim(exp$data) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'class(exp) <- 's2dv_cube' +#'obs$data <- 101 : c(100 + 1 * 1 * 20 * 60 * 6 * 7) +#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'class(obs) <- 's2dv_cube' +#'res <- CST_QuantileMapping(exp, obs) +#'exp <- lonlat_data$exp +#'obs <- lonlat_data$obs +#'res <- CST_QuantileMapping(exp, obs) +#'@export +CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', + memb_dim = 'member', window_dim = NULL, + method = 'QUANT', na.rm = FALSE, + ncores = NULL, ...) 
+  if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
+         "as output by CSTools::CST_Load.")
+  }
+  dimnames <- names(dim(exp$data))
+  QMapped <- QuantileMapping(exp = exp$data, obs = obs$data,
+                             exp_cor = exp_cor$data,
+                             sdate_dim = sdate_dim, memb_dim = memb_dim,
+                             window_dim = window_dim, method = method,
+                             na.rm = na.rm, ncores = ncores, ...)
+  exp$data <- QMapped
+  exp$Datasets <- c(exp$Datasets, obs$Datasets)
+  exp$source_files <- c(exp$source_files, obs$source_files)
+  return(exp)
+}
+QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate',
+                            memb_dim = 'member', window_dim = NULL,
+                            method = 'QUANT',
+                            na.rm = FALSE, ncores = NULL, ...) {
+  obsdims <- names(dim(obs))
+  expdims <- names(dim(exp))
+  if (is.null(expdims)) {
+    stop("Parameter 'exp' must have dimension names.")
+  }
+  if (is.null(obsdims)) {
+    stop("Parameter 'obs' must have dimension names.")
+  }
+  if (!is.null(exp_cor)) {
+    exp_cordims <- names(dim(exp_cor))
+    if (is.null(exp_cordims)) {
+      stop("Parameter 'exp_cor' must have dimension names.")
+    }
+  }
+  if (!is.null(window_dim)) {
+    if (!(window_dim %in% obsdims)) {
+      stop("Dimension 'window_dim' not found in 'obs'.")
+    }
+    obs <- CSTools::MergeDims(obs, c(memb_dim, window_dim))
+    if (window_dim %in% expdims) {
+      exp <- CSTools::MergeDims(exp, c(memb_dim, window_dim))
+      warning("Dimension 'window_dim' found in 'exp' and merged into 'memb_dim'.")
+    }
+  }
+  sample_dims <- c(memb_dim, sdate_dim)
+  if (!all(memb_dim %in% obsdims)) {
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = memb_dim[!(memb_dim %in% obsdims)])
+  }
+  if (!all(sample_dims %in% expdims)) {
+    stop("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.")
+  }
+  if (!is.character(method)) {
+    warning("Parameter 'method' must be a character string indicating ",
+            "one of the following methods: 'PTF', 'DIST', 'RQUANT', ",
+            "'QUANT', 'SSPLIN'. Method 'QUANT' is being used.")
+    method <- 'QUANT'
+  }
+  if (length(method) > 1) {
+    warning("Parameter 'method' has length > 1 and only the first element",
+            " is used.")
+    method <- method[1]
+  }
+  if (!is.null(exp_cor)) {
+    qmaped <- Apply(list(exp, obs, exp_cor), target_dims = sample_dims,
+                    fun = qmapcor, method = method, na.rm = na.rm, ...,
+                    ncores = ncores)$output1
+  } else {
+    qmaped <- Apply(list(exp, obs), target_dims = sample_dims,
+                    fun = qmapcor, exp_cor = NULL, method = method,
+                    na.rm = na.rm, ...,
+                    ncores = ncores)$output1
+  }
+  return(qmaped)
+}
+qmapcor <- function(exp, obs, exp_cor = NULL, method = 'QUANT', na.rm = FALSE,
+                    ...) {
+  # exp[memb, sdate]
+  # obs[window, sdate]
+  if (is.null(exp_cor)) {
+    applied <- exp * NA
+    for (sd in 1:dim(exp)['sdate']) {
+      if (na.rm) {
+        # leave the current start date out (cross-validation)
+        nas_pos <- which(!is.na(exp[, sd]))
+        obs2 <- as.vector(obs[, -sd])
+        exp2 <- as.vector(exp[, -sd])
+        exp_cor2 <- as.vector(exp[, sd])
+        # remove NAs
+        obs2 <- obs2[!is.na(obs2)]
+        exp2 <- exp2[!is.na(exp2)]
+        exp_cor2 <- exp_cor2[!is.na(exp_cor2)]
+        tryCatch({
+          adjust <- fitQmap(obs2, exp2, method = method, ...)
+          applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...)
+        },
+        error = function(error_message) {
+          return(applied[, sd])
+        })
+      } else {
+        adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]),
+                          method = method, ...)
+        applied[, sd] <- doQmap(as.vector(exp[, sd]), adjust, ...)
+      }
+    }
+  } else {
+    applied <- exp_cor * NA
+    if (na.rm) {
+      tryCatch({
+        adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)],
+                          method = method, ...)
+ applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], + adjust, ...) + }, + error = function(error_message) { + return(applied) + }) + } else { + adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) + applied <- doQmap(as.vector(exp_cor), adjust, ...) + } + } + return(applied) +} diff --git a/tests/testthat/test_CST_Analogs.R b/tests/testthat/test_CST_Analogs.R new file mode 100644 index 0000000..48870a6 --- /dev/null +++ b/tests/testthat/test_CST_Analogs.R @@ -0,0 +1,44 @@ +set.seed(1) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +require(testthat) +require(CSTools) +require(multiApply) +require(s2dv) + +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_Analogs(exp = 1), + "Parameter 'exp' must be of the class 's2dv_cube'") + + exp <- rnorm(500) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + exp_lons <- 1:5 + exp_lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)) + + expect_error(CST_Analogs(exp = exp), "argument \"obs\" is missing, with no default") + + obs <- rnorm(900) + dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1) + obs_lons <- seq(1,5, 4/14) + obs_lats <- seq(1,4, 3/11) + obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) + + expect_error( + CST_Analogs(exp = exp, obs = obs, grid_exp = 1), + paste0("Parameter 'grid_exp' must be of class 'character'. It can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must ", + "be defined in such file) or a character vector indicating the coarse grid to be ", + "passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")) + + exp$data[1,1,1,1,1] <- NA + + l <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') + }) + + + + -- GitLab From 8beaf37d7053cabc38544ce4ab0e3e6f0601cea6 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 24 Aug 2022 17:43:26 +0200 Subject: [PATCH 20/24] fixed bug when interpolating to one location --- R/Interpolation.R | 24 +++++++++++++++--------- R/Intlr.R | 10 +++------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/R/Interpolation.R b/R/Interpolation.R index 8511784..37c81d4 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -293,28 +293,31 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, "NW", "SE", "SW")) { # crop the region to get the correct weights - save temporary file + nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min, - ',', region$lat_max, ' ', ncfile, ' ', ncfile, '_cropped')) + ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1)) #---------------- # Read grid description and compute (i,j) of requested locations (including decimals) #---------------- - griddes <- get_griddes(paste0(ncfile,'_cropped')) + griddes <- get_griddes(nc_cropped1) if (is.null(griddes$yinc)) { - system(paste0('rm ',ncfile,'_cropped')) + system('rm nc_cropped1') stop("'griddes$yinc' not found in NetCDF file. 
Remember that only regular grids are accepted when ", "downscaling to point locations.") } # If latitudes are decreasingly ordered, revert them if (griddes$yinc < 0) { - system(paste0('cdo invertlat ',ncfile,'_cropped ',ncfile,'_cropped2')) - griddes <- get_griddes(paste0(ncfile,'_cropped2')) + system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) + griddes <- get_griddes(nc_cropped2) } # remove temporary files - system(paste0('rm ',ncfile,'_cropped')) - system(paste0('rm ',ncfile,'_cropped2')) + system(paste0('rm ', nc_cropped1)) + system(paste0('rm ', nc_cropped2)) if (is.null(griddes)) { stop("'griddes' not found in the NetCDF source files") @@ -572,7 +575,7 @@ latlon2ij <- function(griddes, lats, lons) { if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} if((griddes$xfirst > 180) & (any(lons < 0))) { - stop("Please use the same convention for the latitudes in the source file and the ", + stop("Please use the same convention for the longitudes in the source file and the ", "longitude values in 'points'.") } #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} @@ -721,8 +724,11 @@ interpolate_data <- function(model_data, weights.df) { # Sum all series that belong to same interpolation point # Return an array that contains the requested locations and interpolation type #----------------- - interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) + #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) #names(dim(interp_data))[1] <- "location" + interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) { + rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1 + return(interp_data) } diff --git a/R/Intlr.R b/R/Intlr.R index 5295205..246c8ff 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -45,10 +45,6 @@ #'@param method_point_interp a character vector indicating the interpolation method to #'interpolate model gridded data into the point locations. Accepted methods are "nearest", #'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". -#'@param source_file_exp a character vector with a path to an example file of the exp data. -#'Only needed if the downscaling is to a point location. -#'@param source_file_obs a character vector with a path to an example file of the obs data. -#'Only needed if the downscaling is to a point location. #'@param predictors an array with large-scale data to be used in the 'large-scale' method. #'Only needed if the linear regression method is set to 'large-scale'. 
It must have, at
#'least, the start date dimension and another dimension whose name has to be specified in
@@ -91,9 +87,9 @@
 #'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
 #'@export
 CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL,
-                      method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL,
-                      predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
-                      large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) {
+                      method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon",
+                      sdate_dim = "sdate", time_dim = "time", large_scale_predictor_dimname = 'vars',
+                      loocv = FALSE, region = NULL, ncores = 1) {
 
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
--
GitLab

From b565c096ba7a7266278f9d9dff3d70ec7f63a094 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Wed, 24 Aug 2022 17:46:31 +0200
Subject: [PATCH 21/24] fixed bug when interpolating to one location

---
 R/Interpolation.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/Interpolation.R b/R/Interpolation.R
index 37c81d4..d2d9373 100644
--- a/R/Interpolation.R
+++ b/R/Interpolation.R
@@ -305,7 +305,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL,
   griddes <- get_griddes(nc_cropped1)
 
   if (is.null(griddes$yinc)) {
-    system('rm nc_cropped1')
+    system(paste0('rm ', nc_cropped1))
     stop("'griddes$yinc' not found in NetCDF file.
 Remember that only regular grids are accepted when ",
          "downscaling to point locations.")
   }
--
GitLab
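
# [Editor's sketch, not part of the patch series] A minimal illustration of the
# weighted-sum trick behind the interpolate_data() change in PATCH 20 above:
# each requested point location is a weighted combination of a few grid points,
# and base R's rowsum() adds up the weighted series per location id. All object
# names below are hypothetical.
weights_df <- data.frame(locid  = rep(1:2, each = 4),        # two locations, four neighbour grid points each
                         weight = c(0.4, 0.3, 0.2, 0.1,      # weights sum to 1 per location
                                    0.25, 0.25, 0.25, 0.25))
model_vals  <- matrix(rnorm(8 * 3), nrow = 8)                # 8 grid-point series, 3 time steps
weighted    <- model_vals * weights_df$weight                # weight each grid-point series
interp_vals <- rowsum(weighted, weights_df$locid)            # one row per requested location
dim(interp_vals)                                             # 2 x 3
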
From 6ddbb21d886823a1674d190328c20825066b43c3 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Fri, 26 Aug 2022 17:30:36 +0200
Subject: [PATCH 22/24] multinomial logistic regression

---
 R/LogisticReg.R | 127 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 R/LogisticReg.R

diff --git a/R/LogisticReg.R b/R/LogisticReg.R
new file mode 100644
index 0000000..c7b0a6e
--- /dev/null
+++ b/R/LogisticReg.R
@@ -0,0 +1,127 @@
+
+LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, int_method = NULL,
+                        points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
+                        sdate_dim = "sdate", member_dim = "member", source_file = NULL, region = NULL,
+                        loocv = FALSE, ncores = 1) {
+
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
+    region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+  }
+
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                    region = region)
+
+  # compute ensemble mean anomalies
+  exp_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim)
+
+  # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+  # the same grid to force the matching
+  if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                      lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                      method_remap = int_method, points = points, source_file = source_file,
+                                      lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_point_interp = method_point_interp, region = region)
+    obs_ref <- obs_interpolated$data
+  } else {
+    obs_ref <- obs
+  }
+
+  # convert observations to categorical predictands (tercile categories 1, 2, 3)
+  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
+                   terc <- convert2prob(as.vector(x), prob = c(1:2/3))
+                   apply(terc, 1, function(r) which(r == 1))},
+                   output_dims = sdate_dim)$output1
+
+  res <- Apply(list(exp_anom, obs_cat), target_dims = sdate_dim, fun = function(x, y)
+               .log_reg(x = x, y = y, loocv = loocv), output_dims = sdate_dim)$output1
+
+  res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
+
+  return(res)
+}
+
+
+get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
+
+  # compute climatology
+  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1
+
+  # compute ensemble mean
+  ens_mean <- Apply(obj_ens, target_dims = member_dim, mean)$output1
+
+  # compute ensemble mean anomalies
+  anom <- Apply(list(ens_mean, clim), margins = list(sdate_dim, NULL), fun = function(x,y) x - y)$output1
+
+  return(anom)
+}
+
+# atomic function for the logistic regressions
+.log_reg <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force return NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lr(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+}
+
+#-----------------------------------
+# Function to train the logistic regressions.
+#-----------------------------------
+train_lr <- function(df, loocv) {
+
+  require(nnet)
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    #lm1 <- lapply(1:nrow(df), function(j) glm(df[-j,], formula = y ~ ., family = "binomial"))
+    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[-j,]))
+  } else {
+
+    #lm1 <- list(glm(data = df, formula = y ~ ., family = "binomial"))
+    lm1 <- list(multinom(y ~ ., data = df))
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the logistic regressions.
+#-----------------------------------
+pred_lr <- function(df, lm1, loocv) {
+
+  if (loocv) {
+
+    pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,], type = "class"))
+
+  } else {
+
+    pred_vals_ls <- lapply(lm1, predict, newdata = df, type = "class")
+    pred_vals <- unlist(pred_vals_ls)
+  }
+
+  return(pred_vals)
+}
+
+
--
GitLab
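
# [Editor's sketch, not part of the patch series] A minimal toy run of the
# train_lr()/pred_lr() pair added in PATCH 22, assuming the 'nnet' package is
# installed. The data frame below is hypothetical and stands in for the
# predictor/category pairs built by .log_reg().
library(nnet)
set.seed(1)
df <- data.frame(x = rnorm(30),                                 # ensemble-mean anomaly (predictor)
                 y = factor(sample(1:3, 30, replace = TRUE)))   # tercile category (predictand)

# leave-one-out training, as with loocv = TRUE
fits <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[-j, ], trace = FALSE))

# predict the left-out start date from each fit
preds <- sapply(1:nrow(df), function(j) predict(fits[[j]], df[j, ], type = "class"))
head(preds)
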
From fe8bea1935f743387e90256d286253ba61ed7940 Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Fri, 2 Sep 2022 15:55:16 +0200
Subject: [PATCH 23/24] Issues and logistic regression w 3 cat

---
 R/Intbc.R       |  6 ++---
 R/Intlr.R       |  1 -
 R/LogisticReg.R | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/R/Intbc.R b/R/Intbc.R
index ce50c40..e198730 100644
--- a/R/Intbc.R
+++ b/R/Intbc.R
@@ -286,7 +286,8 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
                        sdate_dim = 'sdate', ...)
   }
   else if (bc_method == 'qm' | bc_method == 'quantile_mapping') {
-    source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/tests/QuantileMapping_new.R')
+    # TO REMOVE ONCE CSTools includes the new changes
+    source('/esarchive/scratch/jramon/GitLab_jramon/cstools/R/CST_QuantileMapping.R')
     res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref,
                            sample_dims = sdate_dim, na.rm = TRUE, ...)
   }
@@ -300,11 +301,10 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
       obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, output_dims = "time")$output1
     }
-
+    # REMEMBER to add na.rm = T in colMeans in .proxiesattractor
     res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...)
   }
-
   # Return a list of three elements
   res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)

diff --git a/R/Intlr.R b/R/Intlr.R
index 246c8ff..0da5222 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -250,7 +250,6 @@
     stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'")
   }
-
   # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
   if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
     stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
          "'lon_dim'")
   }

diff --git a/R/LogisticReg.R b/R/LogisticReg.R
index c7b0a6e..8226b23 100644
--- a/R/LogisticReg.R
+++ b/R/LogisticReg.R
@@ -4,6 +4,74 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
                         sdate_dim = "sdate", member_dim = "member", source_file = NULL, region = NULL,
                         loocv = FALSE, ncores = 1) {
 
+  #-----------------------------------
+  # Checkings
+  #-----------------------------------
+  if (!inherits(target_grid, 'character')) {
+    stop("Parameter 'target_grid' must be of the class 'character'")
+  }
+
+  if (!is.null(int_method) & !inherits(int_method, 'character')) {
+    stop("Parameter 'int_method' must be of the class 'character'")
+  }
+
+  if (!is.null(method_point_interp) & !inherits(method_point_interp, 'character')) {
+    stop("Parameter 'method_point_interp' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  if (!is.null(source_file) & !inherits(source_file, 'character')) {
+    stop("Parameter 'source_file' must be of the class 'character'")
+  }
+
+  if (!inherits(loocv, 'logical')) {
+    stop("Parameter 'loocv' must be set to TRUE or FALSE")
+  }
+
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(member_dim, names(dim(exp))))) {
+    stop("Missing member dimension in 'exp', or does not match the parameter ",
+         "'member_dim'")
+  }
+
+  if (!is.null(points) & (is.null(source_file))) {
+    stop("No source files found. One source file for 'exp' must be provided in the parameter 'source_file'.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
   if (is.null(region)) {
     warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
             "of the downscaling region are defined by the first and last elements of the parameters ",
@@ -65,7 +133,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
 .log_reg <- function(x, y, loocv) {
 
   tmp_df <- data.frame(x = x, y = y)
-  
+
   # if the data is all NA, force return NA
   if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
 
--
GitLab

From 5824f25be7f594e6266c1218d71f68f9f82ce93b Mon Sep 17 00:00:00 2001
From: Jaume Ramon
Date: Wed, 21 Sep 2022 15:53:18 +0200
Subject: [PATCH 24/24] added methods for logistic regression

---
 examples/interpolation-bc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/interpolation-bc.R b/examples/interpolation-bc.R
index f1b44b2..ad074b6 100644
--- a/examples/interpolation-bc.R
+++ b/examples/interpolation-bc.R
@@ -22,7 +22,7 @@ latmax <- 44.1
 #sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201')
 sdates <- format(ymd("20000201") + rep(years(0:20), each=1),"%Y%m%d")
-
+sdates <- format(seq(ymd("20000201"), ymd("20200201"), "1 month"), "%Y%m%d")
 obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc',
                      var = 'tas', time = indices(1),
                      lat = values(list(latmin, latmax)),
--
GitLab
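
# [Editor's sketch, not part of the patch series] The obs_cat step in
# LogisticReg() (PATCH 22) turns each observed series into tercile categories
# via convert2prob(x, prob = c(1:2/3)). A base-R sketch of the same idea, with
# a hypothetical series; exact tie handling may differ from convert2prob().
x       <- rnorm(30)                      # observed series over start dates
breaks  <- quantile(x, probs = 1:2/3)     # the two tercile thresholds
obs_cat <- findInterval(x, breaks) + 1L   # category 1 (below), 2 (normal), 3 (above)
table(obs_cat)
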