From fbb314eb43eabe3d715ac08c80716a70b200c9fe Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 11 Oct 2023 11:35:36 +0200 Subject: [PATCH 01/18] added lines to compute anomalies for hindcast and observations separately --- example_scripts/example_downscaling.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/example_scripts/example_downscaling.R b/example_scripts/example_downscaling.R index 4e7e6294..7808a3a4 100644 --- a/example_scripts/example_downscaling.R +++ b/example_scripts/example_downscaling.R @@ -17,6 +17,11 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) +# compute anomalies +clim_hcst <- Apply(data$hcst$data, target_dims = c('syear','ensemble'), mean, na.rm = T)$output1 +clim_obs <- Apply(data$obs$data, target_dims = c('syear','ensemble'), mean, na.rm = T)$output1 +data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) +data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) # Change units data <- Units(recipe, data) # Downscale datasets -- GitLab From 63ba1b0f7b21f3cde7709aae05d4db8a435951c0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 20 Oct 2023 13:01:12 +0200 Subject: [PATCH 02/18] Add option to compute anomalies when obs and hcst don't share the same grid --- example_scripts/example_downscaling.R | 8 +- modules/Anomalies/Anomalies.R | 87 ++++++++++++------- .../recipe_seasonal_downscaling.yml | 2 +- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/example_scripts/example_downscaling.R b/example_scripts/example_downscaling.R index 7808a3a4..1e9f1d6b 100644 --- a/example_scripts/example_downscaling.R +++ b/example_scripts/example_downscaling.R @@ -6,6 +6,7 @@ # Load modules source("modules/Loading/Loading.R") source("modules/Units/Units.R") +source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -17,13 +18,10 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) -# compute anomalies -clim_hcst <- Apply(data$hcst$data, target_dims = c('syear','ensemble'), mean, na.rm = T)$output1 -clim_obs <- Apply(data$obs$data, target_dims = c('syear','ensemble'), mean, na.rm = T)$output1 -data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) -data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) # Change units data <- Units(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) # Downscale datasets data <- Downscaling(recipe, data) # Compute skill metrics diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index a2f0c005..0eb9fbc7 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -12,7 +12,6 @@ Anomalies <- function(recipe, data) { "'Workflow:Anomalies:compute' is missing from the recipe.")) stop() } - if (recipe$Analysis$Workflow$Anomalies$compute) { if (recipe$Analysis$Workflow$Anomalies$cross_validation) { cross <- TRUE @@ -23,28 +22,51 @@ Anomalies <- function(recipe, data) { } original_dims <- data$hcst$dim - # Compute anomalies - anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) - # Reorder dims - anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) - anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) - # Save full fields hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs - - # Hindcast climatology - - 
data$hcst <- anom$exp - data$obs <- anom$obs - remove(anom) + # Compute anomalies + if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), + as.vector(data$obs$coords$latitude))) && + isTRUE(all.equal(as.vector(data$hcst$coords$longitude), + as.vector(data$obs$coord$longitude)))) { + anom <- CST_Anomaly(data$hcst, data$obs, + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) + # Reorder dims + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + # Hindcast climatology + data$hcst <- anom$exp + data$obs <- anom$obs + remove(anom) + } else { + ## TODO: Remove when cross-validation is implemented for this use case + if (cross) { + warn(recipe$Run$logger, + paste("Anomaly computation in cross-validation has not been", + "implemented yet for the case where hcst and obs have", + "different grids. The climatology will be computed as", + "a simple mean.")) + } + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = recipe$Analysis$remove_NAs, + ncores = recipe$Analysis$ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = recipe$Analysis$remove_NAs, + ncores = recipe$Anaysis$ncores)$output1 + data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) + data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) + } # Change variable metadata for (var in data$hcst$attrs$Variable$varName) { # Change hcst longname @@ -57,15 +79,20 @@ Anomalies <- function(recipe, data) { # Compute forecast anomaly field if (!is.null(data$fcst)) { # Compute hindcast climatology ensemble mean - clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") + if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), + as.vector(data$obs$coords$latitude))) && + isTRUE(all.equal(as.vector(data$hcst$coords$longitude), + as.vector(data$obs$coord$longitude)))) { + clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) + clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") + } # Store original dimensions dims <- dim(clim_hcst) # Repeat the array as many times as ensemble members @@ -113,7 +140,7 @@ Anomalies <- function(recipe, data) { } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Analysis:Variables:anomaly is set to FALSE.", + "recipe parameter Workflow:anomalies:compute is set to FALSE.", "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL diff --git a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml index 219c5d61..c526140d 100644 --- a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml +++ b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml @@ -29,7 +29,7 @@ Analysis: type: none Workflow: Anomalies: - compute: no # yes/no, default yes + compute: yes # yes/no, default yes cross_validation: no # yes/no, default yes save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' 
Calibration: -- GitLab From d17d7b395f73b2bbf505807c3d19558774f47def Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 20 Oct 2023 17:51:29 +0200 Subject: [PATCH 03/18] Anomalies module is added --- tests/recipes/recipe-seasonal_downscaling.yml | 6 +- tests/testthat/test-seasonal_downscaling.R | 64 ++++++++++++++++--- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/tests/recipes/recipe-seasonal_downscaling.yml b/tests/recipes/recipe-seasonal_downscaling.yml index 41df9e82..43516acb 100644 --- a/tests/recipes/recipe-seasonal_downscaling.yml +++ b/tests/recipes/recipe-seasonal_downscaling.yml @@ -25,8 +25,8 @@ Analysis: type: 'none' Workflow: Anomalies: - compute: no - cross_validation: + compute: TRUE + cross_validation: FALSE save: 'none' Calibration: method: 'none' @@ -46,6 +46,8 @@ Analysis: target_grid: /esarchive/recon/ecmwf/era5/daily_mean/tas_f1h/tas_199301.nc save: 'all' Output_format: S2S4E + ncores: 1 + remove_NAs: FALSE Run: Loglevel: INFO Terminal: yes diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R index fde0756d..26db1e5d 100644 --- a/tests/testthat/test-seasonal_downscaling.R +++ b/tests/testthat/test-seasonal_downscaling.R @@ -5,6 +5,7 @@ source("modules/Loading/Loading.R") source("modules/Skill/Skill.R") source("modules/Downscaling/Downscaling.R") source("modules/Saving/Saving.R") +source("modules/Anomalies/Anomalies.R") recipe_file <- "tests/recipes/recipe-seasonal_downscaling.yml" recipe <- prepare_outputs(recipe_file, disable_checks = F) @@ -14,9 +15,14 @@ suppressWarnings({invisible(capture.output( data <- Loading(recipe) ))}) +# Compute anomalies +suppressWarnings({invisible(capture.output( +ano_data <- Anomalies(recipe, data) +))}) + # Downscale the data suppressWarnings({invisible(capture.output( -downscaled_data <- Downscaling(recipe, data) +downscaled_data <- Downscaling(recipe, ano_data) ))}) # Compute skill metrics @@ -24,6 +30,7 @@ suppressWarnings({invisible(capture.output( skill_metrics <- Skill(recipe, downscaled_data) ))}) +#====================================== test_that("1. Loading", { expect_equal( @@ -101,7 +108,48 @@ as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') }) #====================================== -test_that("2. Downscaling", { +test_that("2. Anomalies", { + +expect_equal( +is.list(ano_data), +TRUE +) +expect_equal( +names(ano_data), +c("hcst", "obs", "fcst", "hcst.full_val", "obs.full_val") +) +expect_equal( +class(ano_data$hcst), +"s2dv_cube" +) +expect_equal( +class(ano_data$fcst), +"NULL" +) +expect_equal( +dim(ano_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +mean(ano_data$hcst$data), +-3.466088e-16, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(ano_data$hcst$data)[3, 26:28, 3, 2, 12]), +c(-1.5654303, -0.6506540, -0.4674155), +tolerance = 0.0001 +) +expect_equal( +range(ano_data$hcst$data), +c(-6.265505, 9.440247), +tolerance = 0.0001 +) + +}) + +#====================================== +test_that("3. 
Downscaling", { expect_equal( is.list(downscaled_data), @@ -141,24 +189,24 @@ c(ensemble = 1, dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, lat ) expect_equal( as.vector(drop(downscaled_data$hcst$data)[1:2,1:2,1,2,3]), -c(293.8468, 293.5830, 294.0713, 293.7104), +c(1.482848, 1.503154, 1.599051, 1.591909), tolerance = 0.0001 ) expect_equal( mean(downscaled_data$hcst$data), -288.5359, +-0.04462092, tolerance = 0.0001 ) expect_equal( range(downscaled_data$hcst$data), -c(284.0950, 297.4405), +c(-4.611440, 5.771648), tolerance = 0.0001 ) }) #====================================== -test_that("3. Metrics", { +test_that("4. Metrics", { expect_equal( is.list(skill_metrics), @@ -182,7 +230,7 @@ dim(skill_metrics$rpss) ) expect_equal( skill_metrics$rpss[1], --0.05325714, +-0.005942857, tolerance = 0.0001 ) expect_equal( @@ -192,7 +240,7 @@ FALSE }) -test_that("4. Check saved data", { +test_that("5. Check saved data", { outputs <- paste0(recipe$Run$output_dir, "/outputs/Downscaling/") expect_equal( -- GitLab From 2d042397888412ad673c2b5cda16520465a11add Mon Sep 17 00:00:00 2001 From: eduzenli Date: Thu, 7 Mar 2024 10:38:39 +0100 Subject: [PATCH 04/18] Downscaling through the large scale variable and forecast downscaling feature have been added. --- modules/Downscaling/Downscaling.R | 102 ++++-- modules/Downscaling/tmp/Analogs.R | 454 +++++++++++++++-------- modules/Downscaling/tmp/Intbc.R | 134 +++++-- modules/Downscaling/tmp/Interpolation.R | 10 +- modules/Downscaling/tmp/Intlr.R | 459 +++++++++++++++++++----- modules/Downscaling/tmp/LogisticReg.R | 298 +++++++++++---- 6 files changed, 1063 insertions(+), 394 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 59233dc2..85deaa2c 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -14,6 +14,10 @@ Downscaling <- function(recipe, data) { # recipe: object obtained when passing the .yml recipe file to read_yaml() type <- tolower(recipe$Analysis$Workflow$Downscaling$type) + + if (!is.null(data$fcst)) { + print("Forecast data is being downscaled.") + } if (type == "none") { hcst_downscal <- data$hcst @@ -21,11 +25,6 @@ Downscaling <- function(recipe, data) { } else { - if (!is.null(data$fcst)) { - warn(recipe$Run$logger, - "The downscaling will be only performed to the hindcast data") - data$fcst <- NULL - } # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) @@ -42,8 +41,13 @@ Downscaling <- function(recipe, data) { } #TO DO: add the parametre loocv where it corresponds - if (is.null(recipe$Analysis$loocv)) { + if (is.null(recipe$Analysis$loocv) & is.null(data$fcst)) { loocv <- TRUE + } else if (!is.null(data$fcst) & (isTRUE(recipe$Analysis$loocv) | + is.null(recipe$Analysis$loocv))) { + loocv <- FALSE + warning("'loocv' is set to FALSE to train the model with the whole hindcast ", + "and predict with the forecast.") } else { loocv <- recipe$Analysis$loocv } @@ -116,7 +120,7 @@ Downscaling <- function(recipe, data) { "'rpc-based', 'qm'.")) } - hcst_downscal <- CST_Intbc(data$hcst, data$obs, + hcst_downscal <- CST_Intbc(data$hcst, data$obs, data$fcst, target_grid = target_grid, bc_method = bc_method, int_method = int_method, @@ -157,7 +161,7 @@ Downscaling <- function(recipe, data) { data$pred$data <- NULL } - hcst_downscal <- CST_Intlr(data$hcst, data$obs, + hcst_downscal <- CST_Intlr(data$hcst, data$obs, data$fcst, lr_method = lr_method, 
target_grid = target_grid, points = NULL, @@ -176,17 +180,38 @@ Downscaling <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { + + if ( (!is.null(data$hcstL) | !is.null(data$fcstL)) & is.null(data$obsL) ) { + stop ("Large scale model field (i.e., data$hcstL or data$fcstL) is provided. ", + "Observation must also be provided for the large scale variable.") + } + if ( is.null(data$hcstL) & is.null(data$fcstL) & !is.null(data$obsL) ) { + stop ("Large scale observational field (i.e., data$obsL) is provided. ", + "Model data must also be provided for the large scale variable.") + } + + if ( !is.null(data$hcstL) & !is.null(data$obsL) & !is.null(data$fcst) & is.null(data$fcstL) ) { + stop ("Large scale observational field (i.e., data$obsL) is provided. Please also ", + "provide large scale forecast data (i.e., data$fcstL) ", + "or set data$fcst to NULL.") + } + + if ( is.null(data$fcst) & !is.null(data$fcstL) ) { + print("Forecast data is being downscaled.") + } + if (is.null(nanalogs)) { warning("The number of analogs for searching has not been provided in the ", "recipe. Setting it to 3.") nanalogs <- 3 } - if (!is.null(size) & recipe$Analysis$Variables$freq == "monthly_mean") { + if (!is.null(size) & (recipe$Analysis$Variables$freq == "monthly_mean" | + recipe$Analysis$Variables$freq == "weekly_mean")) { size <- NULL warning("Size is set to NULL. ", - "It must be NULL for the monthly input data.") + "It must be NULL for the weekly or monthly input data.") } if (!is.null(size)) { @@ -195,15 +220,44 @@ Downscaling <- function(recipe, data) { sdate_dim = 'syear', time_dim = 'time', loocv = TRUE, - size = size) - data$obs$data <- Apply(data$obs$data, - target_dims="window", - fun=function (x) x[!is.na(x)])$output1 + size = size, + ncores = ncores) + + if (!is.null(data$hcstL)) { + data$obsL$data <- .generate_window(data$obsL$data, + sdate_dim = 'syear', + time_dim = 'time', + loocv = TRUE, + size = size, + ncores = ncores) + } + } + + ## Priority: fcstL, fcst, hcstL, hcst + exp <- data$hcst + + if (!is.null(data$hcstL)) { + exp <- data$hcstL + if (is.null(data$fcst)) { ## if data$fcst exists, no need for the warning + print("Provided large scale variable is used as the predictor.") + } + } + + if (!is.null(data$fcst)) { + exp <- data$fcst + } + + if (!is.null(data$fcstL)) { + exp <- data$fcstL + if (is.null(data$hcstL)) { ## if data$hcstL exist, the warning have already been given. 
+ print("Provided large scale variable is used as the predictor.") + } } - hcst_downscal <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$attrs$source_files[ - which(!is.na(data$hcst$attrs$source_files))[1]], + hcst_downscal <- CST_Analogs(exp, data$obs, + grid_exp = exp$attrs$source_files[ + which(!is.na(exp$attrs$source_files))[1]], + obsL = data$obsL, nanalogs = nanalogs, fun_analog = "wmean", lat_dim = "latitude", @@ -213,7 +267,8 @@ Downscaling <- function(recipe, data) { member_dim = "ensemble", region = NULL, return_indices = FALSE, - loocv_window = loocv, + loocv_window = loocv, + metric = "dist", ncores = ncores) if (!is.null(size)) { @@ -252,7 +307,7 @@ Downscaling <- function(recipe, data) { "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) } - hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, data$fcst, target_grid = target_grid, int_method = int_method, log_reg_method = log_reg_method, @@ -272,6 +327,13 @@ Downscaling <- function(recipe, data) { } } print(DOWNSCAL_MSG) + + fcst_downscal <- NULL + + if (!is.null(data$fcst) | ( !is.null(data$fcstL) & type == "analogs" ) ) { + fcst_downscal <- hcst_downscal + hcst_downscal$exp <- NULL + } # Saving if (recipe$Analysis$Workflow$Downscaling$save != 'none') { @@ -291,5 +353,5 @@ Downscaling <- function(recipe, data) { save_observations(recipe = recipe, data_cube = hcst_downscal$obs) } - return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = fcst_downscal$exp)) } diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R index 99fc45e7..b342a91a 100644 --- a/modules/Downscaling/tmp/Analogs.R +++ b/modules/Downscaling/tmp/Analogs.R @@ -1,16 +1,24 @@ #'@rdname CST_Analogs -#'@title Downscaling using Analogs based on large scale fields. +#'@title Downscaling using Analogs based on coarse scale fields. #' #'@author J. Ramon, \email{jaume.ramon@bsc.es} #' #'@description This function performs a downscaling using Analogs. To compute -#'the analogs given a coarse-scale field, the function looks for days with -#'similar conditions in the historical observations. The coarse scale and -#'observation data can be either global or regional. In the latter case, the -#'region is defined by the user. In principle, the coarse and observation data -#'should be of the same variable, although different variables can also be admitted. -#'The analogs function will find the N best analogs based in Minimum Euclidean -#'distance. +#'the analogs given a coarse-scale field, the function looks for days with similar conditions +#'in the historical observations. The analogs function determines the N best analogs based +#'on Euclidian distance, distance correlation, or Spearman's correlation metrics. To downscale +#'a local-scale variable, either the variable itself or another large-scale variable +#'can be utilized as the predictor. In the first scenario, analogs are examined between +#'the observation and model data of the same local-scale variable. In the latter scenario, +#'the function identifies the day in the observation data that closely resembles +#'the large-scale pattern of interest in the model. When it identifies the date of +#'the best analog, the function extracts the corresponding local-scale variable for that day +#'from the observation of the local scale variable. The used local-scale and large-scale +#'variables can be retrieved from independent regions. 
The input data for the first case must +#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'exp' are the +#'required input fields. Users can perform the downscaling process over the subregions +#'that can be identified through the 'region' argument, instead of focusing +#'on the entire area of the loaded data. #' #'The search of analogs must be done in the longest dataset posible, but might #'require high-memory computational resources. This is important since it is @@ -21,25 +29,24 @@ #'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal #'and decadal predictions) but can admit climate projections or reanalyses. It does #'not have constrains of specific region or variables to downscale. -#'@param exp an 's2dv' object with named dimensions containing the experimental field on -#'the coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and time. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an 's2dv' object with named dimensions containing the observational field. -#'The object must have, at least, the dimensions latitude, longitude and start date. -#'The object is expected to be already subset for the desired region. -#'@param obs2 an 's2dv' object with named dimensions containing a different observational -#'field to that in 'obs'. If provided, these observations will be used in the training, -#'i.e. the searching for analogs, so that they should be in a coarser grid to those in -#''obs'. Training with observations on a grid with a spatial resolution closer to that -#'in 'exp', will in principle ensure better results. The object must have, at least, the -#'dimensions latitude, longitude and start date. The object is expected to be already -#'subset for the desired region. -#'@param grid_exp a character vector with a path to an example file of the exp data. -#'It can be either a path to another NetCDF file which to read the target grid from +#'@param exp an 's2dv_cube' object with named dimensions containing the experimental field +#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) +#'or for the large-scale variable used as the predictor (if obsL is provided). +#'The object must have, at least, the dimensions latitude, longitude, start date and time. +#'The object is expected to be already subset for the desired region. Data can be in one +#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct +#'results in the latter case, the borders of the region should be specified in the parameter +#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data. +#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE. +#'@param obs an 's2dv_cube' object with named dimensions containing the observational field +#'for the variable targeted for downscaling. The object must have, at least, the dimensions +#'latitude, longitude and start date. The object is expected to be already subset for the +#'desired region. +#'@param obsL an 's2dv_cube' object with named dimensions containing the observational +#'field of the large-scale variable. 
The object must have, at least, the dimensions latitude, +#'longitude and start date. The object is expected to be already subset for the desired region. +#'@param grid_exp a character vector with a path to an example file of the exp +#'data. It can be either a path to another NetCDF file which to read the target grid from #'(a single grid must be defined in such file) or a character vector indicating the #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. #'@param nanalogs an integer indicating the number of analogs to be searched @@ -56,6 +63,9 @@ #''data' in exp and obs. Default set to "time". #'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". +#'@param metric a character vector to select the analog specification method. Only these +#'options are valid: "dist" (i.e., Euclidian distance), "dcor" (i.e., distance correlation) +#'or "cor" (i.e., Spearman's .correlation). The default metric is "dist". #'@param region a numeric vector indicating the borders of the region defined in exp. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -68,7 +78,7 @@ #'of the window. It is recommended to be set to TRUE. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. -#'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the +#'@return An 's2dv_cube' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL #'(default), the output array in 'data' also contains the dimension 'analog' with the best #'analog days. 
@@ -81,31 +91,40 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) #'obs_lons <- seq(0,6, 6/14) #'obs_lats <- seq(0,6, 6/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) #'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') #'@export -CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", - lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", - region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { - - # input exp and obs must be s2dv_cube objects - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } +CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3, + fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", metric = "dist", region = NULL, + return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { # input exp and obs must be s2dv_cube objects if (!inherits(obs,'s2dv_cube')) { stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], - exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], - obs_lons = obs$coords[[lon_dim]], grid_exp = grid_exp, nanalogs = nanalogs, - fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, - region = region, return_indices = return_indices, loocv_window = loocv_window, - ncores = ncores) + if (!is.null(obsL)) { + # input obs must be s2dv_cube objects + if (!inherits(obsL,'s2dv_cube')) { + stop("Parameter 'obsL' must be of the class 's2dv_cube'") + } + } + # input exp and obs must be s2dv_cube objects + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + res <- Analogs(exp = exp$data, obs = obs$data, obsL = obsL$data, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + obsL_lats = obsL$coords[[lat_dim]], obsL_lons = obsL$coords[[lon_dim]], + grid_exp = grid_exp, nanalogs = nanalogs, fun_analog = fun_analog, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, + time_dim = time_dim, member_dim = member_dim, metric = metric, + region = region, return_indices = return_indices, + loocv_window = loocv_window, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data @@ -129,13 +148,21 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} #' #'@description This function performs a downscaling using Analogs. To compute -#'the analogs given a coarse-scale field, the function looks for days with -#'similar conditions in the historical observations. The coarse scale and -#'observation data can be either global or regional. In the latter case, the -#'region is defined by the user. In principle, the coarse and observation data -#'should be of the same variable, although different variables can also be admitted. -#'The analogs function will find the N best analogs based in Minimum Euclidean -#'distance. 
+#'the analogs given a coarse-scale field, the function looks for days with similar conditions +#'in the historical observations. The analogs function determines the N best analogs based +#'on RMSE, distance correlation, or Spearman's correlation metrics. To downscale +#'a local-scale variable, either the variable itself or another large-scale variable +#'can be utilized as the predictor. In the first scenario, analogs are examined between +#'the observation and model data of the same local-scale variable. In the latter scenario, +#'the function identifies the day in the observation data that closely resembles +#'the large-scale pattern of interest in the model. When it identifies the date of +#'the best analog, the function extracts the corresponding local-scale variable for that day +#'from the observation of the local scale variable. The used local-scale and large-scale +#'variables can be retrieved from independent regions. The input data for the first case must +#'include 'exp' and 'obs,' while in the second case, 'exp', 'obs,' and 'obsL' are the +#'required input fields. Users can perform the downscaling process over the subregions +#'that can be identified through the 'region' and 'regionL' arguments, instead of focusing +#'on the entire area of the loaded data. #' #'The search of analogs must be done in the longest dataset posible, but might #'require high-memory computational resources. This is important since it is @@ -146,19 +173,21 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal #'and decadal predictions) but can admit climate projections or reanalyses. It does #'not have constrains of specific region or variables to downscale. -#'@param exp an array with named dimensions containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and time. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude, start date and time. The object -#'is expected to be already subset for the desired region. Optionally, 'obs' can have the -#'dimension 'window', containing the sampled fields into which the function will look for -#'the analogs. See function 'generate_window()'. Otherwise, the function will look for -#'analogs using all the possible fields contained in obs. +#'@param exp an array with named dimensions containing the experimental field +#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) +#'or for the large-scale variable used as the predictor (if obsL is provided). +#'The object must have, at least, the dimensions latitude, longitude, start date and time. +#'The object is expected to be already subset for the desired region. Data can be in one +#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct +#'results in the latter case, the borders of the region should be specified in the parameter +#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data. 
+#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE. +#'@param obs an array with named dimensions containing the observational field for the variable +#'targeted for downscaling. The object must have, at least, the dimensions latitude, longitude, +#'start date and time. The object is expected to be already subset for the desired region. +#'Optionally, 'obs' can have the dimension 'window', containing the sampled fields into which +#'the function will look for the analogs. See function 'generate_window()'. Otherwise, +#'the function will look for analogs using all the possible fields contained in obs. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -167,17 +196,20 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'range from -90 to 90. #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes #'can range from -180 to 180 or from 0 to 360. +#'@param obsL an 's2dv_cube' object with named dimensions containing the observational +#'field of the large-scale variable.The object must have, at least, the dimensions latitude, +#'longitude and start date. The object is expected to be already subset for the desired region. +#'Optionally, 'obsL' can have the dimension 'window', containing the sampled fields into which +#'the function will look for the analogs. See function 'generate_window()'. Otherwise, +#'the function will look for analogs using all the possible fields contained in obs. +#'@param obsL_lats a numeric vector containing the latitude values in 'obsL'. Latitudes must +#'range from -90 to 90. +#'@param obsL_lons a numeric vector containing the longitude values in 'obsL'. Longitudes +#'can range from -180 to 180 or from 0 to 360. #'@param grid_exp a character vector with a path to an example file of the exp data. #'It can be either a path to another NetCDF file which to read the target grid from #'(a single grid must be defined in such file) or a character vector indicating the #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. -#'@param obs2 an 's2dv' object with named dimensions containing a different observational -#'field to that in 'obs'. If provided, these observations will be used in the training, -#'i.e. the searching for analogs, so that they should be in a coarser grid to those in -#''obs'. Training with observations on a grid with a spatial resolution closer to that -#'in 'exp', will in principle ensure better results. The object must have, at least, the -#'dimensions latitude, longitude and start date. The object is expected to be already -#'subset for the desired region. #'@param nanalogs an integer indicating the number of analogs to be searched. #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), @@ -192,6 +224,9 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #''data' in exp and obs. Default set to "time". #'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". +#'@param metric a character vector to select the analog specification method. 
Only these +#'options are valid: "dist" (i.e., Euclidian distance), "dcor" (i.e., distance correlation) +#'or "cor" (i.e., Spearman's .correlation). The default metric is "dist". #'@param region a numeric vector indicating the borders of the region defined in exp. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -209,7 +244,7 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'@import multiApply #'@import CSTools #'@importFrom s2dv InsertDim CDORemap -#'@importFrom FNN get.knnx +#'@importFrom energy dcor #' #'@seealso \code{\link[s2dverification]{CDORemap}} #' @@ -229,11 +264,11 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, #'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') #'@export -Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL, - obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, - lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - member_dim = "member", region = NULL, return_indices = FALSE, - loocv_window = TRUE, ncores = NULL) { +Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lons, + grid_exp, obsL = NULL, obsL_lats = NULL, obsL_lons = NULL, nanalogs = 3, + fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", metric = "dist", region = NULL, + return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { #----------------------------------- # Checkings #----------------------------------- @@ -264,7 +299,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, stop("Parameter 'time_dim' must be of the class 'character'") } - # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", "'lon_dim'") @@ -305,38 +340,41 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, } } - if (!is.null(obs2)) { + if (!is.null(obsL) ) { + # the code is not yet prepared to handle members in the observations - if (member_dim %in% names(dim(obs2))) { - if (identical(as.numeric(dim(obs2)[member_dim]), 1)) { - obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected') + if (member_dim %in% names(dim(obsL))) { + if (identical(as.numeric(dim(obsL)[member_dim]), 1)) { + obsL <- ClimProjDiags::Subset(x = obsL, along = member_dim, indices = 1, drop = 'selected') } else { - stop("Not implemented for observations with members ('obs2' can have 'member_dim', ", + stop("Not implemented for observations with members ('obsL' can have 'member_dim', ", "but it should be of length = 1).") } } - if (is.null(obs2_lats) | is.null(obs2_lons)) { + if (is.null(obsL_lats) | is.null(obsL_lons)) { stop("Missing latitudes and/or longitudes for the provided training observations. 
Please ", - "provide them with the parametres 'obs2_lats' and 'obs2_lons'") + "provide them with the parametres 'obsL_lats' and 'obsL_lons'") } - if (is.na(match(lon_dim, names(dim(obs2))))) { - stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'") + if (is.na(match(lon_dim, names(dim(obsL))))) { + stop("Missing longitude dimension in 'obsL', or does not match the parameter 'lon_dim'") } - if (is.na(match(lat_dim, names(dim(obs2))))) { - stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'") + if (is.na(match(lat_dim, names(dim(obsL))))) { + stop("Missing latitude dimension in 'obsL', or does not match the parameter 'lat_dim'") } - if (is.na(match(sdate_dim, names(dim(obs2))))) { - stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'") + if (is.na(match(sdate_dim, names(dim(obsL))))) { + stop("Missing start date dimension in 'obsL', or does not match the parameter 'sdate_dim'") } - if (is.na(match(time_dim, names(dim(obs2))))) { - stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") + if (is.na(match(time_dim, names(dim(obsL))))) { + stop("Missing time dimension in 'obsL', or does not match the parameter 'time_dim'") } + } + ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | @@ -350,15 +388,25 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) } - if (!is.null(obs2)) { - obs_train <- obs2 - obs_train_lats <- obs2_lats - obs_train_lons <- obs2_lons + # metric method to be used to specify the analogs + stopifnot(metric %in% c("cor", "dcor", "dist")) + + if ( (dim(exp)[sdate_dim] != dim(obs)[sdate_dim]) & loocv_window ) { + loocv_window <- FALSE + warning("The lengths of sdate_dim in the data provided through exp and obs are ", + "not the same. Thus, exp is considered as forecast data, and loocv_window ", + "is set to FALSE.") + } + + if (!is.null(obsL)) { + obs_train <- obsL + obs_train_lats <- obsL_lats + obs_train_lons <- obsL_lons } else { obs_train <- obs obs_train_lats <- obs_lats obs_train_lons <- obs_lons - } + } # Correct indices later if cross-validation loocv_correction <- FALSE @@ -366,24 +414,40 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, loocv_correction <- TRUE } - #----------------------------------- - # Interpolate high-res observations to the coarse grid - #----------------------------------- + # crop downscaling region, if the argument region is provided. + if (!is.null(region) & is.null(obsL)) { + # if a border is equally distant from two different grids, the map will be cropped from the grid having smaller coordinate + + a <- which.min(abs((region[1]-obs_lons))) + b <- which.min(abs((region[2]-obs_lons))) + c <- which.min(abs((region[3]-obs_lats))) + d <- which.min(abs((region[4]-obs_lats))) + obs <- ClimProjDiags::Subset(x = obs, along = list(lon_dim,lat_dim), + indices = list(a:b,c:d), drop = 'selected') + } + if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and ", - "'exp_lons'.") - region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) + warning("The borders of the downscaling region have not been provided. 
", + "Assuming the four borders of the downscaling region are defined by the ", + "first and last elements of the parameters 'exp_lats' and 'exp_lons'.") + region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], + exp_lats[length(exp_lats)]) } - obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, - target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = "conservative", region = region, ncores = ncores) + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region, + ncores = ncores) + # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), lat2 = exp_lats, lon1 = as.numeric(obs_interpolated$lon), lon2 = exp_lons)) { - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", + if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), + lat2 = exp_lats, + lon1 = as.numeric(obs_interpolated$lon), + lon2 = exp_lons)) { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = grid_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = "conservative", region = region, ncores = ncores)$data } else { exp_interpolated <- exp @@ -391,40 +455,57 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # Create window if user does not have it in the training observations if ( !("window" %in% names(dim(obs_interpolated$data))) ) { - obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, - time_dim = time_dim, loocv = loocv_window, ncores = ncores) - obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window, ncores = ncores) + obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window, + ncores = ncores) + if (!is.null(obsL)) { + if ( ("window" %in% names(dim(obs))) ) { + stop("Either both obs and obsL should include 'window' dimension or none.") + } + } + obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, + loocv = loocv_window, ncores = ncores) + } else { obs_train_interpolated <- obs_interpolated$data - dim(obs_train_interpolated) <- dim(obs_train_interpolated)[-which(names(dim(obs_train_interpolated))=="time")] + dim(obs_train_interpolated) <- dim(ClimProjDiags::Subset(x = obs_train_interpolated, + along = time_dim, indices = 1, drop = 'selected')) + + if (!is.null(obsL)) { + if ( !("window" %in% names(dim(obs))) ) { + stop("Either both obs and obsL should include 'window' dimension or none.") + } + } obs_hres <- obs - dim(obs_hres) <- dim(obs_hres)[-which(names(dim(obs_hres))=="time")] + dim(obs_hres) <- dim(ClimProjDiags::Subset(x = obs_hres, + along = time_dim, indices = 1, drop = 'selected')) + } #----------------------------------- # Reshape train and test #----------------------------------- - res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), - target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), - c("window", lat_dim, lon_dim)), - fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, 
k = nanalogs, - fun_analog = fun_analog), ncores = ncores)$output1 - # Return the indices of the best analogs - if (return_indices) { - res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + RES <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), c("window", lat_dim, lon_dim)), - fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, - fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, + k = nanalogs, metric = metric, fun_analog = fun_analog), + ncores = ncores) ## output1 -> data, output2 -> index, output3 -> metric + + res.data <- RES$output1 + + # Return the indices of the best analogs + if (return_indices) { + res.ind <- RES$output2 # If cross-validation has been applied, correct the indices if (loocv_correction) { nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] - res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) + res.ind <- Apply(res.ind, target_dims = c("index", sdate_dim), function(x) sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), - output_dims = c("ind", sdate_dim), ncores = ncores)$output1 + output_dims = c("index", sdate_dim), ncores = ncores)$output1 } # restore ensemble dimension in observations if it existed originally @@ -433,72 +514,92 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, } res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) - } - else { + } else { # restore ensemble dimension in observations if it existed originally - if (restore_ens) { - obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) - } - - res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) } return(res) } # For each element in test, find the indices of the k nearest neigbhors in train -.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { - - require(FNN) - - # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) +.analogs <- function(train, test, obs_hres, k, fun_analog, metric = NULL, return_indices = FALSE) { + + # train and obs_rs dim: 3 dimensions window, lat and lon (in this order) # test dim: 2 dimensions lat and lon (in this order) # Number of lats/lons of the high-resolution data space_dims_hres <- dim(obs_hres)[c(2,3)] # Reformat train and test as an array with (time, points) - train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" test <- as.vector(test) obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" - + # Identify and remove NA's - idx_na_tr <- is.na(train[ , 1]) + dum<-which(!apply(train,2,function (x) all(is.na(x))))[1] ## the column in which NA in space will be investigated. 
it shouldn't be all-NA time-step + idx_na_tr <- is.na(train[ , dum]) # NA in space + idy_na_tr <- is.na(train[1, ]) # NA in time idx_na_te <- is.na(test) idx_na <- idx_na_tr | idx_na_te - tr_wo_na <- t(train[!idx_na , ]) + tr_wo_na <- t(train[!idx_na , !idy_na_tr ]) te_wo_na <- test[!idx_na] te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") - - knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - - # Either return only the indices or the analogs - if (return_indices) { - res <- as.numeric(idx) + if (all(is.na(test))) { + res <- array(NA, space_dims_hres) + res_ind <- array(NA, k) + names(dim(res_ind)) <- c("index") + res_metric <- array(NA, k) + names(dim(res_metric)) <- c("metric") } else { + if (metric == "dist") { + dist_all <- sqrt(rowSums((sweep(tr_wo_na, 2, te_wo_na[1,]))^2)) # euc dist + best_vals <- head(sort(dist_all), k) + idx <- match(best_vals, dist_all) + } else if (metric == "cor") { + cor_all <- apply(tr_wo_na, 1, function (x) cor(x,te_wo_na[1, ], method = "spearman")) # cor + best_vals <- head(sort(cor_all, decreasing = TRUE), k) + idx <- match(best_vals, cor_all) + } else if (metric == "dcor") { +# require(energy) + dcor_all <- apply(tr_wo_na, 1, function (x) .dcor(x,te_wo_na[1, ])) # dcor + best_vals <- head(sort(dcor_all, decreasing = TRUE), k,) + idx <- match(best_vals, dcor_all) + } + if (isTRUE(any(idy_na_tr))) { + dum <-(1:length(idy_na_tr))[!idy_na_tr] + idx <- dum[idx] + } + res_ind <- array(idx, k) + names(dim(res_ind)) <- c("index") + res_metric <- array(best_vals, c(k)) + names(dim(res_metric)) <- c("metric") res <- obs_hres[ , idx] dim(res) <- c(space_dims_hres, analogs = k) if (!is.null(fun_analog)) { if (fun_analog == "wmean") { - weight <- 1 / dist + if (metric == "dist") { + weight <- 1 / best_vals + } else { + weight <- best_vals + } res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) } else if (fun_analog == "min") { - res<-res[,,which.min(dist)] + res <- res[, , which.min(best_vals)] } else if (fun_analog == "max") { - res<-res[,,which.max(dist)] + res <- res[, , which.max(best_vals)] } else { res <- apply(res, c(1,2), fun_analog) } } } - - return(res) + return(list(res, res_ind, res_metric)) } - # Add the dimension window to an array that contains, at least, the start date and time # dimensions # object has at least dimensions sdate and time @@ -548,3 +649,38 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, return(obj_window) } + +# Distance correlation function +.dcor <- function(x, y) { + n <- length(x) + + # Calculate Euclidean distances for x + dist_x <- as.matrix(dist(matrix(x))) + # Calculate Euclidean distances for y + dist_y <- as.matrix(dist(matrix(y))) + + # Centering matrices + H <- diag(n) - 1/n + + # Centered distance matrices + dist_centered_x <- H %*% dist_x %*% H + dist_centered_y <- H %*% dist_y %*% H + + # Calculate the product of mean-centered distance matrices + B <- dist_centered_x %*% t(dist_centered_y) + C <- dist_centered_x %*% t(dist_centered_x) + D <- dist_centered_y %*% t(dist_centered_y) + + # Calculate the distance covariance + dcov_xy <- sqrt(sum(diag(B))) + + # Calculate the variances + cov_xx <- sqrt(sum(diag(C))) + cov_yy <- sqrt(sum(diag(D))) + + # Calculate the distance correlation + dcor_xy <- dcov_xy / sqrt(cov_xx * cov_yy) + + return(dcor_xy) +} + diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index dc5d050b..70043c1f 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ 
b/modules/Downscaling/tmp/Intbc.R @@ -20,6 +20,10 @@ #'@param obs an 's2dv object' containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal +#'forecast experiment data. If the forecast is provided, it will be downscaled using the +#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The +#'default value is NULL. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. #'@param bc_method a character vector indicating the bias adjustment method to be applied after @@ -62,13 +66,13 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) -#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'bias', int_method = 'conservative') #'@export -CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", +CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...) { if (!inherits(exp,'s2dv_cube')) { @@ -79,25 +83,38 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, - int_method = int_method, bc_method = bc_method, points = points, - source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], + res <- Intbc(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + target_grid = target_grid, int_method = int_method, + bc_method = bc_method, points = points, + source_file_exp = exp$attrs$source_files[1], + source_file_obs = obs$attrs$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...) 
# Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - res_s2dv <- list(exp = exp, obs = obs) + if (is.null(exp_cor)) { + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + } else { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp_cor, obs = obs) + } + return(res_s2dv) } @@ -123,7 +140,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #''region'. #'@param obs an array with named dimensions containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. +#'expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and +#'observations; if not provided, the hindcast will be downscaled instead. The default value +#'is NULL. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -187,10 +208,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, #'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') #'@export -Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, - points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL, - region = NULL, ncores = NULL, ...) { +Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, + bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", source_file_exp = NULL, + source_file_obs = NULL, region = NULL, ncores = NULL, ...) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -238,6 +260,27 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "'crps_min', 'rpc-based'. 
The abbreviations 'dbc','qm' can also be used.") } + if (!is.null(exp_cor)) { + if (is.na(match(lon_dim, names(dim(exp_cor))))) { + stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp_cor))))) { + stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp_cor))))) { + stop("Missing start date dimension in 'exp_cor', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp_cor))))) { + stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'") + } + } + # When observations are pointwise if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { point_obs <- T @@ -259,9 +302,9 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders ", - "of the downscaling region are defined by the first and last elements of the parametres ", - "'obs_lats' and 'obs_lons'.") + warning("The borders of the downscaling region have not been provided. Assuming the ", + "four borders of the downscaling region are defined by the first and last ", + "elements of the parameters 'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } ## ncores @@ -272,18 +315,34 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region, ncores = ncores) + + exp_cor_interpolated <- NULL + + if (!is.null(exp_cor)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, + region = region, ncores = ncores) + + } # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { @@ -306,10 +365,17 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats,
obs_lons, target_grid, if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, + exp_cor = exp_cor_interpolated$data, + na.rm = TRUE, memb_dim = member_dim, sdate_dim = sdate_dim, + ncores = ncores, ...) } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + # Dynamical bias correction is not yet prepared to handle hindcast-forecast data + # It will return only the hindcast downscaled + if (!is.null(exp_cor)) { + warning("For the dynamical bias correction only the hindcast downscaled will be returned.") + } # the temporal dimension must be only one dimension called "time" if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), @@ -328,8 +394,10 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (dim(obs_ref)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation. Returning NA.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, + exp_cor = exp_cor_interpolated$data, + memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, + cal.method = bc_method) } # Return a list of three elements diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R index ed79f4fd..51de9ec0 100644 --- a/modules/Downscaling/tmp/Interpolation.R +++ b/modules/Downscaling/tmp/Interpolation.R @@ -48,7 +48,7 @@ #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) #'lons <- 1:5 #'lats <- 1:4 -#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = lats, lon = lons)) #'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') #'@export CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, @@ -68,7 +68,7 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") } - res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$attrs[[lon_dim]], + res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]], source_file = exp$attrs$source_files[1], points = points, method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, lon_dim = lon_dim, region = region, method_point_interp = method_point_interp, ncores = ncores) @@ -334,10 +334,10 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me if (griddes$yinc < 0) { system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) griddes <- .get_griddes(nc_cropped2) + system(paste0('rm ', nc_cropped2)) } # remove temporary files system(paste0('rm ', nc_cropped1)) - system(paste0('rm ', nc_cropped2)) if (is.null(griddes)) { stop("'griddes' not found in the NetCDF source files") @@ -679,7 +679,9 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # sdate, time, ensemble, num_procs, etc) #====================== .get_model_data <- function(weights.df,
mdata, ncores = NULL) { - + + require(plyr) + #----------------- # Get data for all combinations of i and j. # (inefficient, getting many unneded pairs). diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 36a7f11b..0d4f10dd 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -22,6 +22,10 @@ #'@param obs an 's2dv object' containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal +#'forecast experiment data. If the forecast is provided, it will be downscaled using the +#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The +#'default value is NULL. #'@param lr_method a character vector indicating the linear regression method to be applied #'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' #'method fits a linear regression using high resolution observations as predictands and the @@ -82,44 +86,87 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) #'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') #'@export -CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, - method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", member_dim = "member", - large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) { +CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, predictors = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", large_scale_predictor_dimname = 'vars', + loocv = TRUE, region = NULL, ncores = NULL) { - if (!inherits(exp,'s2dv_cube')) { + if (!inherits(exp, 's2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") } - if (!inherits(obs,'s2dv_cube')) { + if (!inherits(obs, 's2dv_cube')) { stop("Parameter 'obs' must be of the class 's2dv_cube'") } - - res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points, - source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], + + exp_cor_aux <- NULL + + if (!is.null(exp_cor)) { + if (identical(lr_method, 'basic') | identical(lr_method, '4nn')) { + if (!inherits(exp_cor, 's2dv_cube')) { + stop("Parameter 'exp_cor' must be of the class 's2dv_cube'") + } + exp_cor_aux <- exp_cor$data + # when large-scale is selected, the forecast object does not have to be an s2dv_cube object + } else if (identical(lr_method, 'large-scale')) { + if (!inherits(exp_cor, 's2dv_cube')) { + exp_cor_aux <- exp_cor + } else { + exp_cor_aux <- exp_cor$data + } + } + } + res <- Intlr(exp = exp$data, obs = obs$data, exp_cor = exp_cor_aux, + exp_lats = 
exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + points = points, source_file_exp = exp$attrs$source_files[1], + source_file_obs = obs$attrs$source_files[1], target_grid = target_grid, lr_method = lr_method, int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, - member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, + member_dim = member_dim, + large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, region = region, ncores = ncores) - + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = obs) + + if (is.null(exp_cor)) { + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + } else { + if (identical(lr_method, 'basic') | identical(lr_method, '4nn')) { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + # when large-scale is selected, the forecast object does not have to be an s2dv_cube object + } else if (identical(lr_method, 'large-scale')) { + if (!inherits(exp_cor, 's2dv_cube')) { + exp_cor <- suppressWarnings(s2dv_cube(res$data, lat = res$lat, lon = res$lon)) + } else { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + } + } + + res_s2dv <- list(exp = exp_cor, obs = obs) + } + return(res_s2dv) } @@ -147,6 +194,10 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in #'@param obs an array with named dimensions containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and +#'observations; if not provided, the hindcast will be downscaled instead. The default value +#'is NULL. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. 
Longitudes @@ -222,11 +273,12 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in #'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, #'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') #'@export -Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, - int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, - predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE, - ncores = NULL) { +Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, + target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, + source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', + loocv = TRUE, ncores = NULL) { #----------------------------------- # Checkings @@ -274,6 +326,43 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t "'sdate_dim'") } + if (!is.null(exp_cor)) { + + if (is.na(match(sdate_dim, names(dim(exp_cor))))) { + stop("Missing start date dimension in 'exp_cor', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp_cor))))) { + stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'") + } + + if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast + loocv <- FALSE + warning("Forecast data will be downscaled. 
'loocv' is set to FALSE ", + "to train the mode with the whole hindcast and predict with the forecast.") + } + + if (is.null(predictors)) { + + if (is.na(match(lon_dim, names(dim(exp_cor))))) { + stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp_cor))))) { + stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ", + "'lat_dim'") + } + } + } + + if (lr_method == '4nn') { + warning("4nn method skips the interpolation step since the method itself inherently downscale ", + "the coarse resolution data to the reference data resolution without the need ", + "for interpolation.") + } + # When observations are pointwise if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { point_obs <- T @@ -295,10 +384,6 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t "through the parameter 'method_point_interp'.") } - # sdate must be the time dimension in the input data - stopifnot(sdate_dim %in% names(dim(exp))) - stopifnot(sdate_dim %in% names(dim(obs))) - # the code is not yet prepared to handle members in the observations restore_ens <- FALSE if (member_dim %in% names(dim(obs))) { @@ -321,6 +406,17 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t stopifnot(sdate_dim %in% names(dim(predictors))) stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) } + # forecasts for the large scale predictors + if (!is.null(exp_cor)) { + if (is.na(match(large_scale_predictor_dimname, names(dim(exp_cor))))) { + stop("Missing large scale predictor dimension in 'exp_cor', or does not match ", + "the parameter 'large_scale_predictor_dimname'") + } + if (!identical(dim(exp_cor)[names(dim(exp_cor)) == large_scale_predictor_dimname], + dim(predictors)[names(dim(predictors)) == large_scale_predictor_dimname])) { + stop("Large scale predictor dimension in exp_cor and predictors must be identical.") + } + } } ## ncores if (!is.null(ncores)) { @@ -347,19 +443,32 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + + if (!is.null(exp_cor) & is.null(predictors)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + } # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = 
obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) + if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons))) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) lats <- obs_interpolated$lat lons <- obs_interpolated$lon @@ -383,6 +492,17 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t target_dims_predictor <- sdate_dim target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + aux_dim <- NULL + forecast <- exp_cor_interpolated$data + target_dims_predictor <- c(sdate_dim, member_dim) + target_dims_forecast <- c(sdate_dim, member_dim) + } else { + forecast <- NULL + target_dims_forecast <- NULL + } + } # (Multi) linear regression with large-scale predictors @@ -398,6 +518,17 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + aux_dim <- large_scale_predictor_dimname + if (!inherits(exp_cor, 's2dv_cube')) { + forecast <- exp_cor + } else { + forecast <- exp_cor$data + } + target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname, member_dim) + target_dims_forecast <- c(sdate_dim, large_scale_predictor_dimname, member_dim) + } } # Multi-linear regression with the four nearest neighbours @@ -405,9 +536,19 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # Predictand: observations else if (lr_method == '4nn') { - predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, - lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) - + predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, + lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + nn = 4, ncores = ncores) + + if (!is.null(exp_cor)) { + aux_dim <- 'nn' + forecast <- .find_nn(coar = exp_cor, lats_hres = obs_lats, lons_hres = obs_lons, + lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + nn = 4, ncores = ncores) + } + if (is.null(points) | ("location" %in% names(dim(obs)))) { if (!is.null(target_grid)) { warning("Interpolating to the 'obs' grid") @@ -416,44 +557,65 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t lats <- obs_lats lons <- obs_lons - } + } else { # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, # interpolate to point locations - else { - predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, + + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, 
ncores = ncores) + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) - predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) - + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) + + if (!is.null(exp_cor)) { + forecast <- Interpolation(exp = forecast, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) + forecast <- forecast$data + } + lats <- predictor$lat lons <- predictor$lon predictor <- predictor$data predictand <- predictand$data } - - target_dims_predictor <- c(sdate_dim,'nn') - target_dims_predictand <- sdate_dim + + target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + target_dims_predictor <- c(sdate_dim,'nn', member_dim) + target_dims_forecast <- c(sdate_dim,'nn', member_dim) + } else { + target_dims_predictor <- c(sdate_dim,'nn') + } + } else { + + stop(paste0(lr_method, " method is not implemented yet")) } + # Apply the linear regressions + # case hindcast - forecast + if (!is.null(exp_cor)) { + res <- Apply(list(predictor, predictand, forecast), + target_dims = list(target_dims_predictor, target_dims_predictand, + target_dims_forecast), + fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores)$output1 + } + # case hindcast only else { - stop(paste0(lr_method, " method is not implemented yet")) + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, + target_dims_predictand), + fun = .intlr, loocv = loocv, ncores = ncores)$output1 + names(dim(res))[1] <- sdate_dim } - - print(paste0('dim predictor',dim(predictor))) - print(paste0('dim predictand',dim(predictand))) - print(dim(list(predictor[1]))) - # Apply the linear regressions - - - - res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), - fun = .intlr, loocv = loocv, ncores = ncores)$output1 - - names(dim(res))[1] <- sdate_dim - # names(dim(res))[which(names(dim(res)) == '')] # restore ensemble dimension in observations if it existed originally if (restore_ens) { @@ -463,30 +625,65 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # Return a list of three elements res <- list(data = res, obs = predictand, lon = lons, lat = lats) + # for testing 4nn + #x <- predictor[10,10,1,1,1,,,] + #x <- Reorder(x,c(2,3,1)) + #y <- predictand[1,1,1,10,,10] + #f <- forecast[10,10,1,1,1,,,] + #f <- Reorder(f,c(2,1)) + #f <- InsertDim(f,1,1,'sdate') + + # large-scale + #x <- Reorder(predictor,c(1,3,2)) + #y <- predictand[1,1,1,10,,10] + #f <- Reorder(forecast,c(1,3,2)) + return(res) } #----------------------------------- # Atomic function to generate and apply the linear regressions #----------------------------------- -.intlr <- function(x, y, loocv) { - - tmp_df <- data.frame(x = x, y = y) +.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL) { + + require (plyr) + + if (!is.null(f)) { + if (!is.null(aux_dim)) { + tmp_df <- data.frame(x = adply(x,.margins = 3, .id = NULL), y = y) + } else { + tmp_df <- data.frame(x 
= as.vector(x), y = y) + } + } else { + tmp_df <- data.frame(x = x, y = y) + } # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { - - n <- nrow(tmp_df) - res <- rep(NA, n) - + if (is.null(f)) { + res <- rep(NA, nrow(tmp_df)) + } else { + if (!is.null(aux_dim)) { + res <- array(NA, dim(f)[names(dim(f)) != aux_dim]) + } else { + res <- array(NA, dim(f)) + } + } } else { # training +# if (!is.null(f)) { ## too many number of ensemble members, + ## so Principle component analysis is needed. +# PR <- prcomp(tmp_df[, -ncol(tmp_df)], scale = TRUE) ## prcomp between predictors +# N_predictors <- which (summary(PR)$importance[ "Cumulative Proportion",] > .8)[1] +# tmp_df <- data.frame(x = as.vector(PR$x[, 1:N_predictors]), y = y) +# } + lm1 <- .train_lm(df = tmp_df, loocv = loocv) - + # prediction - res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) + res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim) } - - return(res) + + return(res) } #----------------------------------- @@ -517,7 +714,9 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t #----------------------------------- # Function to apply the linear regressions. #----------------------------------- -.pred_lm <- function(df, lm1, loocv) { +.pred_lm <- function(df, lm1, f, loocv, aux_dim) { + + require (plyr) if (loocv) { pred_vals <- sapply(1:nrow(df), function(j) { @@ -527,33 +726,95 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t return(predict(lm1[[j]], df[j,])) }}) } else { - if (!is.na(lm1)) { - pred_vals_ls <- lapply(lm1, predict, data = df) - pred_vals <- unlist(pred_vals_ls) + if (!is.na(lm1)) { + # case to downscale hindcasts + if (is.null(f)) { + pred_vals_ls <- lapply(lm1, predict, data = df) + pred_vals <- unlist(pred_vals_ls) + # case to downscale forecasts + } else { + if (!is.null(aux_dim)) { + # 4nn & large-scale + fcst_df <- adply(f,.margins = 3, .id = NULL) + colnames(fcst_df) <- paste0('x.',1:ncol(fcst_df)) + pred_vals_ls <- lapply(lm1, predict, newdata = fcst_df) + pred_vals <- unlist(pred_vals_ls) + dim(pred_vals) <- dim(f)[names(dim(f)) != aux_dim] + } else { + # basic + pred_vals_ls <- lapply(lm1, predict, newdata = data.frame(x = as.vector(f))) + pred_vals <- unlist(pred_vals_ls) + dim(pred_vals) <- dim(f ) + } + } } else { - pred_vals <- rep(NA, nrow(df)) + if (is.null(f)) { + pred_vals <- rep(NA, nrow(df)) + } else { + if (!is.null(aux_dim)) { + pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim]) + } else { + pred_vals <- array(NA, dim(f)) + } + } } - } + } + return(pred_vals) } #----------------------------------- -# Function to find N nearest neighbours. +# Function to ind N nearest neighbours. 
# 'coar' is an array with named dimensions #----------------------------------- -.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) { - - # Sort the distances from closest to furthest - idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) - idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) - - names(dim(idx_lat)) <- c('nn', lat_dim) - names(dim(idx_lon)) <- c('nn', lon_dim) - - # obtain the values of the nearest neighbours - nearest <- Apply(list(coar, idx_lat, idx_lon), - target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), - fun = function(x, y, z) x[y, z], ncores = ncores)$output1 - +.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, + lat_dim, lon_dim, sdate_dim, member_dim, nn = 4, ncores = NULL) { + + # order lats and lons of the high res and coarse res data in 2-dimension in order to calculate the distances to each other. + hres_mat <- expand.grid(as.numeric(lats_hres), as.numeric(lons_hres)) + coar_mat <- expand.grid(as.numeric(lats_coar), as.numeric(lons_coar)) + + # calculate distances and obtain the closest 4 coarse res grid for each high res grid + dist_id <- apply (proxy::dist(hres_mat, coar_mat), 1, order)[1:nn,] + + names(dim(dist_id)) <- c("nn", lon_dim) + + idh <- SplitDim(dist_id, split_dim = lon_dim, + indices = rep(1:(length(lons_hres)),length(lats_hres)), + new_dim_name = c(lat_dim)) + + idh <- aperm (idh, c(1,3,2)) # dimension: nn, lat_dim, lon_dim + + #coar + idc_lat_grid <- match( coar_mat[,1], as.numeric(lats_coar)) + idc_lon_grid <- match( coar_mat[,2], as.numeric(lons_coar)) + cgrid <- cbind(idc_lat_grid,idc_lon_grid) # For the coarse res data, include the lat and lon order each grids ordered in 2-dimension. + + idh_dum <- array(NA, dim = c(4, 2, length(lats_hres), length(lons_hres))) + + for (i in 1:length(lats_hres)) { + for (j in 1:length(lons_hres)) { + idh_dum[,,i,j] <- cgrid[idh[,i,j],] + } + } + + idh <- idh_dum + rm(idh_dum) + names(dim(idh)) <- c("nn", "grd", lat_dim, lon_dim) + + # idh includes the lat and lon order of the nearest neighbours in the coarse res data specified for each lat lon of the high res data. + + nearest <- Apply(list(coar, idh), + target_dims = list(c(sdate_dim, lat_dim, lon_dim, member_dim), + "grd" ), + fun = function(x, y) { + a <- x[, y[1], y[2], , drop = FALSE] + a <- Subset (a, along = c(lat_dim, lon_dim), + indices = list (1,1), + drop = "selected") # drop lat lon dim coming from coar data + return(a) + }, + ncores = ncores)$output1 + return(nearest) } diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index a85a1b3f..bf47a7d3 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -21,6 +21,10 @@ #'@param obs an 's2dv object' with named dimensions containing the observational field. #'The object must have, at least, the dimensions latitude, longitude and start date. The #'object is expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and +#'observations; if not provided, the hindcast will be downscaled instead. The default value +#'is NULL. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. 
#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. @@ -81,15 +85,17 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 15) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) #'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640', #'probs_cat = c(1/3, 2/3)) #'@export -CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", - probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", region = NULL, loocv = TRUE, ncores = NULL) { +CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = NULL, + log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, + loocv = TRUE, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -99,7 +105,7 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, @@ -110,17 +116,27 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me region = region, loocv = loocv, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - res_s2dv <- list(exp = exp, obs = obs) + if (is.null(exp_cor)) { + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + } else { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp_cor, obs = obs) + } + return(res_s2dv) } @@ -147,6 +163,10 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'@param obs an array with named dimensions containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and +#'observations; if not provided, the hindcast will be downscaled instead. The default value +#'is NULL. 
#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -223,11 +243,13 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640', #'probs_cat = c(1/3, 2/3)) #'@export -LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, - int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), - return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, - lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) { +LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, + target_grid, int_method = NULL, log_reg_method = "ens_mean", + probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", + source_file_exp = NULL, source_file_obs = NULL, region = NULL, + loocv = TRUE, ncores = NULL) { #----------------------------------- # Checkings @@ -292,6 +314,34 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target "'member_dim'") } + if (!is.null(exp_cor)) { + + if (is.na(match(sdate_dim, names(dim(exp_cor))))) { + stop("Missing start date dimension in 'exp_cor', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp_cor))))) { + stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'") + } + + if (is.na(match(lon_dim, names(dim(exp_cor))))) { + stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp_cor))))) { + stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ", + "'lat_dim'") + } + + if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast + loocv <- FALSE + warning("Forecast data will be downscaled. 'loocv' is set to FALSE ", + "to train the model with the whole hindcast and predict with the forecast.") + } + } + # When observations are pointwise if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { point_obs <- T @@ -313,9 +363,9 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target } if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders ", - "of the downscaling region are defined by the first and last elements of the parametres ", - "'obs_lats' and 'obs_lons'.") + warning("The borders of the downscaling region have not been provided. 
Assuming ", + "the four borders of the downscaling region are defined by the first and ", + "last elements of the parametres 'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } ## ncores @@ -338,24 +388,44 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target } } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region, ncores = ncores) + if (!is.null(exp_cor)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + } + # compute ensemble mean anomalies if (log_reg_method == "ens_mean") { - predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, - ncores = ncores) + predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim, ncores = ncores) target_dims_predictor <- sdate_dim + if (!is.null(exp_cor)) { + clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim), + mean, ncores = ncores, + na.rm = TRUE)$output1 ## climatology of hindcast ens mean + ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim, + mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast + forecast <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores) + target_dims_forecast <- sdate_dim + } } else if (log_reg_method == "ens_mean_sd") { require(abind) ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores) + sdate_dim = sdate_dim, ncores = ncores) ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) #merge two arrays into one array of predictors @@ -363,8 +433,30 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom))) target_dims_predictor <- c(sdate_dim, "pred") - } else if (log_reg_method == "sorted_members") { - predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) + + if (!is.null(exp_cor)) { + clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim), + mean, ncores = ncores, + na.rm = TRUE)$output1 ## climatology of hindcast ens mean + ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim, + mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast + forecast_mean_anom <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores) + forecast_sd <- .get_ens_sd(obj_ens = exp_cor_interpolated$data, + member_dim = member_dim, ncores = ncores) + forecast <- abind(forecast_mean_anom, forecast_sd, along = 1/2) + names(dim(forecast)) <- c("pred", names(dim(forecast_mean_anom))) + + target_dims_forecast <- 
c(sdate_dim, "pred") + } + + } else if (log_reg_method == "sorted_members") { + + if (!is.null(exp_cor)) { + stop('sorted_members method cannot be used to downscale forecasts since ', + 'the ensemble members are generally exchangeable') + } + predictor <- .sort_members(obj_ens = exp_interpolated$data, + member_dim = member_dim, ncores = ncores) target_dims_predictor <- c(sdate_dim, member_dim) } else { @@ -375,9 +467,11 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target # the same grid to force the matching if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { @@ -385,27 +479,35 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target } # convert observations to categorical predictands - -obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { - if (!any(!is.na(x))) { - rep(NA,length(x)) - } else { - terc <- convert2prob(as.vector(x), prob = probs_cat) - apply(terc, 1, function(r) which (r == 1))}}, - output_dims = sdate_dim, ncores = ncores)$output1 - - - res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 - - if (return_most_likely_cat) { - res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, - output_dims = sdate_dim, ncores = ncores)$output1 + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + if (!any(!is.na(x))) { + rep(NA,length(x)) + } else { + terc <- convert2prob(as.vector(x), prob = probs_cat) + apply(terc, 1, function(r) which (r == 1)) + } + }, output_dims = sdate_dim, ncores = ncores)$output1 + + if (is.null(exp_cor)) { + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), + fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv,probs_cat = probs_cat), + output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + + if (return_most_likely_cat) { + res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, + output_dims = sdate_dim, ncores = ncores)$output1 + } + } else { + res <- Apply(list(predictor, obs_cat, forecast), + target_dims = list(target_dims_predictor, sdate_dim, target_dims_forecast), + fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv, + probs_cat = probs_cat), + output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 } # restore ensemble dimension in observations if it existed originally if (restore_ens) { - obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores) + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) } res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = 
exp_interpolated$lat) @@ -426,7 +528,8 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { .sort_members <- function(obj_ens, member_dim, ncores = NULL) { - sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1 + sorted <- Apply(obj_ens, target_dims = member_dim, + sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1 return(sorted) } @@ -444,7 +547,8 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { require(s2dv) # compute climatology - clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores)$output1 + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), + mean, ncores = ncores, na.rm = TRUE)$output1 # compute ensemble mean ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1 @@ -456,15 +560,23 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { } # atomic functions for logistic regressions -.log_reg <- function(x, y, loocv,probs_cat) { +.log_reg <- function(x, y, f = NULL, loocv, probs_cat) { tmp_df <- data.frame(x = x, y = y) # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1) | all(is.na(tmp_df$y))) { + if (is.null(f)) { + n1 <- nrow(tmp_df) + } else { + if (is.null(dim(f))) { + n1 <- length(f) + } else { + n1 <- dim(f)[1] + } + } - n1 <- nrow(tmp_df) - n2<- length(probs_cat)+1 + n2 <- length(probs_cat) + 1 res <- matrix(NA, nrow = n1, ncol = n2) } else { @@ -472,7 +584,14 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { lm1 <- .train_lr(df = tmp_df, loocv = loocv) # prediction - res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat) + res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, probs_cat = probs_cat) + + if ( !(is.null(f)) ) { ## if forecast is provided, and syear of forecast is 1. + if ( nrow(f) == 1 ) { + res <- array(res, dim = c(1, length(probs_cat) + 1)) + } + } + } return(res) } @@ -489,11 +608,11 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE)) } else { - lm1 <- list(multinom(y ~ ., data = df)) + lm1 <- list(multinom(y ~ ., data = df, trace = FALSE)) } @@ -503,7 +622,7 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { #----------------------------------- # Function to apply the logistic regressions. 
#----------------------------------- -pred_lr <- function(df, lm1, loocv,probs_cat) { +.pred_lr <- function(df, lm1, f, loocv, probs_cat) { require(plyr) @@ -514,36 +633,57 @@ pred_lr <- function(df, lm1, loocv,probs_cat) { pred_vals_ls <-list() for (j in 1:nrow(df)) { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") - } + if (all(is.na(df[j, ]))) { + pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1) + } else { + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } + } - pred_vals <- laply(pred_vals_ls, .fun = as.array) + pred_vals <- laply(pred_vals_ls, .fun = as.array) - if( length(probs_cat)+1==2) - { - pred_vals_dum<-array(NA,dim=c(nrow(df),2)) - pred_vals_dum[,2]<-pred_vals - pred_vals_dum[,1]<-1-pred_vals - pred_vals<-pred_vals_dum - colnames(pred_vals)<-c(1,2) - } + if( length(probs_cat) + 1 == 2) { + pred_vals_dum<-array(NA, dim=c(nrow(df), 2)) + pred_vals_dum[, 2]<-pred_vals + pred_vals_dum[, 1] <- 1-pred_vals + pred_vals <- pred_vals_dum + colnames(pred_vals) <- c(1, 2) + } - } else { + } else { # type = class, probs #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") #pred_vals <- unlist(pred_vals_ls) - pred_vals <- predict(lm1[[1]], df, type = "probs") + if (is.null(f)) { + pred_vals <- predict(lm1[[1]], df, type = "probs") - if( length(probs_cat)+1==2) - { - pred_vals_dum<-array(NA,dim=c(nrow(df),2)) - pred_vals_dum[,2]<-pred_vals - pred_vals_dum[,1]<-1-pred_vals - pred_vals<-pred_vals_dum - colnames(pred_vals)<-c(1,2) - } + if( length(probs_cat) + 1 == 2) { + pred_vals_dum <- array(NA, dim = c(nrow(df), 2)) + pred_vals_dum[, 2] <- pred_vals + pred_vals_dum[, 1] <- 1 - pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals) <- c(1,2) + } + } else { + if (is.null(dim(f))) { + pred_vals <- predict(lm1[[1]], newdata = data.frame(x = as.vector(f)), type = "probs") + } else { + pred_vals <- predict(lm1[[1]], newdata = data.frame(x = f), type = "probs") + } + if (length(probs_cat) + 1 == 2) { + if (is.null(dim(f))) { + pred_vals_dum <- matrix(NA, nrow = length(f), ncol = 2) + } else { + pred_vals_dum <- matrix(NA, nrow = dim(f)[1], ncol = 2) + } + pred_vals_dum[, 2] <- pred_vals + pred_vals_dum[, 1] <- 1 - pred_vals + pred_vals <- pred_vals_dum + colnames(pred_vals) <- c(1,2) + } + } } return(pred_vals) -- GitLab From d1503ffe428dfef89d487b664badeefb65385ce9 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Tue, 12 Mar 2024 12:02:19 +0100 Subject: [PATCH 05/18] fine tuning --- modules/Downscaling/tmp/LogisticReg.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index bf47a7d3..f74f2ee1 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -479,14 +479,14 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, } # convert observations to categorical predictands + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { if (!any(!is.na(x))) { rep(NA,length(x)) } else { terc <- convert2prob(as.vector(x), prob = probs_cat) - apply(terc, 1, function(r) which (r == 1)) - } - }, output_dims = sdate_dim, ncores = ncores)$output1 + as.integer(apply(terc, 1, function(r) which (r == 1)))}}, + output_dims = sdate_dim, ncores = ncores)$output1 if (is.null(exp_cor)) { res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), -- GitLab From 847de2334d1171162ad4611a4846cb9e07f8b0d2 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Tue, 12 Mar 2024 
12:08:09 +0100 Subject: [PATCH 06/18] 4nn method is converted into 9nn with principal component analysis --- modules/Downscaling/tmp/Intlr.R | 101 +++++++++++++++++++------------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 0d4f10dd..953031fe 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -26,16 +26,20 @@ #'forecast experiment data. If the forecast is provided, it will be downscaled using the #'hindcast and observations; if not provided, the hindcast will be downscaled instead. The #'default value is NULL. -#'@param lr_method a character vector indicating the linear regression method to be applied -#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' -#'method fits a linear regression using high resolution observations as predictands and the -#'interpolated model data as predictor. Then, the regression equation is to the interpolated -#'model data to correct the interpolated values. The 'large-scale' method fits a linear -#'regression with large-scale predictors from the same model (e.g. teleconnection indices) -#'as predictors and high-resolution observations as predictands. This equation is then -#'applied to the interpolated model values. Finally, the '4nn' method uses a linear -#'regression with the four nearest neighbours as predictors and high-resolution observations -#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param lr_method a character vector indicating the linear regression method to be applied. +#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a +#'linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is applied to the +#'interpolated model data to correct the interpolated values. The 'large-scale' method +#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as +#'predictors and high-resolution observations as predictands. This equation is then applied +#'to the interpolated model values. Finally, the '9nn' method uses a linear regression +#'with the nine nearest neighbours as predictors and high-resolution observations as +#'predictands. Instead of constructing a regression model using all nine predictors, +#'principal component analysis is applied to the data of neighboring grids to reduce the +#'dimension of the predictors. The linear regression model is then built using the principal +#'components that explain 95% of the variance. The '9nn' method does not require a +#'pre-interpolation process. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. 
#'@param points a list of two elements containing the point latitudes and longitudes @@ -107,7 +111,7 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p exp_cor_aux <- NULL if (!is.null(exp_cor)) { - if (identical(lr_method, 'basic') | identical(lr_method, '4nn')) { + if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) { if (!inherits(exp_cor, 's2dv_cube')) { stop("Parameter 'exp_cor' must be of the class 's2dv_cube'") } @@ -147,7 +151,7 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p res_s2dv <- list(exp = exp, obs = obs) } else { - if (identical(lr_method, 'basic') | identical(lr_method, '4nn')) { + if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) { exp_cor$data <- res$data exp_cor$dims <- dim(exp_cor$data) exp_cor$coords[[lon_dim]] <- res$lon @@ -206,16 +210,20 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p #'range from -90 to 90. #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes #'can range from -180 to 180 or from 0 to 360. -#'@param lr_method a character vector indicating the linear regression method to be applied -#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' -#'method fits a linear regression using high resolution observations as predictands and the -#'interpolated model data as predictor. Then, the regression equation is to the interpolated -#'model data to correct the interpolated values. The 'large-scale' method fits a linear -#'regression with large-scale predictors from the same model (e.g. teleconnection indices) -#'as predictors and high-resolution observations as predictands. This equation is then -#'applied to the interpolated model values. Finally, the '4nn' method uses a linear -#'regression with the four nearest neighbours as predictors and high-resolution observations -#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param lr_method a character vector indicating the linear regression method to be applied. +#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a +#'linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is applied to the +#'interpolated model data to correct the interpolated values. The 'large-scale' method +#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as +#'predictors and high-resolution observations as predictands. This equation is then applied +#'to the interpolated model values. Finally, the '9nn' method uses a linear regression +#'with the nine nearest neighbours as predictors and high-resolution observations as +#'predictands. Instead of constructing a regression model using all nine predictors, +#'principal component analysis is applied to the data of neighboring grids to reduce the +#'dimension of the predictors. The linear regression model is then built using the principal +#'components that explain 95% of the variance. The '9nn' method does not require a +#'pre-interpolation process. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. 
#'@param points a list of two elements containing the point latitudes and longitudes @@ -340,7 +348,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast loocv <- FALSE warning("Forecast data will be downscaled. 'loocv' is set to FALSE ", - "to train the mode with the whole hindcast and predict with the forecast.") + "to train the model with the whole hindcast and predict with the forecast.") } if (is.null(predictors)) { @@ -357,8 +365,8 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo } } - if (lr_method == '4nn') { - warning("4nn method skips the interpolation step since the method itself inherently downscale ", + if (lr_method == '9nn') { + warning("9nn method skips the interpolation step since the method itself inherently downscales ", "the coarse resolution data to the reference data resolution without the need ", "for interpolation.") } @@ -429,7 +437,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo #----------------------------------- # Interpolation #----------------------------------- - if (lr_method != '4nn') { + if (lr_method != '9nn') { if (is.null(int_method)) { stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", @@ -534,19 +542,19 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # Multi-linear regression with the nine nearest neighbours # Predictors: model data # Predictand: observations - else if (lr_method == '4nn') { + else if (lr_method == '9nn') { predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - nn = 4, ncores = ncores) + nn = 9, ncores = ncores) if (!is.null(exp_cor)) { aux_dim <- 'nn' forecast <- .find_nn(coar = exp_cor, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - nn = 4, ncores = ncores) + nn = 9, ncores = ncores) } if (is.null(points) | ("location" %in% names(dim(obs)))) { @@ -558,7 +566,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo lats <- obs_lats lons <- obs_lons } else { - # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, + # If the downscaling is to point locations: Once the 9 nearest neighbours have been found, # interpolate to point locations predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, @@ -625,7 +633,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # Return a list of three elements res <- list(data = res, obs = predictand, lon = lons, lat = lats) - # for testing 4nn + # for testing 9nn #x <- predictor[10,10,1,1,1,,,] #x <- Reorder(x,c(2,3,1)) #y <- predictand[1,1,1,10,,10] @@ -670,12 +678,23 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo } } else { # training -# if (!is.null(f)) { ## too many number of ensemble members, - ## so Principle component analysis is needed.
-# PR <- prcomp(tmp_df[, -ncol(tmp_df)], scale = TRUE) ## prcomp between predictors -# N_predictors <- which (summary(PR)$importance[ "Cumulative Proportion",] > .8)[1] -# tmp_df <- data.frame(x = as.vector(PR$x[, 1:N_predictors]), y = y) -# } + + ## apply principal components if method is 9nn + if (any(names(dim(x)) == "nn")) { + PR <- prcomp(~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9, + data = tmp_df[, -ncol(tmp_df)], + na.action = na.omit, scale = FALSE) ## prcomp between predictors + ## 95% of the variability should be explained by the principal components + N_predictors <- which (summary(PR)$importance[ "Cumulative Proportion",] > .95)[1] + PR_mat <- PR$x[, 1:N_predictors, drop = FALSE] + if (!is.null(PR$na.action)) { ## if there are NAs, re-insert the rows at their original positions + dum <- array(NA, dim = c(nrow(tmp_df), ncol(PR_mat) )) + dum [as.numeric(rownames(PR_mat)), ] <- PR_mat + PR_mat <- dum + } + tmp_df <- data.frame(x = PR_mat, y = y) + colnames(tmp_df) <- c(paste0("x.",1:(ncol(tmp_df)-1)), "y") + } lm1 <- .train_lm(df = tmp_df, loocv = loocv) @@ -734,7 +753,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # case to downscale forecasts } else { if (!is.null(aux_dim)) { - # 4nn & large-scale + # 9nn & large-scale fcst_df <- adply(f,.margins = 3, .id = NULL) colnames(fcst_df) <- paste0('x.',1:ncol(fcst_df)) pred_vals_ls <- lapply(lm1, predict, newdata = fcst_df) @@ -768,7 +787,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # 'coar' is an array with named dimensions #----------------------------------- .find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, - lat_dim, lon_dim, sdate_dim, member_dim, nn = 4, ncores = NULL) { + lat_dim, lon_dim, sdate_dim, member_dim, nn = 9, ncores = NULL) { # order lats and lons of the high res and coarse res data in 2-dimension in order to calculate the distances to each other. hres_mat <- expand.grid(as.numeric(lats_hres), as.numeric(lons_hres)) @@ -781,7 +800,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo idh <- SplitDim(dist_id, split_dim = lon_dim, indices = rep(1:(length(lons_hres)),length(lats_hres)), - new_dim_name = c(lat_dim)) + new_dim_name = lat_dim) idh <- aperm (idh, c(1,3,2)) # dimension: nn, lat_dim, lon_dim idc_lat_grid <- match( coar_mat[,1], as.numeric(lats_coar)) idc_lon_grid <- match( coar_mat[,2], as.numeric(lons_coar)) cgrid <- cbind(idc_lat_grid,idc_lon_grid) # For the coarse res data, include the lat and lon order each grids ordered in 2-dimension.
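The PCA step added above is the heart of the new '9nn' method: the nine neighbouring grid-point series are reduced to the principal components that explain 95% of the variance before the regression is fitted. A minimal, self-contained sketch of that idea follows; the data and object names are synthetic illustrations, not the module's own objects:

set.seed(1)
nn_preds <- matrix(rnorm(30 * 9), nrow = 30,
                   dimnames = list(NULL, paste0("x.", 1:9)))  # nine neighbour series
y <- drop(nn_preds %*% runif(9)) + rnorm(30, sd = 0.1)        # synthetic predictand
PR <- prcomp(nn_preds, scale. = FALSE)                        # PCA on the predictors
n_pc <- which(summary(PR)$importance["Cumulative Proportion", ] > 0.95)[1]
df <- data.frame(PR$x[, 1:n_pc, drop = FALSE], y = y)
fit <- lm(y ~ ., data = df)   # regression on the retained components only

Because the components are orthogonal, the fit is stable even though the nine neighbour series are strongly collinear, which is the motivation for replacing the raw nine-predictor regression.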
- idh_dum <- array(NA, dim = c(4, 2, length(lats_hres), length(lons_hres))) + idh_dum <- array(NA, dim = c(nn, 2, length(lats_hres), length(lons_hres))) for (i in 1:length(lats_hres)) { for (j in 1:length(lons_hres)) { -- GitLab From 79c8009067a943269547bb15a6991ce1f62273cb Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 3 Apr 2024 17:05:53 +0200 Subject: [PATCH 07/18] Update recipe checks and fix downscaling type 'none' case --- modules/Downscaling/Downscaling.R | 10 ++++------ tools/check_recipe.R | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 85deaa2c..e638fe27 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -15,13 +15,11 @@ Downscaling <- function(recipe, data) { type <- tolower(recipe$Analysis$Workflow$Downscaling$type) - if (!is.null(data$fcst)) { - print("Forecast data is being downscaled.") - } - if (type == "none") { - hcst_downscal <- data$hcst - DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + if (type == "none") { + hcst_downscal <- list(exp = data$hcst) + hcst_downscal$obs <- data$obs + fcst_downscal <- data$fcst + DOWNSCAL_MSG <- "##### METHOD SET TO 'none': NO DOWNSCALING PERFORMED #####" } else { diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 01896877..887b93a2 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -370,7 +370,7 @@ check_recipe <- function(recipe) { DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") - LR_METHODS <- c("basic", "large-scale", "4nn") + LR_METHODS <- c("basic", "large-scale", "9nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") # Check downscaling type if ("type" %in% names(downscal_params)) { -- GitLab From 5cf549e92458ae9f02ab208d2344dcf7c12b3a29 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Sun, 21 Apr 2024 17:49:49 +0200 Subject: [PATCH 08/18] daily data downscaling feature is added to both Intlr and LogisticReg functions --- modules/Downscaling/Downscaling.R | 11 +-- modules/Downscaling/tmp/Intlr.R | 87 +++++++++++++------- modules/Downscaling/tmp/LogisticReg.R | 113 +++++++++++++++++++------- 3 files changed, 145 insertions(+), 66 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index e638fe27..e8fb9645 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -52,7 +52,7 @@ Downscaling <- function(recipe, data) { DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") - LR_METHODS <- c("basic", "large-scale", "4nn") + LR_METHODS <- c("basic", "large-scale", "9nn") LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") if (!(type %in% DOWNSCAL_TYPES)) { @@ -138,7 +138,7 @@ Downscaling <- function(recipe, data) { if (is.null(lr_method)) { stop("Please provide one linear regression method in the recipe. Accepted ", - "methods are 'basic', 'large-scale', '4nn'.") + "methods are 'basic', 'large-scale', '9nn'.") } if (is.null(target_grid)) { stop("Please provide the target grid in the recipe. ") } if (!(lr_method %in% LR_METHODS)) { stop(paste0(lr_method, " method in the recipe is not available.
Accepted methods ", - "are 'basic', 'large-scale', '4nn'.")) + "are 'basic', 'large-scale', '9nn'.")) } # TO DO: add the possibility to have the element 'pred' in 'data' @@ -195,10 +195,6 @@ Downscaling <- function(recipe, data) { "or set data$fcst to NULL.") } - if ( is.null(data$fcst) & !is.null(data$fcstL) ) { - print("Forecast data is being downscaled.") - } - if (is.null(nanalogs)) { warning("The number of analogs for searching has not been provided in the ", "recipe. Setting it to 3.") @@ -316,6 +312,7 @@ Downscaling <- function(recipe, data) { lat_dim = "latitude", lon_dim = "longitude", sdate_dim = "syear", + time_dim = "time", member_dim = "ensemble", region = NULL, loocv = loocv, diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 953031fe..6f7e1249 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -608,21 +608,42 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo stop(paste0(lr_method, " method is not implemented yet")) } - + + k_out <- 1 ## Used to construct the leave-one-out window when loocv = TRUE. k_out is also used to order the dimensions of the .pred_lm output for both loocv = TRUE and loocv = FALSE. If the data is NOT daily, k_out is 1. + daily <- FALSE + if ( time_dim %in% names(dim(predictor)) ) { + daily <- TRUE + k_out <- as.numeric (dim(predictor)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one year of data out + predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + output_dims <- c(time_dim, sdate_dim) # for hindcast downscaling + if (!is.null(exp_cor)) { + sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast + forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + } + } + # Apply the linear regressions - # case hindcast - forecast + ## case hindcast - forecast if (!is.null(exp_cor)) { res <- Apply(list(predictor, predictand, forecast), target_dims = list(target_dims_predictor, target_dims_predictand, target_dims_forecast), - fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores)$output1 + fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores, + sdate_dim = sdate_dim, k_out = k_out)$output1 + + if (daily) { + res <- SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep (1:k_out, sdate_num_fr)) + } } - # case hindcast only + ## case hindcast only else { res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), - fun = .intlr, loocv = loocv, ncores = ncores)$output1 - names(dim(res))[1] <- sdate_dim + fun = .intlr, loocv = loocv, ncores = ncores, sdate_dim = sdate_dim, + k_out = k_out, + output_dims = output_dims)$output1 } # restore ensemble dimension in observations if it existed originally @@ -648,11 +669,10 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo return(res) } - #----------------------------------- # Atomic function to generate and apply the linear regressions #----------------------------------- -.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL) { +.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, sdate_dim = "sdate", k_out = 1) { require (plyr) @@ -665,10 +685,11 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo } else { tmp_df <- data.frame(x = x,
y = y) } + # if the data is all NA, force return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { if (is.null(f)) { - res <- rep(NA, nrow(tmp_df)) + res <- array(rep(NA, nrow(tmp_df)), c(k_out, nrow(tmp_df)/k_out)) } else { if (!is.null(aux_dim)) { res <- array(NA, dim(f)[names(dim(f)) != aux_dim]) } else { res <- array(NA, dim(f)) } } } else { - # training ## apply principal components if method is 9nn + ## TODO 9NN MIGHT GIVE ERROR FOR FORECAST CASE. REVIEW AFTER DEVELOPING FORECAST STRATEGY!! if (any(names(dim(x)) == "nn")) { PR <- prcomp(~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9, data = tmp_df[, -ncol(tmp_df)], @@ -696,31 +717,34 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo colnames(tmp_df) <- c(paste0("x.",1:(ncol(tmp_df)-1)), "y") } - lm1 <- .train_lm(df = tmp_df, loocv = loocv) + # training + lm1 <- .train_lm(df = tmp_df, loocv = loocv, k_out = k_out) # prediction - res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim) + res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, k_out = k_out) } - - return(res) + + return(res) } #----------------------------------- # Function to generate the linear regressions. # Returns a list #----------------------------------- -.train_lm <- function(df, loocv) { +.train_lm <- function(df, loocv, k_out = 1) { # Remove predictor columns containing only NA's - df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, + function(x) !all(is.na(x)))] if (loocv) { - - lm1 <- lapply(1:nrow(df), function(j) { - if (all(is.na(df[-j,]$y))) { + + lm1 <- lapply(1:(nrow(df)/k_out), function(j) { + window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day + if (all(is.na(df[-window ,]$y))) { return(NA) } else { - return(lm(df[-j,], formula = y ~ .)) + return(lm(df[-window,], formula = y ~ .)) }}) } else { @@ -733,23 +757,26 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo #----------------------------------- # Function to apply the linear regressions.
#----------------------------------- -.pred_lm <- function(df, lm1, f, loocv, aux_dim) { +.pred_lm <- function(df, lm1, f, loocv, aux_dim, k_out = 1) { require (plyr) + if (loocv) { - pred_vals <- sapply(1:nrow(df), function(j) { + pred_vals <- sapply(1:length(lm1), function(j) { if (all(is.na(lm1[[j]]))) { - return(NA) + return(rep(NA, k_out)) ## one NA per held-out day, so that sapply() binds all windows consistently } else { - return(predict(lm1[[j]], df[j,])) + window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year + return(predict(lm1[[j]], df[window,])) }}) + pred_vals <- array (pred_vals, c(k_out, nrow(df) / k_out)) } else { if (!is.na(lm1)) { # case to downscale hindcasts if (is.null(f)) { - pred_vals_ls <- lapply(lm1, predict, data = df) - pred_vals <- unlist(pred_vals_ls) + pred_vals_ls <- lapply(lm1, predict, newdata = df) + pred_vals <- array (unlist(pred_vals_ls), c(k_out, nrow(df) / k_out)) # case to downscale forecasts } else { if (!is.null(aux_dim)) { @@ -763,17 +790,17 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # basic pred_vals_ls <- lapply(lm1, predict, newdata = data.frame(x = as.vector(f))) pred_vals <- unlist(pred_vals_ls) - dim(pred_vals) <- dim(f ) + dim(pred_vals) <- dim(f) } } } else { if (is.null(f)) { - pred_vals <- rep(NA, nrow(df)) + pred_vals <- array(rep(NA, k_out), c(k_out, nrow(df) / k_out)) } else { if (!is.null(aux_dim)) { pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim]) } else { - pred_vals <- array(NA, dim(f)) + pred_vals <- array(NA, dim(f)) } } } @@ -799,7 +826,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo names(dim(dist_id)) <- c("nn", lon_dim) idh <- SplitDim(dist_id, split_dim = lon_dim, - indices = rep(1:(length(lons_hres)),length(lats_hres)), + indices = rep(1:(length(lats_hres)),length(lons_hres)), new_dim_name = lat_dim) idh <- aperm (idh, c(1,3,2)) # dimension: nn, lat_dim, lon_dim diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index c8f17525..d42c39e8 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -56,7 +56,9 @@ #'@param sdate_dim a character vector indicating the start date dimension name in the element #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". +#''data' in exp and obs. Default set to "member". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". #'@param region a numeric vector indicating the borders of the region defined in obs. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border.
latmin indicates the lower @@ -94,8 +96,8 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", region = NULL, - loocv = TRUE, ncores = NULL) { + sdate_dim = "sdate", member_dim = "member", time_dim = "time", + region = NULL, loocv = TRUE, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") } if (!inherits(obs,'s2dv_cube')) { stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, exp_lats = exp$coords[[lat_dim]], - exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], - obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, - probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + target_grid = target_grid, probs_cat = probs_cat, + return_most_likely_cat = return_most_likely_cat, int_method = int_method, log_reg_method = log_reg_method, points = points, method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + time_dim = time_dim, source_file_exp = exp$attrs$source_files[1], + source_file_obs = obs$attrs$source_files[1], region = region, loocv = loocv, ncores = ncores) # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data @@ -206,7 +210,9 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = #'@param sdate_dim a character vector indicating the start date dimension name in the element #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". +#''data' in exp and obs. Default set to "member". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". #'@param source_file_exp a character vector with a path to an example file of the exp data. #'Only needed if the downscaling is to a point location. #'@param source_file_obs a character vector with a path to an example file of the obs data. @@ -247,7 +253,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, target_grid, int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", + sdate_dim = "sdate", member_dim = "member", time_dim = "time", source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) { @@ -480,6 +486,20 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, # convert observations to categorical predictands + k_out <- 1 ## in case loocv = TRUE and the data is NOT daily, leave one data point out.
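The k_out windowing introduced in the block that follows, like its counterpart in Intlr.R above, merges the daily and start-date dimensions and then leaves one whole year out per cross-validation fold. A toy sketch of the indexing, with made-up array sizes, a simple mean standing in for the regression fit, and the assumption (as in the patch) that MergeDims from CSTools lays the merged dimension out in consecutive per-year blocks:

library(CSTools)  # MergeDims()
set.seed(1)
obs <- array(rnorm(31 * 6), dim = c(time = 31, syear = 6))  # 31 days x 6 years
k_out <- as.numeric(dim(obs)["time"])                       # leave one year out
obs <- MergeDims(obs, merge_dims = c("time", "syear"), rename_dim = "syear")
n_years <- length(obs) / k_out
pred <- numeric(length(obs))
for (j in 1:n_years) {
  window <- ((j - 1) * k_out + 1):((j - 1) * k_out + k_out)  # days of year j
  pred[window] <- mean(obs[-window])  # train on the other years, predict year j
}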
+ daily <- FALSE + if ( time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1) { + daily <- TRUE + sdate_num <- as.numeric (dim(obs_ref)[sdate_dim]) + k_out <- as.numeric (dim(obs_ref)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one year of data out + obs_ref <- MergeDims (obs_ref, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + if (!is.null(exp_cor)) { + sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast + forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) + } + } + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { if (!any(!is.na(x))) { rep(NA,length(x)) } else { terc <- convert2prob(as.vector(x), prob = probs_cat) as.integer(apply(terc, 1, function(r) which (r == 1)))}}, output_dims = sdate_dim, ncores = ncores)$output1 - + + target_dims_predictand <- sdate_dim + # Apply the logistic regressions + ## case hindcast only if (is.null(exp_cor)) { - res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), - fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv,probs_cat = probs_cat), + res <- Apply(list(predictor, obs_cat), + target_dims = list(target_dims_predictor, target_dims_predictand), + fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat, + sdate_dim = sdate_dim, k_out = k_out), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + if ( daily ) { + res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num)) + obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num)) + } + if (return_most_likely_cat) { res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, output_dims = sdate_dim, ncores = ncores)$output1 } - } else { + } + ## case hindcast - forecast + else { + if ( time_dim %in% names(dim(predictor)) ) { + if (dim(predictor)[time_dim] > 1) { + target_dims_predictor <- c(target_dims_predictor, time_dim) + target_dims_predictand <- c(target_dims_predictand, time_dim) + target_dims_forecast <- c(target_dims_forecast, time_dim) + } + } res <- Apply(list(predictor, obs_cat, forecast), - target_dims = list(target_dims_predictor, sdate_dim, target_dims_forecast), + target_dims = list(target_dims_predictor, target_dims_predictand, + target_dims_forecast), fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv, - probs_cat = probs_cat), + probs_cat = probs_cat, sdate_dim = sdate_dim, + k_out = k_out), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + + if ( daily ) { + res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num_fr)) + obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num)) + } } # restore ensemble dimension in observations if it existed originally @@ -560,8 +610,8 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, } # atomic functions for logistic regressions -.log_reg <- function(x, y, f = NULL, loocv, probs_cat) { - +.log_reg <- function(x, y, f = NULL, loocv, probs_cat, sdate_dim, k_out = 1) { + tmp_df <- data.frame(x = x, y = y) # if the data is all NA, force return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { @@ -581,10
+631,11 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, } else { # training - lm1 <- .train_lr(df = tmp_df, loocv = loocv) + lm1 <- .train_lr(df = tmp_df, loocv = loocv, k_out) # prediction - res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, probs_cat = probs_cat) + res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, + probs_cat = probs_cat, k_out = k_out) if ( !(is.null(f)) ) { ## if forecast is provided, and syear of forecast is 1. if ( nrow(f) == 1 ) { @@ -599,7 +650,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, #----------------------------------- # Function to train the logistic regressions. #----------------------------------- -.train_lr <- function(df, loocv) { +.train_lr <- function(df, loocv, k_out = 1) { require(nnet) @@ -608,7 +659,9 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE)) + lm1 <- lapply(1:(nrow(df)/k_out), function(j) { + window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day + multinom(y ~ ., data = df[ -window, ], trace = FALSE)}) } else { @@ -622,7 +675,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, #----------------------------------- # Function to apply the logistic regressions. #----------------------------------- -.pred_lr <- function(df, lm1, f, loocv, probs_cat) { +.pred_lr <- function(df, lm1, f, loocv, probs_cat, k_out = 1) { require(plyr) @@ -631,16 +684,18 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, # The error: "Error: Results must have the same dimensions." can # appear when the number of sdates is insufficient - pred_vals_ls <-list() - for (j in 1:nrow(df)) { - if (all(is.na(df[j, ]))) { - pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1) + pred_vals_ls <- list() + for (j in 1:length(lm1)) { + window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year + if (all(is.na(df[window, ]))) { + pred_vals_ls[[j]] <- array(rep(NA, length(window) * (length(probs_cat) + 1)), + dim = c(length(window), length(probs_cat) + 1)) } else { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + pred_vals_ls[[j]] <- predict(lm1[[j]], df[window,], type = "probs") } } - pred_vals <- laply(pred_vals_ls, .fun = as.array) + pred_vals <- do.call(rbind, pred_vals_ls) if( length(probs_cat) + 1 == 2) { pred_vals_dum<-array(NA, dim=c(nrow(df), 2)) -- GitLab From 682c08e7c3ec4e2255880ac9bb99b1c971abaadd Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 23 Apr 2024 11:43:59 +0200 Subject: [PATCH 09/18] Formatting --- modules/Downscaling/Downscaling.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index e8fb9645..89158e8c 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -179,20 +179,20 @@ Downscaling <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { - if ( (!is.null(data$hcstL) | !is.null(data$fcstL)) & is.null(data$obsL) ) { - stop ("Large scale model field (i.e., data$hcstL or data$fcstL) is provided. 
", - "Observation must also be provided for the large scale variable.") + if ((!is.null(data$hcstL) | !is.null(data$fcstL)) & is.null(data$obsL)) { + stop("Large scale model field (i.e., data$hcstL or data$fcstL) is provided. ", + "Observation must also be provided for the large scale variable.") } - if ( is.null(data$hcstL) & is.null(data$fcstL) & !is.null(data$obsL) ) { - stop ("Large scale observational field (i.e., data$obsL) is provided. ", - "Model data must also be provided for the large scale variable.") + if (is.null(data$hcstL) & is.null(data$fcstL) & !is.null(data$obsL)) { + stop("Large scale observational field (i.e., data$obsL) is provided. ", + "Model data must also be provided for the large scale variable.") } - if ( !is.null(data$hcstL) & !is.null(data$obsL) & !is.null(data$fcst) & is.null(data$fcstL) ) { - stop ("Large scale observational field (i.e., data$obsL) is provided. Please also ", - "provide large scale forecast data (i.e., data$fcstL) ", - "or set data$fcst to NULL.") + if (!is.null(data$hcstL) & !is.null(data$obsL) & !is.null(data$fcst) & is.null(data$fcstL)) { + stop("Large scale observational field (i.e., data$obsL) is provided. Please also ", + "provide large scale forecast data (i.e., data$fcstL) ", + "or set data$fcst to NULL.") } if (is.null(nanalogs)) { -- GitLab From 1c6eeee81c0b9b6332c94b99e2e5251fba18689d Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 7 May 2024 11:56:29 +0200 Subject: [PATCH 10/18] Compute anomalies when fcst and obs don't share a grid; refactor code --- modules/Anomalies/Anomalies.R | 58 +++++++++++++++-------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index d5f11228..939e427e 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -28,6 +28,7 @@ Anomalies <- function(recipe, data) { hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs # Compute anomalies + # 1. Case where hcst, fcst and obs share the same grid if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), as.vector(data$obs$coords$latitude))) && isTRUE(all.equal(as.vector(data$hcst$coords$longitude), @@ -47,6 +48,23 @@ Anomalies <- function(recipe, data) { data$hcst <- anom$exp data$obs <- anom$obs remove(anom) + # Forecast anomalies + if (!is.null(data$fcst)) { + clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) + data$fcst$data <- Ano(data = data$fcst$data, clim = clim$clim_exp) + } + # Change metadata + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } + # 2. 
Case where hcst and fcst are in a different grid than obs } else { ## TODO: Remove when cross-validation is implemented for this use case if (cross) { warn(recipe$Run$logger, paste("Anomaly computation in cross-validation has not been", "implemented yet for the case where hcst and obs have", "different grids. The climatology will be computed as", "a simple mean.")) } clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), mean, na.rm = recipe$Analysis$remove_NAs, ncores = recipe$Analysis$ncores)$output1 clim_obs <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), mean, na.rm = recipe$Analysis$remove_NAs, ncores = recipe$Analysis$ncores)$output1 data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) + if (!is.null(data$fcst)) { + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + } } # Change variable metadata for (var in data$hcst$attrs$Variable$varName) { @@ -76,41 +97,12 @@ Anomalies <- function(recipe, data) { paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") # Change obs longname data$obs$attrs$Variable$metadata[[var]]$long_name <- - paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") - } - # Compute forecast anomaly field - if (!is.null(data$fcst)) { - # Compute hindcast climatology ensemble mean - if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), - as.vector(data$obs$coords$latitude))) && - isTRUE(all.equal(as.vector(data$hcst$coords$longitude), - as.vector(data$obs$coord$longitude)))) { - clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") - } - # Store original dimensions - dims <- dim(clim_hcst) - # Repeat the array as many times as ensemble members - clim_hcst <- rep(clim_hcst, data$fcst$dim[['ensemble']]) - # Rename and reorder dimensions - dim(clim_hcst) <- c(dims, ensemble = data$fcst$dim[['ensemble']]) - clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dim)) - # Get fcst anomalies - data$fcst$data <- data$fcst$data - clim_hcst - # Change metadata - for (var in data$fcst$attrs$Variable$varName) { - data$fcst$attrs$Variable$metadata[[var]]$long_name <- - paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") + if (!is.null(data$fcst)) { + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") } } - info(recipe$Run$logger, paste("The anomalies have been computed,", cross_msg, "cross-validation. The original full fields are returned as", -- GitLab From a35690ddf4e068ec89fc8bd521515b0057dd545a Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 7 May 2024 11:56:48 +0200 Subject: [PATCH 11/18] Specify CSTools::Calibration() --- modules/Downscaling/tmp/Intbc.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 70043c1f..62378404 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -394,10 +394,11 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo if (dim(obs_ref)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation.
Returning NA.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, - exp_cor = exp_cor_interpolated$data, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, - cal.method = bc_method) + res <- CSTools::Calibration(exp = exp_interpolated$data, obs = obs_ref, + exp_cor = exp_cor_interpolated$data, + memb_dim = member_dim, sdate_dim = sdate_dim, + ncores = ncores, + cal.method = bc_method) } # Return a list of three elements -- GitLab From 14c6419529acc79a1991f7144ac3085bde55dc39 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 7 May 2024 11:56:58 +0200 Subject: [PATCH 12/18] add fcst to test --- recipes/atomic_recipes/recipe_seasonal_downscaling.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml index c526140d..18d03185 100644 --- a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml +++ b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml @@ -14,7 +14,7 @@ Analysis: name: ERA5 Time: sdate: '0501' - fcst_year: + fcst_year: '2020' hcst_start: '2000' hcst_end: '2005' ftime_min: 1 -- GitLab From 772e65cf1ff2eb386000e8a277aa4630c8d53910 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 7 May 2024 11:57:11 +0200 Subject: [PATCH 13/18] Remove comment --- modules/Downscaling/Downscaling.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 89158e8c..e69a4e24 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -325,7 +325,7 @@ Downscaling <- function(recipe, data) { fcst_downscal <- NULL - if (!is.null(data$fcst) | ( !is.null(data$fcstL) & type == "analogs" ) ) { + if (!is.null(data$fcst) | (!is.null(data$fcstL) & type == "analogs")) { fcst_downscal <- hcst_downscal hcst_downscal$exp <- NULL } @@ -334,13 +334,12 @@ Downscaling <- function(recipe, data) { if (recipe$Analysis$Workflow$Downscaling$save != 'none') { info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") } - ## TODO: What do we do with the full values? 
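As an aside on PATCH 10 above: when the hindcast and observations are on different grids, the anomaly computation reduces to a per-gridpoint climatology over start dates and members, subtracted from the full field. A toy sketch mirroring the Apply()/Ano() calls used there, with made-up array sizes (multiApply and s2dv, as elsewhere in this codebase; the recycling of the reduced climatology over the collapsed dimensions is assumed to behave as in the module's own calls):

library(multiApply)  # Apply()
library(s2dv)        # Ano()
set.seed(1)
hcst <- array(rnorm(2 * 6 * 4 * 4),
              dim = c(ensemble = 2, syear = 6, latitude = 4, longitude = 4))
clim <- Apply(hcst, target_dims = c('syear', 'ensemble'),
              mean, na.rm = TRUE)$output1  # latitude x longitude climatology
anom <- Ano(data = hcst, clim = clim)      # anomalies with respect to clim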
recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Downscaling/") - # if ((recipe$Analysis$Workflow$Downscaling$save %in% - # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - # } + if ((recipe$Analysis$Workflow$Downscaling$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_downscal$exp, type = 'fcst') + } if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') } -- GitLab From 85f02a9d7ec732d2ae68b12d580904acc5534976 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 7 May 2024 15:06:59 +0200 Subject: [PATCH 14/18] Bugfix: remove tolower() from downscaling target grid --- modules/Downscaling/Downscaling.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index e69a4e24..4bb29e6e 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -28,7 +28,7 @@ Downscaling <- function(recipe, data) { bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + target_grid <- recipe$Analysis$Workflow$Downscaling$target_grid nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) size <- recipe$Analysis$Workflow$Downscaling$size -- GitLab From e2c3a99a553ecf68c9f24b120df2491416c8d36d Mon Sep 17 00:00:00 2001 From: eduzenli Date: Mon, 13 May 2024 11:59:13 +0200 Subject: [PATCH 15/18] dimension problem with 2 categories has been solved --- modules/Downscaling/tmp/LogisticReg.R | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index d42c39e8..a14f3d99 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -532,13 +532,6 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, } ## case hindcast - forecast else { - if ( time_dim %in% names(dim(predictor)) ) { - if (dim(predictor)[time_dim] > 1) { - target_dims_predictor <- c(target_dims_predictor, time_dim) - target_dims_predictand <- c(target_dims_predictand, time_dim) - target_dims_forecast <- c(target_dims_forecast, time_dim) - } - } res <- Apply(list(predictor, obs_cat, forecast), target_dims = list(target_dims_predictor, target_dims_predictand, target_dims_forecast), @@ -698,9 +691,9 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, pred_vals <- do.call(rbind, pred_vals_ls) if( length(probs_cat) + 1 == 2) { - pred_vals_dum<-array(NA, dim=c(nrow(df), 2)) - pred_vals_dum[, 2]<-pred_vals - pred_vals_dum[, 1] <- 1-pred_vals + pred_vals_dum<-array(NA, dim = c(nrow(df), 2)) + pred_vals_dum[, 2] <- t(pred_vals) + pred_vals_dum[, 1] <- 1 - pred_vals_dum[, 2] pred_vals <- pred_vals_dum colnames(pred_vals) <- c(1, 2) } @@ -715,8 +708,8 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, if( length(probs_cat) + 1 == 2) { pred_vals_dum <- array(NA, dim = c(nrow(df), 2)) - pred_vals_dum[, 2] <- pred_vals - pred_vals_dum[, 1] <- 1 - pred_vals + pred_vals_dum[, 2] <- t(pred_vals) + pred_vals_dum[, 
1] <- 1 - pred_vals_dum[, 2] pred_vals<-pred_vals_dum colnames(pred_vals) <- c(1,2) } -- GitLab From 8ea69d3084dde6cd8628de2d869dce9de7b8627e Mon Sep 17 00:00:00 2001 From: eduzenli Date: Mon, 13 May 2024 11:59:37 +0200 Subject: [PATCH 16/18] dimension problem of observational return has been solved --- modules/Downscaling/tmp/Intlr.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 6f7e1249..beb8d7ee 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -614,6 +614,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo if ( time_dim %in% names(dim(predictor)) ) { daily <- TRUE k_out <- as.numeric (dim(predictor)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out + sdate_num <- as.numeric (dim(predictand)[sdate_dim]) ## sdate_num of hindcast predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) output_dims <- c(time_dim, sdate_dim) # for hindcast dwn @@ -646,9 +647,14 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo output_dims = output_dims)$output1 } + if (daily) { + predictand <- SplitDim(data = predictand, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep (1:k_out, sdate_num)) + } + # restore ensemble dimension in observations if it existed originally if (restore_ens) { - predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim) + predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) } # Return a list of three elements -- GitLab From 3b5ee4939868f2f14733353b6e6b13fb8f7bb900 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Thu, 16 May 2024 12:26:29 +0200 Subject: [PATCH 17/18] . 
--- modules/Downscaling/Downscaling.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index e8fb9645..64e495c4 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -28,7 +28,7 @@ Downscaling <- function(recipe, data) { bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + target_grid <- recipe$Analysis$Workflow$Downscaling$target_grid nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) size <- recipe$Analysis$Workflow$Downscaling$size -- GitLab From a60af2f815c25cd58efacdc187bb514519ed609f Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 23 May 2024 15:01:24 +0200 Subject: [PATCH 18/18] Add warning and condition not to save hcst data when hcst is not downscaled --- modules/Downscaling/Downscaling.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 4bb29e6e..f334b1de 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -326,8 +326,11 @@ Downscaling <- function(recipe, data) { fcst_downscal <- NULL if (!is.null(data$fcst) | (!is.null(data$fcstL) & type == "analogs")) { - fcst_downscal <- hcst_downscal + fcst_downscal$exp <- hcst_downscal$exp hcst_downscal$exp <- NULL + warn(recipe$Run$logger, + paste0("Only $obs and $fcst have been downscaled; $hcst will", + " be NULL and some modules might return errors.")) } # Saving @@ -340,7 +343,8 @@ Downscaling <- function(recipe, data) { c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { save_forecast(recipe = recipe, data_cube = fcst_downscal$exp, type = 'fcst') } - if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { + if ((recipe$Analysis$Workflow$Downscaling$save %in% + c('all', 'exp_only')) && (!is.null(hcst_downscal$exp))) { save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') } if (recipe$Analysis$Workflow$Downscaling$save == 'all') { -- GitLab
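As a footnote to PATCH 15 above: with only two categories, predict(..., type = "probs") on a multinom fit returns a vector of probabilities for the second category rather than a two-column matrix, which is why the fixed code rebuilds the matrix and transposes before filling, restoring chronological order across the leave-one-year-out windows. A toy reproduction with hypothetical data (nnet required):

library(nnet)
set.seed(1)
df <- data.frame(x = rnorm(8), y = factor(rep(1:2, 4)))
k_out <- 4  # two windows of four "days" each
lm1 <- lapply(1:2, function(j) {
  window <- ((j - 1) * k_out + 1):((j - 1) * k_out + k_out)
  multinom(y ~ ., data = df[-window, ], trace = FALSE)  # train excluding window j
})
pred_vals_ls <- lapply(1:2, function(j) {
  window <- ((j - 1) * k_out + 1):((j - 1) * k_out + k_out)
  predict(lm1[[j]], df[window, ], type = "probs")  # vector of P(category 2)
})
pred_vals <- do.call(rbind, pred_vals_ls)        # 2 windows x 4 days
pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
pred_vals_dum[, 2] <- t(pred_vals)               # refill in chronological order
pred_vals_dum[, 1] <- 1 - pred_vals_dum[, 2]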