From afdbfdc0ac74758f0a2bd7c85cfaf9fb014d2036 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 13 Feb 2025 09:28:20 +0100 Subject: [PATCH 01/16] added CST_TrainIndex, update d unit test, CST_Calibrartion, docs --- R/CST_Calibration.R | 120 ++++++++--------------- R/CST_TrainIndex.R | 88 +++++++++++++++++ tests/testthat/test-CST_Calibration.R | 11 ++- tests/testthat/test-CST_TrainIndex.R | 131 ++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 87 deletions(-) create mode 100644 R/CST_TrainIndex.R create mode 100644 tests/testthat/test-CST_TrainIndex.R diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index c4942eaa..3b0bf04a 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -73,6 +73,13 @@ #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is one. +#'@param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An object of class \code{s2dv_cube} containing the calibrated #'forecasts in the element \code{data} with the dimensions nexp, nobs and same @@ -146,7 +153,7 @@ #'@importFrom ClimProjDiags Subset #'@export CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = FALSE, + eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL, k = 1, tail.out = TRUE) { @@ -254,6 +261,13 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computation using multiApply function. The default value is NULL (one core). +#'@param k k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An array containing the calibrated forecasts with the dimensions #'nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of @@ -301,7 +315,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@importFrom ClimProjDiags Subset #'@export Calibration <- function(exp, obs, exp_cor = NULL, - cal.method = "mse_min", eval.method = "leave-one-out", + cal.method = "mse_min", eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, @@ -478,9 +492,9 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } ## eval.method - if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold', 'retrospective'))) { + if (!any(eval.method %in% c('in-sample', 'leave-k-out', 'hindcast-vs-forecast', 'retrospective'))) { stop(paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "the sampling method used ('in-sample', 'leave-k-out', 'hindcast-vs-forecast' or ", "'retrospective').")) } ## multi.model @@ -567,77 +581,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor, - k = 1, tail.out = TRUE) { -# eval.method: 'leave-one-out', 'k-fold', 'in-sample', "hindcast-vs-forecast", "retrospective" -# amt.points: length of the sample -# amt.points_cor: only needed for hindcast-vs-forecast method -# k: the number of samples to leave-out -# tail_out: boolean for 'k-fold method; TRUE to remove both extremes of the sample when k is -# in the extreme keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, -# eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. -# amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) - - if (k >= amt.points && !is.null(k) | k < 1) { - stop("k needs to be a positive integer less than the amt.points") - } - if (eval.method == "leave-one-out") { - dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, - train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "k-fold") { - # k is a odd number - if (tail.out == TRUE){ - dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { - if (x >= ((kfold-1)/2) + 1 && x + ((kfold-1)/2) <= amt.points) { - ind <- (x-((kfold-1)/2)):(x+((kfold-1)/2)) - } else if (x < ((kfold-1)/2) + 1) { - ind <- c((amt.points - ((kfold-1)/2-x)):amt.points, 1:(x+(kfold-1)/2)) - } else if ((x+((kfold-1)/2)) > amt.points) { - ind <- c((x-(kfold-1)/2):amt.points, 1:(((kfold-1)/2)-amt.points+x)) - } else { - stop("Review make.eval.train.dexes function") - } - return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-ind])) - }) - } else { - if (k != 1){ - k <- (k-1)/2 - } - - dexes.lst <- lapply(seq(1, amt.points), function(x) { - start_idx <- max(1, x - k) - end_idx <- min(amt.points, x + k) - eval_range <- start_idx:end_idx - train_idx <- setdiff(seq(1, amt.points), eval_range) - return(list(eval.dexes = x, train.dexes = train_idx)) - }) - } - - } else if (eval.method == "retrospective") { - # k can be any integer indicating the when to start - dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), - function(x, mindata = k) { - if (x > k) { - eval.dexes <- x - train.dexes <- 1:(x-1) - return(list(eval.dexes = x, - train.dexes = 1:(x-1))) - }})) - } else if (eval.method == "in-sample") { - dexes.lst <- list(list(eval.dexes = seq(1, amt.points), - train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast") { - dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), - train.dexes = seq(1, amt.points))) - } else { - stop(paste0("unknown sampling method: ", eval.method)) - } - - return(dexes.lst) -} - .cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, k = 1, tail.out = TRUE) { # exp: [memb, sdate, (dat)] @@ -672,8 +617,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, sdate <- expdims[2] # sdate sdate_cor <- expdims_cor[2] - var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) - + if(eval.method == 'retrospective'){ + var.cor.fc <- array(dim = c(expdims_cor[1], expdims_cor[2]-k, nexp = nexp, nobs = nobs)) + }else{ + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) + } for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[, , i] @@ -693,7 +641,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, expcor_data <- exp_cor } - eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, + eval.train.dexeses <- CST_TrainIndex(eval.method = eval.method, amt.points = sdate, amt.points_cor = sdate_cor, k = k, tail.out = tail.out) @@ -701,14 +649,18 @@ Calibration <- function(exp, obs, exp_cor = NULL, for (i.sample in seq(1, amt.resamples)) { # defining training (tr) and evaluation (ev) subsets # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) - eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes fc.ev <- expcor_data[, eval.dexes, drop = FALSE] fc.tr <- exp_data[, train.dexes] obs.tr <- obs_data[train.dexes, drop = FALSE] - + if(eval.method == 'retrospective'){ + eval.dexes = eval.dexes - k + }else{ + eval.dexes = eval.dexes + } if (cal.method == "bias") { - var.cor.fc[, eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + var.cor.fc[, eval.dexes , i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) # forecast correction implemented } else if (cal.method == "evmos") { # forecast correction implemented @@ -717,11 +669,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, # calculate value for regression parameters init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) # correct evaluation subset - var.cor.fc[, eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + var.cor.fc[, eval.dexes , i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "mse_min") { quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) - var.cor.fc[, eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + var.cor.fc[, eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "crps_min") { quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) @@ -767,7 +719,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (is.null(dat_dim)) { + if(eval.method == 'retrospective'){ + dim(var.cor.fc) <- c(expdims_cor[1], (expdims_cor[2]-k)) + }else{ dim(var.cor.fc) <- dim(exp_cor)[1:2] + } } return(var.cor.fc) } diff --git a/R/CST_TrainIndex.R b/R/CST_TrainIndex.R new file mode 100644 index 00000000..3806e3b2 --- /dev/null +++ b/R/CST_TrainIndex.R @@ -0,0 +1,88 @@ +#' Generate Training and Evaluation Indices for Cross-Validation +#' +#' This function generates training and evaluation indices based on different cross-validation methods. +#' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} +#' +#' @param eval.method Character. The cross-validation method. Options include: +#' -\code{leave-k-out}: Leaves out \code{k} points at a time for evaluation. +#' -\code{retrospective}: Uses past data for training and a future point for evaluation. +#' -\code{in-sample}: Uses the entire dataset for both training and evaluation. +#' -\code{hindcast-vs-forecast}: Uses a subset \code{amt.points_cor} for evaluation while training on \code{amt.points}. +#' +#' @param amt.points Integer. Total number of points in the dataset. +#' @param amt.points_cor Integer. Number of points for evaluation in the \code{hindcast-vs-forecast} method. +#' @param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#' @param tail.out Logical for method \code{leave-k-out}. Default = TRUE. +#' TRUE to remove both extremes keeping the same sample size for all k-folds +#' (e.g. amt.points=50, k=3, eval.dexes=1, train.dexes={3,49}). +#' FALSE to remove only the corresponding tail +#' (e.g. amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' @return A list of lists, where each element contains: +#' \code{eval.dexes}: Indices of evaluation points. +#' \code{train.dexes}: Indices of training points. +#' +#' @examples +#' # Leave-k-out cross-validation +#' CST_TrainIndex("leave-k-out", amt.points = 10, amt.points_cor = 5, k = 3) +#' # Retrospective cross-validation +#' CST_TrainIndex("retrospective", amt.points = 10, amt.points_cor = 5, k = 3) +#' # In-sample validation +#' CST_TrainIndex("in-sample", amt.points = 10, amt.points_cor = 5) +#' # Hindcast vs. Forecast validation +#' CST_TrainIndex("hindcast-vs-forecast", amt.points = 10, amt.points_cor = 5) +#' +#' @export +CST_TrainIndex <- function(eval.method, amt.points, amt.points_cor, + k = 1, tail.out = TRUE) { + + if (is.null(k) || k >= amt.points || k < 1 || k %% 2 == 0 ) { + stop("k needs to be a positive odd integer less than the amt.points") + } + + if ( k %% 2 == 0 ) { + warning("k is expected to be an odd integer") + } + + if (eval.method == "leave-k-out") { + if (k == 1) { + tail.out == TRUE + } + amt.seq <- 1:amt.points + strike = (k - 1) / 2 + alt_amt.seq <- c(tail(amt.seq, strike), amt.seq, head(amt.seq, strike)) + + dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { + if (tail.out) { + ind = alt_amt.seq[x:(x + (kfold - 1))] + } else{ + ind = ((x - strike):(x + strike)) + ind = ind[ind > 0 & ind <= amt.points] + } + return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) + }) + } else if (eval.method == "retrospective") { + # k can be any integer indicating the when to start + dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), + function(x, mindata = k) { + if (x > k) { + eval.dexes <- x + train.dexes <- 1:(x-1) + return(list(eval.dexes = x, + train.dexes = 1:(x-1))) + }})) + + } else if (eval.method == "in-sample") { + dexes.lst <- list(list(eval.dexes = seq(1, amt.points), + train.dexes = seq(1, amt.points))) + } else if (eval.method == "hindcast-vs-forecast") { + dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), + train.dexes = seq(1, amt.points))) + } else { + stop(paste0("unknown sampling method: ", eval.method)) + } + + return(dexes.lst) +} + diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 285e81c8..29fb5f4c 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -158,8 +158,8 @@ test_that("1. Input checks", { ) expect_error( Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), - obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), - dat_dim = 'dataset'), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", " except 'memb_dim' and 'dat_dim'.") ) @@ -218,7 +218,7 @@ test_that("1. Input checks", { expect_error( Calibration(exp4, obs4, eval.method = 'biass'), paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "the sampling method used ('in-sample', 'leave-k-out', 'hindcast-vs-forecast' or ", "'retrospective')."), fixed = TRUE ) @@ -234,6 +234,7 @@ test_that("1. Input checks", { ) }) + ############################################## test_that("2. Output checks: dat1", { @@ -454,14 +455,14 @@ test_that("6. Output checks: dat4", { tolerance = 0.0001 ) expect_equal( - as.vector(Calibration(exp4, obs4, eval.method = "k-fold", k = 5, tail.out = T))[1:5], + as.vector(Calibration(exp4, obs4, eval.method = "leave-k-out", k = 5, tail.out = T))[1:5], c(-0.8149557, 0.2043942, -1.0781616, 1.9806657, 0.3879362), tolerance = 0.0001 ) expect_equal( as.vector(Calibration(exp4, obs4, eval.method = "retrospective", k = 3)[1,1:5]), - c(NA, NA, NA, 1.6267865, -0.1658674), + c(1.6267865, -0.1658674, 0.3998208, 3.3390213, 0.4102334), tolerance = 0.0001 ) diff --git a/tests/testthat/test-CST_TrainIndex.R b/tests/testthat/test-CST_TrainIndex.R new file mode 100644 index 00000000..f6f75cb3 --- /dev/null +++ b/tests/testthat/test-CST_TrainIndex.R @@ -0,0 +1,131 @@ + + +############################################## + +test_that("1. Input checks", { + # s2dv_cube + expect_error( + CST_TrainIndex(eval.method = 1, 2), + paste0("unknown sampling method: 1") + ) + expect_error( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 12), + paste0("k needs to be a positive odd integer less than the amt.points") + ) + expect_error( + CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 10), + paste0("k needs to be a positive odd integer less than the amt.points") + ) +}) + +############################################## + +test_that("2. Output checks: leave-k-out", { + expect_equal( + is.list(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)), + TRUE + ) + expect_equal( + is.list(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + TRUE + ) + expect_equal( + length(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)), + 10 + ) + expect_equal( + names(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + c("eval.dexes", "train.dexes") + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], + list(eval.dexes = 1, + train.dexes = 4:8) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], + list(eval.dexes = 2, + train.dexes = 5:9) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], + list(eval.dexes = 3, + train.dexes = 6:10) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], + list(eval.dexes = 9, + train.dexes = 2:6) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], + list(eval.dexes = 10, + train.dexes = 3:7) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], + list(eval.dexes = 1, + train.dexes = 4:10) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], + list(eval.dexes = 5, + train.dexes = c(1,2,8:10)) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], + list(eval.dexes = 10, + train.dexes = c(1:7)) + ) + expect_equal( + CST_TrainIndex(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], + CST_TrainIndex(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] + ) + +}) + +############################################## + +test_that("3. Output checks: retrospective", { + expect_equal( + length(CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)), + 7 + ) + expect_equal( + CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[1]], + list(eval.dexes = 4, train.dexes = 1:3) + ) + expect_equal( + CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[2]], + list(eval.dexes = 5, train.dexes = 1:4) + ) + expect_equal( + CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[7]], + list(eval.dexes = 10, train.dexes = 1:9) + ) +}) + + + +# Train.Eval.dexes <- function(k, amt.points, tail.out = TRUE) { +# amt.seq <- 1:amt.points +# strike = (k - 1) / 2 +# alt_amt.seq <- c(tail(amt.seq, strike), amt.seq, head(amt.seq, strike)) +# +# dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { +# if (tail.out) { +# ind = alt_amt.seq[x:(x + (kfold - 1))] +# if(x == 2){print(ind)} +# # return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) +# } else{ +# ind = ((x - strike):(x + strike)) +# ind = ind[ind > 0 & ind <= amt.points] +# #return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) +# } +# return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) +# +# }) +# } + + + \ No newline at end of file -- GitLab From 6d51d7ad92d47349fbb75c564757906af2c7b351 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Mon, 17 Mar 2025 15:18:02 +0100 Subject: [PATCH 02/16] replace ,.make.eval with CST_TrainIndex --- R/CST_CategoricalEnsCombination.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index f973cde7..9d610019 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -304,7 +304,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.coeff <- amt.mdl + 1 var.cat.fc <- array(NA, c(amt.cat, amt.sdate)) - eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) + eval.train.dexeses <- CST_TrainIndex(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { -- GitLab From d7a31ab26f57534704c1e1586b06c619002d7364 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Mon, 17 Mar 2025 15:37:55 +0100 Subject: [PATCH 03/16] rmv test-make.eval, update categoricalensemble --- R/CST_CategoricalEnsCombination.R | 10 +-- tests/testthat/test-CST_TrainIndex.R | 23 ----- tests/testthat/test-make-eval-train-dexes.R | 93 --------------------- 3 files changed, 5 insertions(+), 121 deletions(-) delete mode 100644 tests/testthat/test-make-eval-train-dexes.R diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index 9d610019..a242d616 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -57,8 +57,8 @@ #' Vannitsem (2019). Finally, the \code{obs} method classifies the observations #' into the different categories and therefore contains only 0 and 1 values. #'@param eval.method Is the sampling method used, can be either -#' \code{"in-sample"} or \code{"leave-one-out"}. Default value is the -#' \code{"leave-one-out"} cross validation. +#' \code{"in-sample"} or \code{"leave-k-out"}. Default value is the +#' \code{"leave-k-out"} cross validation. #'@param ... other parameters to be passed on to the calibration procedure. #' #'@return An object of class \code{s2dv_cube} containing the categorical @@ -96,7 +96,7 @@ #'@import abind #'@export CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", - eval.method = "leave-one-out", + eval.method = "leave-k-out", amt.cat = 3, k = 1, tail.out = TRUE, ...) { # Check 's2dv_cube' @@ -155,8 +155,8 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", #' Vannitsem (2019). Finally, the \code{obs} method classifies the observations #' into the different categories and therefore contains only 0 and 1 values. #'@param eval.method Is the sampling method used, can be either -#' \code{"in-sample"} or \code{"leave-one-out"}. Default value is the -#' \code{"leave-one-out"} cross validation. +#' \code{"in-sample"} or \code{"leave-k-out"}. Default value is the +#' \code{"leave-k-out"} cross validation. #'@param ... Other parameters to be passed on to the calibration procedure. #' #'@return An array containing the categorical forecasts in the element called diff --git a/tests/testthat/test-CST_TrainIndex.R b/tests/testthat/test-CST_TrainIndex.R index f6f75cb3..3618019f 100644 --- a/tests/testthat/test-CST_TrainIndex.R +++ b/tests/testthat/test-CST_TrainIndex.R @@ -1,5 +1,4 @@ - ############################################## test_that("1. Input checks", { @@ -106,26 +105,4 @@ test_that("3. Output checks: retrospective", { }) - -# Train.Eval.dexes <- function(k, amt.points, tail.out = TRUE) { -# amt.seq <- 1:amt.points -# strike = (k - 1) / 2 -# alt_amt.seq <- c(tail(amt.seq, strike), amt.seq, head(amt.seq, strike)) -# -# dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { -# if (tail.out) { -# ind = alt_amt.seq[x:(x + (kfold - 1))] -# if(x == 2){print(ind)} -# # return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) -# } else{ -# ind = ((x - strike):(x + strike)) -# ind = ind[ind > 0 & ind <= amt.points] -# #return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) -# } -# return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) -# -# }) -# } - - \ No newline at end of file diff --git a/tests/testthat/test-make-eval-train-dexes.R b/tests/testthat/test-make-eval-train-dexes.R deleted file mode 100644 index a74a06c0..00000000 --- a/tests/testthat/test-make-eval-train-dexes.R +++ /dev/null @@ -1,93 +0,0 @@ -############################################## - -test_that("1. Input checks", { - # s2dv_cube - expect_error( - .make.eval.train.dexes(eval.method = 1, 2), - paste0("unknown sampling method: 1") - ) - expect_error( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 12), - paste0("k needs to be a positive integer less than the amt.points") - ) - expect_error( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 10), - paste0("k needs to be a positive integer less than the amt.points") - ) -}) - -############################################## - -test_that("2. Output checks: k-fold", { - expect_equal( - is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), - TRUE - ) - expect_equal( - is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), - TRUE - ) - expect_equal( - length(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), - 10 - ) - expect_equal( - names(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), - c("eval.dexes", "train.dexes") - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]], - list(eval.dexes = 1, - train.dexes = 4:8) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[2]], - list(eval.dexes = 2, - train.dexes = 5:9) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[3]], - list(eval.dexes = 3, - train.dexes = 6:10) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[9]], - list(eval.dexes = 9, - train.dexes = 2:6) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[10]], - list(eval.dexes = 10, - train.dexes = 3:7) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 1), - .make.eval.train.dexes(eval.method = 'leave-one-out', 10, NULL) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 20, NULL, 1), - .make.eval.train.dexes(eval.method = 'leave-one-out', 20, NULL) - ) -}) - -############################################## - -test_that("3. Output checks: retrospective", { - expect_equal( - length(.make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)), - 7 - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[1]], - list(eval.dexes = 4, train.dexes = 1:3) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[2]], - list(eval.dexes = 5, train.dexes = 1:4) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[7]], - list(eval.dexes = 10, train.dexes = 1:9) - ) -}) - -- GitLab From 45bd5f9712504a0c6e39c69179b7995b8f774f69 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 20 Mar 2025 15:06:03 +0100 Subject: [PATCH 04/16] rename fun and update doc --- R/CST_Calibration.R | 23 +++-- R/CST_CategoricalEnsCombination.R | 17 ++- R/{CST_TrainIndex.R => Eval_TrainIndices.R} | 25 +++-- tests/testthat/test-CST_TrainIndex.R | 108 -------------------- tests/testthat/test-Eval_TrainIndices.R | 108 ++++++++++++++++++++ 5 files changed, 148 insertions(+), 133 deletions(-) rename R/{CST_TrainIndex.R => Eval_TrainIndices.R} (84%) delete mode 100644 tests/testthat/test-CST_TrainIndex.R create mode 100644 tests/testthat/test-Eval_TrainIndices.R diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 3b0bf04a..093814fe 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -39,9 +39,10 @@ #' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or #' \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method A character string indicating the sampling method used, it -#' can be either \code{in-sample} or \code{leave-one-out}. Default value is the -#' \code{leave-one-out} cross validation. In case the forecast is provided, any -#' chosen eval.method is over-ruled and a third option is used. +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences @@ -207,7 +208,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'minimizes the Continuous Ranked Probability Score (CRPS). The #'\code{"rpc-based"} method adjusts the forecast variance ensuring that the #'ratio of predictable components (RPC) is equal to one, as in Eade et al. -#'(2014). Both in-sample or our out-of-sample (leave-one-out cross +#'(2014). Both in-sample or our out-of-sample (leave-k-out cross #'validation) calibration are possible. #' #'@param exp A multidimensional array with named dimensions (at least 'sdate' @@ -226,11 +227,11 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@param cal.method A character string indicating the calibration method used, #' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} #' or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method A character string indicating the sampling method used, -#' can be either \code{in-sample} or \code{leave-one-out}. Default value is -#' the \code{leave-one-out} cross validation. In case the forecast is -#' provided, any chosen eval.method is over-ruled and a third option is -#' used. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences @@ -348,7 +349,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" # if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; - # no need for "leave-one-out" or "in-sample") + # no need for "leave-k-out" or "in-sample") eval.method <- "hindcast-vs-forecast" expcordims <- names(dim(exp_cor)) if (is.null(expcordims)) { @@ -641,7 +642,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, expcor_data <- exp_cor } - eval.train.dexeses <- CST_TrainIndex(eval.method = eval.method, + eval.train.dexeses <- Eval_TrainIndices(eval.method = eval.method, amt.points = sdate, amt.points_cor = sdate_cor, k = k, tail.out = tail.out) diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index a242d616..0e4e477c 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -59,6 +59,13 @@ #'@param eval.method Is the sampling method used, can be either #' \code{"in-sample"} or \code{"leave-k-out"}. Default value is the #' \code{"leave-k-out"} cross validation. +#'@param k k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) #'@param ... other parameters to be passed on to the calibration procedure. #' #'@return An object of class \code{s2dv_cube} containing the categorical @@ -154,9 +161,11 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", #' Rajagopalan et al. (2002), Robertson et al. (2004) and Van Schaeybroeck and #' Vannitsem (2019). Finally, the \code{obs} method classifies the observations #' into the different categories and therefore contains only 0 and 1 values. -#'@param eval.method Is the sampling method used, can be either -#' \code{"in-sample"} or \code{"leave-k-out"}. Default value is the -#' \code{"leave-k-out"} cross validation. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param ... Other parameters to be passed on to the calibration procedure. #' #'@return An array containing the categorical forecasts in the element called @@ -304,7 +313,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.coeff <- amt.mdl + 1 var.cat.fc <- array(NA, c(amt.cat, amt.sdate)) - eval.train.dexeses <- CST_TrainIndex(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) + eval.train.dexeses <- Eval_TrainIndices(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { diff --git a/R/CST_TrainIndex.R b/R/Eval_TrainIndices.R similarity index 84% rename from R/CST_TrainIndex.R rename to R/Eval_TrainIndices.R index 3806e3b2..b6949726 100644 --- a/R/CST_TrainIndex.R +++ b/R/Eval_TrainIndices.R @@ -1,19 +1,24 @@ #' Generate Training and Evaluation Indices for Cross-Validation #' -#' This function generates training and evaluation indices based on different cross-validation methods. +#' This function generates training and evaluation indices based on different +#' cross-validation methods. #' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} #' #' @param eval.method Character. The cross-validation method. Options include: #' -\code{leave-k-out}: Leaves out \code{k} points at a time for evaluation. #' -\code{retrospective}: Uses past data for training and a future point for evaluation. #' -\code{in-sample}: Uses the entire dataset for both training and evaluation. -#' -\code{hindcast-vs-forecast}: Uses a subset \code{amt.points_cor} for evaluation while training on \code{amt.points}. +#' -\code{hindcast-vs-forecast}: Uses a subset \code{amt.points_cor} for +#' evaluation while training on \code{amt.points}. #' #' @param amt.points Integer. Total number of points in the dataset. -#' @param amt.points_cor Integer. Number of points for evaluation in the \code{hindcast-vs-forecast} method. +#' @param amt.points_cor Integer. Number of points for evaluation in the +#' \code{hindcast-vs-forecast} method. #' @param k Positive integer. Default = 1. -#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. -#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, +#' indicating when to start. #' @param tail.out Logical for method \code{leave-k-out}. Default = TRUE. #' TRUE to remove both extremes keeping the same sample size for all k-folds #' (e.g. amt.points=50, k=3, eval.dexes=1, train.dexes={3,49}). @@ -25,16 +30,16 @@ #' #' @examples #' # Leave-k-out cross-validation -#' CST_TrainIndex("leave-k-out", amt.points = 10, amt.points_cor = 5, k = 3) +#' Eval_TrainIndices("leave-k-out", amt.points = 10, amt.points_cor = 5, k = 3) #' # Retrospective cross-validation -#' CST_TrainIndex("retrospective", amt.points = 10, amt.points_cor = 5, k = 3) +#' Eval_TrainIndices("retrospective", amt.points = 10, amt.points_cor = 5, k = 3) #' # In-sample validation -#' CST_TrainIndex("in-sample", amt.points = 10, amt.points_cor = 5) +#' Eval_TrainIndices("in-sample", amt.points = 10, amt.points_cor = 5) #' # Hindcast vs. Forecast validation -#' CST_TrainIndex("hindcast-vs-forecast", amt.points = 10, amt.points_cor = 5) +#' Eval_TrainIndices("hindcast-vs-forecast", amt.points = 10, amt.points_cor = 5) #' #' @export -CST_TrainIndex <- function(eval.method, amt.points, amt.points_cor, +Eval_TrainIndices <- function(eval.method, amt.points, amt.points_cor, k = 1, tail.out = TRUE) { if (is.null(k) || k >= amt.points || k < 1 || k %% 2 == 0 ) { diff --git a/tests/testthat/test-CST_TrainIndex.R b/tests/testthat/test-CST_TrainIndex.R deleted file mode 100644 index 3618019f..00000000 --- a/tests/testthat/test-CST_TrainIndex.R +++ /dev/null @@ -1,108 +0,0 @@ - -############################################## - -test_that("1. Input checks", { - # s2dv_cube - expect_error( - CST_TrainIndex(eval.method = 1, 2), - paste0("unknown sampling method: 1") - ) - expect_error( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 12), - paste0("k needs to be a positive odd integer less than the amt.points") - ) - expect_error( - CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 10), - paste0("k needs to be a positive odd integer less than the amt.points") - ) -}) - -############################################## - -test_that("2. Output checks: leave-k-out", { - expect_equal( - is.list(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)), - TRUE - ) - expect_equal( - is.list(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), - TRUE - ) - expect_equal( - length(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)), - 10 - ) - expect_equal( - names(CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), - c("eval.dexes", "train.dexes") - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], - list(eval.dexes = 1, - train.dexes = 4:8) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], - list(eval.dexes = 2, - train.dexes = 5:9) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], - list(eval.dexes = 3, - train.dexes = 6:10) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], - list(eval.dexes = 9, - train.dexes = 2:6) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], - list(eval.dexes = 10, - train.dexes = 3:7) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], - list(eval.dexes = 1, - train.dexes = 4:10) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], - list(eval.dexes = 5, - train.dexes = c(1,2,8:10)) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], - list(eval.dexes = 10, - train.dexes = c(1:7)) - ) - expect_equal( - CST_TrainIndex(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], - CST_TrainIndex(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] - ) - -}) - -############################################## - -test_that("3. Output checks: retrospective", { - expect_equal( - length(CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)), - 7 - ) - expect_equal( - CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[1]], - list(eval.dexes = 4, train.dexes = 1:3) - ) - expect_equal( - CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[2]], - list(eval.dexes = 5, train.dexes = 1:4) - ) - expect_equal( - CST_TrainIndex(eval.method = 'retrospective', 10, NULL, 3)[[7]], - list(eval.dexes = 10, train.dexes = 1:9) - ) -}) - - - \ No newline at end of file diff --git a/tests/testthat/test-Eval_TrainIndices.R b/tests/testthat/test-Eval_TrainIndices.R new file mode 100644 index 00000000..0cb95eea --- /dev/null +++ b/tests/testthat/test-Eval_TrainIndices.R @@ -0,0 +1,108 @@ + +############################################## + +test_that("1. Input checks", { + # s2dv_cube + expect_error( + Eval_TrainIndices(eval.method = 1, 2), + paste0("unknown sampling method: 1") + ) + expect_error( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 12), + paste0("k needs to be a positive odd integer less than the amt.points") + ) + expect_error( + Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 10), + paste0("k needs to be a positive odd integer less than the amt.points") + ) +}) + +############################################## + +test_that("2. Output checks: leave-k-out", { + expect_equal( + is.list(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + TRUE + ) + expect_equal( + is.list(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + TRUE + ) + expect_equal( + length(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + 10 + ) + expect_equal( + names(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + c("eval.dexes", "train.dexes") + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], + list(eval.dexes = 1, + train.dexes = 4:8) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], + list(eval.dexes = 2, + train.dexes = 5:9) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], + list(eval.dexes = 3, + train.dexes = 6:10) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], + list(eval.dexes = 9, + train.dexes = 2:6) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], + list(eval.dexes = 10, + train.dexes = 3:7) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], + list(eval.dexes = 1, + train.dexes = 4:10) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], + list(eval.dexes = 5, + train.dexes = c(1,2,8:10)) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], + list(eval.dexes = 10, + train.dexes = c(1:7)) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], + Eval_TrainIndices(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] + ) + +}) + +############################################## + +test_that("3. Output checks: retrospective", { + expect_equal( + length(Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)), + 7 + ) + expect_equal( + Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[1]], + list(eval.dexes = 4, train.dexes = 1:3) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[2]], + list(eval.dexes = 5, train.dexes = 1:4) + ) + expect_equal( + Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[7]], + list(eval.dexes = 10, train.dexes = 1:9) + ) +}) + + + \ No newline at end of file -- GitLab From 9505e3693b0ed6ccd0f1cd5d1e2daa35c0811a95 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Fri, 28 Mar 2025 11:20:55 +0100 Subject: [PATCH 05/16] replace with evaltrainindices --- R/CST_QuantileMapping.R | 93 +++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index acd5259d..0fcf94d4 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -254,50 +254,68 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', } .qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', + eval.method = 'in-sample', method = 'QUANT', na.rm = FALSE, ...) { - + # exp: [memb (+ window), sdate] # obs: [memb (+ window), sdate] # exp_cor: NULL or [memb, sdate] - + if (is.null(exp_cor)) { applied <- exp * NA - for (sd in 1:dim(exp)[sdate_dim]) { - if (na.rm) { - # select start date for cross-val - nas_pos <- which(!is.na(exp[, sd])) - obs2 <- as.vector(obs[, -sd]) - exp2 <- as.vector(exp[, -sd]) - exp_cor2 <- as.vector(exp[, sd]) - # remove NAs - obs2 <- obs2[!is.na(obs2)] - exp2 <- exp2[!is.na(exp2)] - exp_cor2 <- exp_cor2[!is.na(exp_cor2)] - tryCatch({ - adjust <- fitQmap(obs2, exp2, method = method, ...) - applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) + if (eval.method == 'leave-k-out') { + + index <- Eval_TrainIndices(eval.method, amt.points = dim(exp)[sdate_dim], k = 1) + + applied <- lapply(index, function(x){ + if (na.rm) { + # select start date for cross-val + nas_pos <- which(!is.na(exp[, x$eval.dexes])) + obs2 <- as.vector(obs[, x$train.dexes]) + exp2 <- as.vector(exp[, x$train.dexes]) + exp_cor2 <- as.vector(exp[, x$eval.dexes]) + # remove NAs + obs2 <- obs2[!is.na(obs2)] + exp2 <- exp2[!is.na(exp2)] + exp_cor2 <- exp_cor2[!is.na(exp_cor2)] + tryCatch({ + adjust <- fitQmap(obs2, exp2, method = method, ...) + applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) }, error = function(error_message) { - return(applied[, sd]) + return(applied[, x$eval.dexes]) }) - } else { - # na.rm = FALSE shouldn't fail, just return NA - if (anyNA(obs[, -sd]) | anyNA(exp[, -sd])) { - applied[, sd] <- NA } else { - adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]), - method = method, ...) - exp2 <- exp[, sd] - if (sum(is.na(exp2)) >= 1) { - app <- rep(NA, length(exp2)) - nas_pos <- which(is.na(exp2)) - exp2 <- exp2[!is.na(exp2)] - app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) + # na.rm = FALSE shouldn't fail, just return NA + if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { + applied[, x$eval.dexes] <- NA } else { - app <- doQmap(as.vector(exp2), adjust, ...) + adjust <- fitQmap(as.vector(obs[, x$train.dexes]), as.vector(exp[, x$train.dexes]), + method = method, ...) + + print(adjust$par$fitq[1,1]) + + exp2 <- exp[, x$eval.dexes] + if (sum(is.na(exp2)) >= 1) { + app <- rep(NA, length(exp2)) + nas_pos <- which(is.na(exp2)) + exp2 <- exp2[!is.na(exp2)] + app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) + } else { + app <- doQmap(as.vector(exp2), adjust, ...) + } + applied[, x$eval.dexes] <- app } - applied[, sd] <- app } + }) + } else if (eval.method == 'in-sample') { + if (na.rm) { + adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], + method = method, ...) + applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) + } else { + adjust <- fitQmap(obs, exp, method = method, ...) + applied <- doQmap(exp, adjust, ...) } } } else { @@ -308,15 +326,20 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', method = method, ...) applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], adjust, ...) - }, - error = function(error_message) { - return(applied) - }) + }, + error = function(error_message) { + return(applied) + }) } else { adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) applied <- doQmap(as.vector(exp_cor), adjust, ...) } dim(applied) <- dim(exp_cor) } + applied <- exp * NA + adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], method = method, ...) + applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) + + return(applied) } -- GitLab From c213a3333d64fec34a6cfdc6e189b6aa5c9a1f0d Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Mon, 31 Mar 2025 12:20:40 +0200 Subject: [PATCH 06/16] update doc,sanity, rename var --- R/CST_Calibration.R | 14 +-- R/CST_CategoricalEnsCombination.R | 6 +- R/{Eval_TrainIndices.R => EvalTrainIndices.R} | 54 ++++----- tests/testthat/test-EvalTrainIndices.R | 108 ++++++++++++++++++ tests/testthat/test-Eval_TrainIndices.R | 108 ------------------ 5 files changed, 145 insertions(+), 145 deletions(-) rename R/{Eval_TrainIndices.R => EvalTrainIndices.R} (62%) create mode 100644 tests/testthat/test-EvalTrainIndices.R delete mode 100644 tests/testthat/test-Eval_TrainIndices.R diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 093814fe..c5d87469 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -78,9 +78,9 @@ #' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. #' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. #'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes -#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, #' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. -#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An object of class \code{s2dv_cube} containing the calibrated #'forecasts in the element \code{data} with the dimensions nexp, nobs and same @@ -266,9 +266,9 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. #' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. #'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes -#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, #' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. -#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An array containing the calibrated forecasts with the dimensions #'nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of @@ -642,9 +642,9 @@ Calibration <- function(exp, obs, exp_cor = NULL, expcor_data <- exp_cor } - eval.train.dexeses <- Eval_TrainIndices(eval.method = eval.method, - amt.points = sdate, - amt.points_cor = sdate_cor, + eval.train.dexeses <- EvalTrainIndices(eval.method = eval.method, + sample.length = sdate, + sample.length_cor = sdate_cor, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index 0e4e477c..94ef489f 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -63,9 +63,9 @@ #' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. #' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. #'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes -#' keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, #' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. -#' amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #'@param ... other parameters to be passed on to the calibration procedure. #' #'@return An object of class \code{s2dv_cube} containing the categorical @@ -313,7 +313,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.coeff <- amt.mdl + 1 var.cat.fc <- array(NA, c(amt.cat, amt.sdate)) - eval.train.dexeses <- Eval_TrainIndices(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) + eval.train.dexeses <- EvalTrainIndices(eval.method = eval.method, sample.length = amt.sdate, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { diff --git a/R/Eval_TrainIndices.R b/R/EvalTrainIndices.R similarity index 62% rename from R/Eval_TrainIndices.R rename to R/EvalTrainIndices.R index b6949726..d0fb3fd8 100644 --- a/R/Eval_TrainIndices.R +++ b/R/EvalTrainIndices.R @@ -8,11 +8,11 @@ #' -\code{leave-k-out}: Leaves out \code{k} points at a time for evaluation. #' -\code{retrospective}: Uses past data for training and a future point for evaluation. #' -\code{in-sample}: Uses the entire dataset for both training and evaluation. -#' -\code{hindcast-vs-forecast}: Uses a subset \code{amt.points_cor} for -#' evaluation while training on \code{amt.points}. +#' -\code{hindcast-vs-forecast}: Uses all years from the hindcast sdate dimension +#' as training and all years from the forecast sdate dimension will be corrected. #' -#' @param amt.points Integer. Total number of points in the dataset. -#' @param amt.points_cor Integer. Number of points for evaluation in the +#' @param sample.length Integer. Length of the dataset. +#' @param sample.length_cor Integer. Length of forecast dataset in #' \code{hindcast-vs-forecast} method. #' @param k Positive integer. Default = 1. #' In method \code{leave-k-out}, \code{k} is expected to be odd integer, @@ -21,55 +21,55 @@ #' indicating when to start. #' @param tail.out Logical for method \code{leave-k-out}. Default = TRUE. #' TRUE to remove both extremes keeping the same sample size for all k-folds -#' (e.g. amt.points=50, k=3, eval.dexes=1, train.dexes={3,49}). +#' (e.g. sample.length=50, k=3, eval.dexes=1, train.dexes={3,49}). #' FALSE to remove only the corresponding tail -#' (e.g. amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' (e.g. sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #' @return A list of lists, where each element contains: #' \code{eval.dexes}: Indices of evaluation points. #' \code{train.dexes}: Indices of training points. #' #' @examples #' # Leave-k-out cross-validation -#' Eval_TrainIndices("leave-k-out", amt.points = 10, amt.points_cor = 5, k = 3) +#' Eval_TrainIndices("leave-k-out", sample.length = 10, sample.length_cor = 5, k = 3) #' # Retrospective cross-validation -#' Eval_TrainIndices("retrospective", amt.points = 10, amt.points_cor = 5, k = 3) +#' Eval_TrainIndices("retrospective", sample.length = 10, sample.length_cor = 5, k = 3) #' # In-sample validation -#' Eval_TrainIndices("in-sample", amt.points = 10, amt.points_cor = 5) +#' Eval_TrainIndices("in-sample", sample.length = 10, sample.length_cor = 5) #' # Hindcast vs. Forecast validation -#' Eval_TrainIndices("hindcast-vs-forecast", amt.points = 10, amt.points_cor = 5) +#' Eval_TrainIndices("hindcast-vs-forecast", sample.length = 10, sample.length_cor = 5) #' #' @export -Eval_TrainIndices <- function(eval.method, amt.points, amt.points_cor, +EvalTrainIndices <- function(eval.method, sample.length, sample.length_cor, k = 1, tail.out = TRUE) { - if (is.null(k) || k >= amt.points || k < 1 || k %% 2 == 0 ) { - stop("k needs to be a positive odd integer less than the amt.points") - } - - if ( k %% 2 == 0 ) { - warning("k is expected to be an odd integer") - } - if (eval.method == "leave-k-out") { + + if (is.null(k) || k >= sample.length || k < 1 || k %% 2 == 0 ) { + warning("k is expected to be a positive odd integer less than the sample.length") + } + if (k == 1) { tail.out == TRUE } - amt.seq <- 1:amt.points + amt.seq <- 1:sample.length strike = (k - 1) / 2 alt_amt.seq <- c(tail(amt.seq, strike), amt.seq, head(amt.seq, strike)) - dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { + dexes.lst <- lapply(seq(1, sample.length), function(x, kfold = k) { if (tail.out) { ind = alt_amt.seq[x:(x + (kfold - 1))] } else{ ind = ((x - strike):(x + strike)) - ind = ind[ind > 0 & ind <= amt.points] + ind = ind[ind > 0 & ind <= sample.length] } return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) }) } else if (eval.method == "retrospective") { # k can be any integer indicating the when to start - dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), + if(k >= sample.length){ + warning("k is expected to be less than the sample.length") + } + dexes.lst <- base::Filter(length, lapply(seq(1, sample.length), function(x, mindata = k) { if (x > k) { eval.dexes <- x @@ -79,11 +79,11 @@ Eval_TrainIndices <- function(eval.method, amt.points, amt.points_cor, }})) } else if (eval.method == "in-sample") { - dexes.lst <- list(list(eval.dexes = seq(1, amt.points), - train.dexes = seq(1, amt.points))) + dexes.lst <- list(list(eval.dexes = seq(1, sample.length), + train.dexes = seq(1, sample.length))) } else if (eval.method == "hindcast-vs-forecast") { - dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), - train.dexes = seq(1, amt.points))) + dexes.lst <- list(list(eval.dexes = seq(1, sample.length_cor), + train.dexes = seq(1, sample.length))) } else { stop(paste0("unknown sampling method: ", eval.method)) } diff --git a/tests/testthat/test-EvalTrainIndices.R b/tests/testthat/test-EvalTrainIndices.R new file mode 100644 index 00000000..62a33707 --- /dev/null +++ b/tests/testthat/test-EvalTrainIndices.R @@ -0,0 +1,108 @@ + +############################################## + +test_that("1. Input checks", { + # s2dv_cube + expect_error( + EvalTrainIndices(eval.method = 1, 2), + paste0("unknown sampling method: 1") + ) + expect_warning( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 12), + paste0("k is expected to be a positive odd integer less than the sample.length") + ) + expect_warning( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 10), + paste0("k is expected to be less than the sample.length") + ) +}) + +############################################## + +test_that("2. Output checks: leave-k-out", { + expect_equal( + is.list(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + TRUE + ) + expect_equal( + is.list(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + TRUE + ) + expect_equal( + length(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + 10 + ) + expect_equal( + names(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + c("eval.dexes", "train.dexes") + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], + list(eval.dexes = 1, + train.dexes = 4:8) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], + list(eval.dexes = 2, + train.dexes = 5:9) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], + list(eval.dexes = 3, + train.dexes = 6:10) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], + list(eval.dexes = 9, + train.dexes = 2:6) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], + list(eval.dexes = 10, + train.dexes = 3:7) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], + list(eval.dexes = 1, + train.dexes = 4:10) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], + list(eval.dexes = 5, + train.dexes = c(1,2,8:10)) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], + list(eval.dexes = 10, + train.dexes = c(1:7)) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], + EvalTrainIndices(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] + ) + +}) + +############################################## + +test_that("3. Output checks: retrospective", { + expect_equal( + length(EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)), + 7 + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[1]], + list(eval.dexes = 4, train.dexes = 1:3) + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[2]], + list(eval.dexes = 5, train.dexes = 1:4) + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[7]], + list(eval.dexes = 10, train.dexes = 1:9) + ) +}) + + + \ No newline at end of file diff --git a/tests/testthat/test-Eval_TrainIndices.R b/tests/testthat/test-Eval_TrainIndices.R deleted file mode 100644 index 0cb95eea..00000000 --- a/tests/testthat/test-Eval_TrainIndices.R +++ /dev/null @@ -1,108 +0,0 @@ - -############################################## - -test_that("1. Input checks", { - # s2dv_cube - expect_error( - Eval_TrainIndices(eval.method = 1, 2), - paste0("unknown sampling method: 1") - ) - expect_error( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 12), - paste0("k needs to be a positive odd integer less than the amt.points") - ) - expect_error( - Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 10), - paste0("k needs to be a positive odd integer less than the amt.points") - ) -}) - -############################################## - -test_that("2. Output checks: leave-k-out", { - expect_equal( - is.list(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), - TRUE - ) - expect_equal( - is.list(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), - TRUE - ) - expect_equal( - length(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), - 10 - ) - expect_equal( - names(Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), - c("eval.dexes", "train.dexes") - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], - list(eval.dexes = 1, - train.dexes = 4:8) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], - list(eval.dexes = 2, - train.dexes = 5:9) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], - list(eval.dexes = 3, - train.dexes = 6:10) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], - list(eval.dexes = 9, - train.dexes = 2:6) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], - list(eval.dexes = 10, - train.dexes = 3:7) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], - list(eval.dexes = 1, - train.dexes = 4:10) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], - list(eval.dexes = 5, - train.dexes = c(1,2,8:10)) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], - list(eval.dexes = 10, - train.dexes = c(1:7)) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], - Eval_TrainIndices(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] - ) - -}) - -############################################## - -test_that("3. Output checks: retrospective", { - expect_equal( - length(Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)), - 7 - ) - expect_equal( - Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[1]], - list(eval.dexes = 4, train.dexes = 1:3) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[2]], - list(eval.dexes = 5, train.dexes = 1:4) - ) - expect_equal( - Eval_TrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[7]], - list(eval.dexes = 10, train.dexes = 1:9) - ) -}) - - - \ No newline at end of file -- GitLab From 38a7909cad871753ed372895d947dffd119272a8 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Mon, 31 Mar 2025 12:54:27 +0200 Subject: [PATCH 07/16] data unavaible, commented test-as.s2dv check 8 --- tests/testthat/test-as.s2dv_cube.R | 78 +++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/tests/testthat/test-as.s2dv_cube.R b/tests/testthat/test-as.s2dv_cube.R index 8ff19582..15d69a2d 100644 --- a/tests/testthat/test-as.s2dv_cube.R +++ b/tests/testthat/test-as.s2dv_cube.R @@ -332,44 +332,44 @@ test_that("7. Tests from Start()", { ############################################## -test_that("8. Tests from Start()", { - path <- paste0('/esarchive/exp/ecearth/a3t4/diags/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/$memb$/Omon/$var$/gn/', - 'v*/$var$_Omon_EC-Earth3-LR_piControl_$memb$_gn_$chunk$.nc') - suppressWarnings( - data7 <- Start(dat = list(list(name = 'a3t4', path = path)), - var = 'tosmean', - memb = paste0('r', 1:5, 'i1p1f1'), - region = c("ATL3", "Global_Ocean", "Nino3.4"), - time = indices(1:10), - chunk = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = 'chunk', region = NULL), - num_procs = 8, - retrieve = T) - ) - - res7 <- as.s2dv_cube(data7) - - # dimensions - expect_equal( - dim(res7$data), - c(dat = 1, var = 1, memb = 5, region = 3, time = 10) - ) - # elements - expect_equal( - names(res7), - c("data", "dims", "coords", "attrs") - ) - expect_equal( - res7$attrs$Variable$varName, - c('tosmean') - ) - # Dates - expect_equal( - dim(res7$attrs$Dates), - c(time = 10) - ) -}) +# test_that("8. Tests from Start()", { +# path <- paste0('/esarchive/exp/ecearth/a3t4/diags/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/$memb$/Omon/$var$/gn/', +# 'v*/$var$_Omon_EC-Earth3-LR_piControl_$memb$_gn_$chunk$.nc') +# suppressWarnings( +# data7 <- Start(dat = list(list(name = 'a3t4', path = path)), +# var = 'tosmean', +# memb = paste0('r', 1:5, 'i1p1f1'), +# region = c("ATL3", "Global_Ocean", "Nino3.4"), +# time = indices(1:10), +# chunk = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = 'chunk', region = NULL), +# num_procs = 8, +# retrieve = T) +# ) +# +# res7 <- as.s2dv_cube(data7) +# +# # dimensions +# expect_equal( +# dim(res7$data), +# c(dat = 1, var = 1, memb = 5, region = 3, time = 10) +# ) +# # elements +# expect_equal( +# names(res7), +# c("data", "dims", "coords", "attrs") +# ) +# expect_equal( +# res7$attrs$Variable$varName, +# c('tosmean') +# ) +# # Dates +# expect_equal( +# dim(res7$attrs$Dates), +# c(time = 10) +# ) +# }) ############################################## \ No newline at end of file -- GitLab From 0825df84cb9c717dab993a391bd77dad5821f8ab Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Mon, 14 Apr 2025 09:52:23 +0200 Subject: [PATCH 08/16] evaltrainindices in quantilemapping --- R/CST_QuantileMapping.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 0fcf94d4..b4182699 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -265,7 +265,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', applied <- exp * NA if (eval.method == 'leave-k-out') { - index <- Eval_TrainIndices(eval.method, amt.points = dim(exp)[sdate_dim], k = 1) + index <- EvalTrainIndices(eval.method, sample.length = dim(exp)[sdate_dim], k = 1) applied <- lapply(index, function(x){ if (na.rm) { @@ -280,7 +280,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', exp_cor2 <- exp_cor2[!is.na(exp_cor2)] tryCatch({ adjust <- fitQmap(obs2, exp2, method = method, ...) - applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) + applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) }, error = function(error_message) { return(applied[, x$eval.dexes]) -- GitLab From f1d51131cd80b21a3ce26ceb8cbd499080de00eb Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Tue, 15 Apr 2025 16:26:48 +0200 Subject: [PATCH 09/16] evaltrainindices updated in Quantilemapping --- R/CST_QuantileMapping.R | 259 +++++++++++++++++++++++++++------------- 1 file changed, 179 insertions(+), 80 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index b4182699..245ed65c 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -253,93 +253,192 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', return(qmaped) } +#.qmapcor Theertha's version +#' TEST +# exp <- as.numeric(1:prod(6,10,15)) +# dim(exp) <- c(member = 6, syear = 10, window = 15) +# obs <- as.numeric(rnorm(prod(1,10,15), 50)) +# dim(obs) <- c(member = 1, syear = 10, window = 15) +# fcst <- 100*(1:prod(8,1,1)) +# dim(fcst) <- c(member = 8, syear = 1, swindow = 1) +# +# +# res_sample <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "in-sample") +# +# res_leave <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "leave-k-out", k = 1) +# +# res_retro <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "retrospective", +# k = 3) +# +# res_hind <- QuantileMapping(exp = exp, obs = obs, exp_cor = fcst, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "hindcast-vs-forecast") +#' + .qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', - eval.method = 'in-sample', + eval.method = 'leave-k-out', k = 1, method = 'QUANT', na.rm = FALSE, ...) { - # exp: [memb (+ window), sdate] - # obs: [memb (+ window), sdate] - # exp_cor: NULL or [memb, sdate] - if (is.null(exp_cor)) { - applied <- exp * NA - if (eval.method == 'leave-k-out') { - - index <- EvalTrainIndices(eval.method, sample.length = dim(exp)[sdate_dim], k = 1) - - applied <- lapply(index, function(x){ - if (na.rm) { - # select start date for cross-val - nas_pos <- which(!is.na(exp[, x$eval.dexes])) - obs2 <- as.vector(obs[, x$train.dexes]) - exp2 <- as.vector(exp[, x$train.dexes]) - exp_cor2 <- as.vector(exp[, x$eval.dexes]) - # remove NAs - obs2 <- obs2[!is.na(obs2)] - exp2 <- exp2[!is.na(exp2)] - exp_cor2 <- exp_cor2[!is.na(exp_cor2)] - tryCatch({ - adjust <- fitQmap(obs2, exp2, method = method, ...) - applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) - }, - error = function(error_message) { - return(applied[, x$eval.dexes]) - }) - } else { - # na.rm = FALSE shouldn't fail, just return NA - if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { - applied[, x$eval.dexes] <- NA - } else { - adjust <- fitQmap(as.vector(obs[, x$train.dexes]), as.vector(exp[, x$train.dexes]), - method = method, ...) - - print(adjust$par$fitq[1,1]) - - exp2 <- exp[, x$eval.dexes] - if (sum(is.na(exp2)) >= 1) { - app <- rep(NA, length(exp2)) - nas_pos <- which(is.na(exp2)) - exp2 <- exp2[!is.na(exp2)] - app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) - } else { - app <- doQmap(as.vector(exp2), adjust, ...) - } - applied[, x$eval.dexes] <- app + if(!is.null(exp_cor) & eval.method != "hindcast-vs-forcast"){ + warning("Defaulting to 'hindcast-vs-forcast' as exp_cor is not 'NULL'") + } + + if(eval.method == "retrospective" & k == 1){ + stop("k = 1 not expected, trainindex need at least two non-NA values to interpolate ") + } + + applied <- exp * NA + index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, + sample.length_cor = dim(exp_cor)[sdate_dim], ...) + + for(x in index){ + + nas_pos <- which(!is.na(exp[, x$eval.dexes])) + obs2 <- as.vector(obs[, x$train.dexes]) + exp2 <- as.vector(exp[, x$train.dexes]) + exp_cor2 <- as.vector(exp[, x$eval.dexes]) + # remove NAs + obs2 <- obs2[!is.na(obs2)] + exp2 <- exp2[!is.na(exp2)] + exp_cor2 <- exp_cor2[!is.na(exp_cor2)] + + adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) + + if(is.null(exp_cor)){ + if(na.rm){ + + if(eval.method == "in-sample"){ + applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) + }else{ + applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) + } + }else{ + + if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { + applied[, x$eval.dexes] <- NA + }else{ + if(eval.method == "in-sample"){ + applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) + }else{ + applied[, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) } } - }) - } else if (eval.method == 'in-sample') { - if (na.rm) { - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], - method = method, ...) - applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) - } else { - adjust <- fitQmap(obs, exp, method = method, ...) - applied <- doQmap(exp, adjust, ...) + } + }else{ + # add check if method is hindcast else give warning that since exp_cor is not null hindcast is selected + #hindcast vs forecast + if(na.rm){ + applied[nas_pos] <- doQmap(as.vector(exp_cor2), adjust, ...) + }else{ + if(anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])){ + applied[nas_pos] <- NA + }else{ + applied[nas_pos] <- doQmap(as.vector(exp_cor2), adjust, ...) + } } } - } else { - applied <- exp_cor * NA - if (na.rm) { - tryCatch({ - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], - method = method, ...) - applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], - adjust, ...) - }, - error = function(error_message) { - return(applied) - }) - } else { - adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) - applied <- doQmap(as.vector(exp_cor), adjust, ...) - } - dim(applied) <- dim(exp_cor) } - applied <- exp * NA - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], method = method, ...) - applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) - - - return(applied) + applied } + + + + +#.qmapcor Nuria's version ### +# .qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', +# eval.method = 'in-sample', +# method = 'QUANT', na.rm = FALSE, ...) { +# +# # exp: [memb (+ window), sdate] +# # obs: [memb (+ window), sdate] +# # exp_cor: NULL or [memb, sdate] +# +# if (is.null(exp_cor)) { +# applied <- exp * NA +# if (eval.method == 'leave-k-out') { +# +# index <- EvalTrainIndices(eval.method, sample.length = dim(exp)[sdate_dim], k = 1) +# +# applied <- lapply(index, function(x){ +# if (na.rm) { +# # select start date for cross-val +# nas_pos <- which(!is.na(exp[, x$eval.dexes])) +# obs2 <- as.vector(obs[, x$train.dexes]) +# exp2 <- as.vector(exp[, x$train.dexes]) +# exp_cor2 <- as.vector(exp[, x$eval.dexes]) +# # remove NAs +# obs2 <- obs2[!is.na(obs2)] +# exp2 <- exp2[!is.na(exp2)] +# exp_cor2 <- exp_cor2[!is.na(exp_cor2)] +# tryCatch({ +# adjust <- fitQmap(obs2, exp2, method = method, ...) +# applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) +# }, +# error = function(error_message) { +# return(applied[, x$eval.dexes]) +# }) +# } else { +# # na.rm = FALSE shouldn't fail, just return NA +# if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { +# applied[, x$eval.dexes] <- NA +# } else { +# adjust <- fitQmap(as.vector(obs[, x$train.dexes]), as.vector(exp[, x$train.dexes]), +# method = method, ...) +# +# print(adjust$par$fitq[1,1]) +# +# exp2 <- exp[, x$eval.dexes] +# if (sum(is.na(exp2)) >= 1) { +# app <- rep(NA, length(exp2)) +# nas_pos <- which(is.na(exp2)) +# exp2 <- exp2[!is.na(exp2)] +# app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) +# } else { +# app <- doQmap(as.vector(exp2), adjust, ...) +# } +# applied[, x$eval.dexes] <- app +# } +# } +# }) +# } else if (eval.method == 'in-sample') { +# if (na.rm) { +# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], +# method = method, ...) +# applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) +# } else { +# adjust <- fitQmap(obs, exp, method = method, ...) +# applied <- doQmap(exp, adjust, ...) +# } +# } +# } else { +# applied <- exp_cor * NA +# if (na.rm) { +# tryCatch({ +# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], +# method = method, ...) +# applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], +# adjust, ...) +# }, +# error = function(error_message) { +# return(applied) +# }) +# } else { +# adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) +# applied <- doQmap(as.vector(exp_cor), adjust, ...) +# } +# dim(applied) <- dim(exp_cor) +# } +# applied <- exp * NA +# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], method = method, ...) +# applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) +# +# +# return(applied) +# } -- GitLab From c3169dd05eeacdbe1bab18e85ce4e6d7fb3cbe4b Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Tue, 22 Apr 2025 15:30:50 +0200 Subject: [PATCH 10/16] testing --- R/CST_QuantileMapping.R | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 245ed65c..415c0bc4 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -297,7 +297,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', applied <- exp * NA index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, sample.length_cor = dim(exp_cor)[sdate_dim], ...) - + print(index) for(x in index){ nas_pos <- which(!is.na(exp[, x$eval.dexes])) @@ -310,8 +310,9 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', exp_cor2 <- exp_cor2[!is.na(exp_cor2)] adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) - + if(is.null(exp_cor)){ + if(na.rm){ if(eval.method == "in-sample"){ @@ -321,26 +322,30 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', } }else{ - if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { + if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { applied[, x$eval.dexes] <- NA }else{ if(eval.method == "in-sample"){ applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) }else{ - applied[, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) + applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) } } } }else{ + applied <- exp_cor * NA + nas_cor <- which(!is.na(exp_cor[, x$eval.dexes])) # add check if method is hindcast else give warning that since exp_cor is not null hindcast is selected #hindcast vs forecast if(na.rm){ - applied[nas_pos] <- doQmap(as.vector(exp_cor2), adjust, ...) + applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], adjust, ...) + #applied[nas_pos] <- doQmap(as.vector(exp_cor), adjust, ...) }else{ - if(anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])){ - applied[nas_pos] <- NA + if(anyNA(exp_cor[, x$train.dexes])){ + #if(anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])){ + applied[nas_cor] <- NA }else{ - applied[nas_pos] <- doQmap(as.vector(exp_cor2), adjust, ...) + applied[nas_pos, x$eval.dexes] <- doQmap(as.vector(exp_cor), adjust, ...) } } } -- GitLab From f668020bc4da64095fc7b0212ebb574820f006b5 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 24 Apr 2025 15:42:27 +0200 Subject: [PATCH 11/16] exp_cor revert default --- R/CST_QuantileMapping.R | 35 +++++++++++++++++++---------------- R/EvalTrainIndices.R | 8 ++++---- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 415c0bc4..23f98c35 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -297,7 +297,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', applied <- exp * NA index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, sample.length_cor = dim(exp_cor)[sdate_dim], ...) - print(index) + if(is.null(exp_cor)){ for(x in index){ nas_pos <- which(!is.na(exp[, x$eval.dexes])) @@ -311,8 +311,6 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) - if(is.null(exp_cor)){ - if(na.rm){ if(eval.method == "in-sample"){ @@ -332,24 +330,29 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', } } } + } }else{ applied <- exp_cor * NA - nas_cor <- which(!is.na(exp_cor[, x$eval.dexes])) + print(eval.method) + # nas_cor <- which(!is.na(exp_cor[, x$eval.dexes])) # add check if method is hindcast else give warning that since exp_cor is not null hindcast is selected #hindcast vs forecast - if(na.rm){ - applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], adjust, ...) - #applied[nas_pos] <- doQmap(as.vector(exp_cor), adjust, ...) - }else{ - if(anyNA(exp_cor[, x$train.dexes])){ - #if(anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])){ - applied[nas_cor] <- NA - }else{ - applied[nas_pos, x$eval.dexes] <- doQmap(as.vector(exp_cor), adjust, ...) - } + if (na.rm) { + tryCatch({ + adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], + method = method, ...) + applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], + adjust, ...) + }, + error = function(error_message) { + return(applied) + }) + } else { + adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) + applied <- doQmap(as.vector(exp_cor), adjust, ...) + } + dim(applied) <- dim(exp_cor) } - } - } applied } diff --git a/R/EvalTrainIndices.R b/R/EvalTrainIndices.R index d0fb3fd8..87ee316a 100644 --- a/R/EvalTrainIndices.R +++ b/R/EvalTrainIndices.R @@ -30,13 +30,13 @@ #' #' @examples #' # Leave-k-out cross-validation -#' Eval_TrainIndices("leave-k-out", sample.length = 10, sample.length_cor = 5, k = 3) +#' EvalTrainIndices("leave-k-out", sample.length = 10, sample.length_cor = 5, k = 3) #' # Retrospective cross-validation -#' Eval_TrainIndices("retrospective", sample.length = 10, sample.length_cor = 5, k = 3) +#' EvalTrainIndices("retrospective", sample.length = 10, sample.length_cor = 5, k = 3) #' # In-sample validation -#' Eval_TrainIndices("in-sample", sample.length = 10, sample.length_cor = 5) +#' EvalTrainIndices("in-sample", sample.length = 10, sample.length_cor = 5) #' # Hindcast vs. Forecast validation -#' Eval_TrainIndices("hindcast-vs-forecast", sample.length = 10, sample.length_cor = 5) +#' EvalTrainIndices("hindcast-vs-forecast", sample.length = 10, sample.length_cor = 5) #' #' @export EvalTrainIndices <- function(eval.method, sample.length, sample.length_cor, -- GitLab From 19165c054889c299d7c9a8feaf64242dad6bca75 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 24 Apr 2025 16:24:12 +0200 Subject: [PATCH 12/16] evalindices in quantilemapping --- R/CST_QuantileMapping.R | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 23f98c35..583e0892 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -287,18 +287,19 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', if(!is.null(exp_cor) & eval.method != "hindcast-vs-forcast"){ - warning("Defaulting to 'hindcast-vs-forcast' as exp_cor is not 'NULL'") + eval.method = 'hindcast-vs-forecast' + # warning("Defaulting to 'hindcast-vs-forcast' as exp_cor is not 'NULL'") } if(eval.method == "retrospective" & k == 1){ stop("k = 1 not expected, trainindex need at least two non-NA values to interpolate ") - } + } #can replace with tryCatch for smooth fail + - applied <- exp * NA index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, sample.length_cor = dim(exp_cor)[sdate_dim], ...) - if(is.null(exp_cor)){ for(x in index){ + nas_pos <- which(!is.na(exp[, x$eval.dexes])) obs2 <- as.vector(obs[, x$train.dexes]) @@ -310,7 +311,8 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', exp_cor2 <- exp_cor2[!is.na(exp_cor2)] adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) - + if(is.null(exp_cor)){ + applied <- exp * NA if(na.rm){ if(eval.method == "in-sample"){ @@ -330,29 +332,30 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', } } } - } }else{ applied <- exp_cor * NA - print(eval.method) - # nas_cor <- which(!is.na(exp_cor[, x$eval.dexes])) # add check if method is hindcast else give warning that since exp_cor is not null hindcast is selected #hindcast vs forecast if (na.rm) { tryCatch({ - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], + + adjust <- fitQmap(obs2, exp2, method = method, ...) + applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], adjust, ...) + }, error = function(error_message) { - return(applied) + return(applied) }) - } else { + } else { adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) applied <- doQmap(as.vector(exp_cor), adjust, ...) } dim(applied) <- dim(exp_cor) - } + } + } applied } -- GitLab From 8a6bbc3e72cbecdf41ae04fa725bc1eb0fb995fd Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 24 Apr 2025 17:12:38 +0200 Subject: [PATCH 13/16] rm '...' from index<-Evaltrain --- R/CST_QuantileMapping.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 583e0892..daed5f0f 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -297,7 +297,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, - sample.length_cor = dim(exp_cor)[sdate_dim], ...) + sample.length_cor = dim(exp_cor)[sdate_dim]) for(x in index){ -- GitLab From d0b64a2bc19826d8495679f395fdbf35b5a91f92 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Fri, 25 Apr 2025 10:11:35 +0200 Subject: [PATCH 14/16] 'applied' placed outsideloop --- R/CST_QuantileMapping.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index daed5f0f..96402eea 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -298,6 +298,8 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, sample.length_cor = dim(exp_cor)[sdate_dim]) + applied <- exp * NA + for(x in index){ @@ -312,7 +314,6 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) if(is.null(exp_cor)){ - applied <- exp * NA if(na.rm){ if(eval.method == "in-sample"){ -- GitLab From 07106fa04cdb831d84ed0840fda1507e980b595a Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 12 Jun 2025 15:30:44 +0200 Subject: [PATCH 15/16] documentation update --- R/CST_QuantileMapping.R | 115 +++++++--------------------------------- 1 file changed, 20 insertions(+), 95 deletions(-) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 96402eea..05ab3449 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -31,6 +31,14 @@ #' computation using multiApply function. The default value is NULL (1). #'@param ... Additional parameters to be used by the method choosen. See qmap #' package for details. +#'@param eval.method A character string indicating the evaluation method for cross-validaton. +#'the default method is 'leave-k-out', other available methods are +#''retrospective', 'in-sample', 'hindcast-vs-forecast'. +#'@param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer greater than 1, +#' indicating when to start. #' #'@return An object of class \code{s2dv_cube} containing the experimental data #'after applying the quantile mapping correction. @@ -57,6 +65,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', memb_dim = 'member', window_dim = NULL, method = 'QUANT', na.rm = FALSE, + eval.method = "leave-k-out", k = 1, ncores = NULL, ...) { # Check 's2dv_cube' if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { @@ -123,6 +132,14 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', #' computation using multiApply function. The default value is NULL (1). #'@param ... Additional parameters to be used by the method choosen. See qmap #' package for details. +#'@param eval.method A character string indicating the evaluation method for cross-validaton. +#'the default method is 'leave-k-out', other available methods are +#''retrospective', 'in-sample', 'hindcast-vs-forecast'. +#'@param k Positive integer. Default = 1. +#' In method 'leave-k-out', 'k' is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method 'retrospective', 'k' can be any positive integer greater than 1, +#' indicating when to start. #' #'@return An array containing the experimental data after applying the quantile #'mapping correction. @@ -146,6 +163,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', memb_dim = 'member', window_dim = NULL, method = 'QUANT', na.rm = FALSE, + eval.method = "leave-k-out", k = 1, ncores = NULL, ...) { # exp and obs obsdims <- names(dim(obs)) @@ -253,8 +271,7 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', return(qmaped) } -#.qmapcor Theertha's version -#' TEST +# Example # exp <- as.numeric(1:prod(6,10,15)) # dim(exp) <- c(member = 6, syear = 10, window = 15) # obs <- as.numeric(rnorm(prod(1,10,15), 50)) @@ -315,9 +332,8 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) if(is.null(exp_cor)){ if(na.rm){ - if(eval.method == "in-sample"){ - applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) + applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) }else{ applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) } @@ -363,94 +379,3 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', -#.qmapcor Nuria's version ### -# .qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', -# eval.method = 'in-sample', -# method = 'QUANT', na.rm = FALSE, ...) { -# -# # exp: [memb (+ window), sdate] -# # obs: [memb (+ window), sdate] -# # exp_cor: NULL or [memb, sdate] -# -# if (is.null(exp_cor)) { -# applied <- exp * NA -# if (eval.method == 'leave-k-out') { -# -# index <- EvalTrainIndices(eval.method, sample.length = dim(exp)[sdate_dim], k = 1) -# -# applied <- lapply(index, function(x){ -# if (na.rm) { -# # select start date for cross-val -# nas_pos <- which(!is.na(exp[, x$eval.dexes])) -# obs2 <- as.vector(obs[, x$train.dexes]) -# exp2 <- as.vector(exp[, x$train.dexes]) -# exp_cor2 <- as.vector(exp[, x$eval.dexes]) -# # remove NAs -# obs2 <- obs2[!is.na(obs2)] -# exp2 <- exp2[!is.na(exp2)] -# exp_cor2 <- exp_cor2[!is.na(exp_cor2)] -# tryCatch({ -# adjust <- fitQmap(obs2, exp2, method = method, ...) -# applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) -# }, -# error = function(error_message) { -# return(applied[, x$eval.dexes]) -# }) -# } else { -# # na.rm = FALSE shouldn't fail, just return NA -# if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { -# applied[, x$eval.dexes] <- NA -# } else { -# adjust <- fitQmap(as.vector(obs[, x$train.dexes]), as.vector(exp[, x$train.dexes]), -# method = method, ...) -# -# print(adjust$par$fitq[1,1]) -# -# exp2 <- exp[, x$eval.dexes] -# if (sum(is.na(exp2)) >= 1) { -# app <- rep(NA, length(exp2)) -# nas_pos <- which(is.na(exp2)) -# exp2 <- exp2[!is.na(exp2)] -# app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) -# } else { -# app <- doQmap(as.vector(exp2), adjust, ...) -# } -# applied[, x$eval.dexes] <- app -# } -# } -# }) -# } else if (eval.method == 'in-sample') { -# if (na.rm) { -# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], -# method = method, ...) -# applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) -# } else { -# adjust <- fitQmap(obs, exp, method = method, ...) -# applied <- doQmap(exp, adjust, ...) -# } -# } -# } else { -# applied <- exp_cor * NA -# if (na.rm) { -# tryCatch({ -# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], -# method = method, ...) -# applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], -# adjust, ...) -# }, -# error = function(error_message) { -# return(applied) -# }) -# } else { -# adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) -# applied <- doQmap(as.vector(exp_cor), adjust, ...) -# } -# dim(applied) <- dim(exp_cor) -# } -# applied <- exp * NA -# adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], method = method, ...) -# applied[!is.na(exp)] <- doQmap(exp[!is.na(exp)], adjust, ...) -# -# -# return(applied) -# } -- GitLab From 449b33045f5b2a6d4610c56675b2662a238d8cf1 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 12 Jun 2025 16:43:19 +0200 Subject: [PATCH 16/16] add param in Quantile passing in wrapper --- R/CST_QuantileMapping.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 05ab3449..95e65540 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -81,6 +81,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', exp_cor = exp_cor$data, sdate_dim = sdate_dim, memb_dim = memb_dim, window_dim = window_dim, method = method, + eval.method = eval.method, k = k, na.rm = na.rm, ncores = ncores, ...) if (is.null(exp_cor)) { exp$data <- QMapped -- GitLab