diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index c4942eaade57a66320625ea5ad96f2aef4f0f1e7..c5d87469facca7ec7493bb397895a092cde26c55 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -39,9 +39,10 @@ #' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or #' \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method A character string indicating the sampling method used, it -#' can be either \code{in-sample} or \code{leave-one-out}. Default value is the -#' \code{leave-one-out} cross validation. In case the forecast is provided, any -#' chosen eval.method is over-ruled and a third option is used. +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences @@ -73,6 +74,13 @@ #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is one. +#'@param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An object of class \code{s2dv_cube} containing the calibrated #'forecasts in the element \code{data} with the dimensions nexp, nobs and same @@ -146,7 +154,7 @@ #'@importFrom ClimProjDiags Subset #'@export CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = FALSE, + eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL, k = 1, tail.out = TRUE) { @@ -200,7 +208,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'minimizes the Continuous Ranked Probability Score (CRPS). The #'\code{"rpc-based"} method adjusts the forecast variance ensuring that the #'ratio of predictable components (RPC) is equal to one, as in Eade et al. -#'(2014). Both in-sample or our out-of-sample (leave-one-out cross +#'(2014). Both in-sample or our out-of-sample (leave-k-out cross #'validation) calibration are possible. #' #'@param exp A multidimensional array with named dimensions (at least 'sdate' @@ -219,11 +227,11 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@param cal.method A character string indicating the calibration method used, #' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} #' or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method A character string indicating the sampling method used, -#' can be either \code{in-sample} or \code{leave-one-out}. Default value is -#' the \code{leave-one-out} cross validation. In case the forecast is -#' provided, any chosen eval.method is over-ruled and a third option is -#' used. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences @@ -254,6 +262,13 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computation using multiApply function. The default value is NULL (one core). +#'@param k k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #' #'@return An array containing the calibrated forecasts with the dimensions #'nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of @@ -301,7 +316,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@importFrom ClimProjDiags Subset #'@export Calibration <- function(exp, obs, exp_cor = NULL, - cal.method = "mse_min", eval.method = "leave-one-out", + cal.method = "mse_min", eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, @@ -334,7 +349,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" # if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; - # no need for "leave-one-out" or "in-sample") + # no need for "leave-k-out" or "in-sample") eval.method <- "hindcast-vs-forecast" expcordims <- names(dim(exp_cor)) if (is.null(expcordims)) { @@ -478,9 +493,9 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } ## eval.method - if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold', 'retrospective'))) { + if (!any(eval.method %in% c('in-sample', 'leave-k-out', 'hindcast-vs-forecast', 'retrospective'))) { stop(paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "the sampling method used ('in-sample', 'leave-k-out', 'hindcast-vs-forecast' or ", "'retrospective').")) } ## multi.model @@ -567,77 +582,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor, - k = 1, tail.out = TRUE) { -# eval.method: 'leave-one-out', 'k-fold', 'in-sample', "hindcast-vs-forecast", "retrospective" -# amt.points: length of the sample -# amt.points_cor: only needed for hindcast-vs-forecast method -# k: the number of samples to leave-out -# tail_out: boolean for 'k-fold method; TRUE to remove both extremes of the sample when k is -# in the extreme keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, -# eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. -# amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) - - if (k >= amt.points && !is.null(k) | k < 1) { - stop("k needs to be a positive integer less than the amt.points") - } - if (eval.method == "leave-one-out") { - dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, - train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "k-fold") { - # k is a odd number - if (tail.out == TRUE){ - dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { - if (x >= ((kfold-1)/2) + 1 && x + ((kfold-1)/2) <= amt.points) { - ind <- (x-((kfold-1)/2)):(x+((kfold-1)/2)) - } else if (x < ((kfold-1)/2) + 1) { - ind <- c((amt.points - ((kfold-1)/2-x)):amt.points, 1:(x+(kfold-1)/2)) - } else if ((x+((kfold-1)/2)) > amt.points) { - ind <- c((x-(kfold-1)/2):amt.points, 1:(((kfold-1)/2)-amt.points+x)) - } else { - stop("Review make.eval.train.dexes function") - } - return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-ind])) - }) - } else { - if (k != 1){ - k <- (k-1)/2 - } - - dexes.lst <- lapply(seq(1, amt.points), function(x) { - start_idx <- max(1, x - k) - end_idx <- min(amt.points, x + k) - eval_range <- start_idx:end_idx - train_idx <- setdiff(seq(1, amt.points), eval_range) - return(list(eval.dexes = x, train.dexes = train_idx)) - }) - } - - } else if (eval.method == "retrospective") { - # k can be any integer indicating the when to start - dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), - function(x, mindata = k) { - if (x > k) { - eval.dexes <- x - train.dexes <- 1:(x-1) - return(list(eval.dexes = x, - train.dexes = 1:(x-1))) - }})) - } else if (eval.method == "in-sample") { - dexes.lst <- list(list(eval.dexes = seq(1, amt.points), - train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast") { - dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), - train.dexes = seq(1, amt.points))) - } else { - stop(paste0("unknown sampling method: ", eval.method)) - } - - return(dexes.lst) -} - .cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + eval.method = "leave-k-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, k = 1, tail.out = TRUE) { # exp: [memb, sdate, (dat)] @@ -672,8 +618,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, sdate <- expdims[2] # sdate sdate_cor <- expdims_cor[2] - var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) - + if(eval.method == 'retrospective'){ + var.cor.fc <- array(dim = c(expdims_cor[1], expdims_cor[2]-k, nexp = nexp, nobs = nobs)) + }else{ + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) + } for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[, , i] @@ -693,22 +642,26 @@ Calibration <- function(exp, obs, exp_cor = NULL, expcor_data <- exp_cor } - eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, - amt.points = sdate, - amt.points_cor = sdate_cor, + eval.train.dexeses <- EvalTrainIndices(eval.method = eval.method, + sample.length = sdate, + sample.length_cor = sdate_cor, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { # defining training (tr) and evaluation (ev) subsets # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) - eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes fc.ev <- expcor_data[, eval.dexes, drop = FALSE] fc.tr <- exp_data[, train.dexes] obs.tr <- obs_data[train.dexes, drop = FALSE] - + if(eval.method == 'retrospective'){ + eval.dexes = eval.dexes - k + }else{ + eval.dexes = eval.dexes + } if (cal.method == "bias") { - var.cor.fc[, eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + var.cor.fc[, eval.dexes , i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) # forecast correction implemented } else if (cal.method == "evmos") { # forecast correction implemented @@ -717,11 +670,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, # calculate value for regression parameters init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) # correct evaluation subset - var.cor.fc[, eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + var.cor.fc[, eval.dexes , i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "mse_min") { quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) - var.cor.fc[, eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + var.cor.fc[, eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "crps_min") { quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) @@ -767,7 +720,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (is.null(dat_dim)) { + if(eval.method == 'retrospective'){ + dim(var.cor.fc) <- c(expdims_cor[1], (expdims_cor[2]-k)) + }else{ dim(var.cor.fc) <- dim(exp_cor)[1:2] + } } return(var.cor.fc) } diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index f973cde79ff9a590438be36cee50d5c16f55c234..94ef489f87e0ffc89e68aabcafe0703f0eb05ed1 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -57,8 +57,15 @@ #' Vannitsem (2019). Finally, the \code{obs} method classifies the observations #' into the different categories and therefore contains only 0 and 1 values. #'@param eval.method Is the sampling method used, can be either -#' \code{"in-sample"} or \code{"leave-one-out"}. Default value is the -#' \code{"leave-one-out"} cross validation. +#' \code{"in-sample"} or \code{"leave-k-out"}. Default value is the +#' \code{"leave-k-out"} cross validation. +#'@param k k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, indicating when to start. +#'@param tail.out Boolean for 'leave-k-out' method; TRUE to remove both extremes +#' keeping the same sample size for all k-folds (e.g. sample.length=50, k=3, +#' eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +#' sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) #'@param ... other parameters to be passed on to the calibration procedure. #' #'@return An object of class \code{s2dv_cube} containing the categorical @@ -96,7 +103,7 @@ #'@import abind #'@export CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", - eval.method = "leave-one-out", + eval.method = "leave-k-out", amt.cat = 3, k = 1, tail.out = TRUE, ...) { # Check 's2dv_cube' @@ -154,9 +161,11 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", #' Rajagopalan et al. (2002), Robertson et al. (2004) and Van Schaeybroeck and #' Vannitsem (2019). Finally, the \code{obs} method classifies the observations #' into the different categories and therefore contains only 0 and 1 values. -#'@param eval.method Is the sampling method used, can be either -#' \code{"in-sample"} or \code{"leave-one-out"}. Default value is the -#' \code{"leave-one-out"} cross validation. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample}, \code{leave-k-out}, \code{retrospective} or +#' \code{hindcast-vs-forecast}. Default value is the \code{leave-k-out} cross validation. +#' In case the forecast is provided, any chosen eval.method is over-ruled +#' and a third option is used. #'@param ... Other parameters to be passed on to the calibration procedure. #' #'@return An array containing the categorical forecasts in the element called @@ -304,7 +313,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.coeff <- amt.mdl + 1 var.cat.fc <- array(NA, c(amt.cat, amt.sdate)) - eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) + eval.train.dexeses <- EvalTrainIndices(eval.method = eval.method, sample.length = amt.sdate, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index acd5259dd8f1c16da23ff21504c5b386fd7657b0..95e65540262bf630b6cd433267c8f8f179460818 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -31,6 +31,14 @@ #' computation using multiApply function. The default value is NULL (1). #'@param ... Additional parameters to be used by the method choosen. See qmap #' package for details. +#'@param eval.method A character string indicating the evaluation method for cross-validaton. +#'the default method is 'leave-k-out', other available methods are +#''retrospective', 'in-sample', 'hindcast-vs-forecast'. +#'@param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer greater than 1, +#' indicating when to start. #' #'@return An object of class \code{s2dv_cube} containing the experimental data #'after applying the quantile mapping correction. @@ -57,6 +65,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', memb_dim = 'member', window_dim = NULL, method = 'QUANT', na.rm = FALSE, + eval.method = "leave-k-out", k = 1, ncores = NULL, ...) { # Check 's2dv_cube' if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { @@ -72,6 +81,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', exp_cor = exp_cor$data, sdate_dim = sdate_dim, memb_dim = memb_dim, window_dim = window_dim, method = method, + eval.method = eval.method, k = k, na.rm = na.rm, ncores = ncores, ...) if (is.null(exp_cor)) { exp$data <- QMapped @@ -123,6 +133,14 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', #' computation using multiApply function. The default value is NULL (1). #'@param ... Additional parameters to be used by the method choosen. See qmap #' package for details. +#'@param eval.method A character string indicating the evaluation method for cross-validaton. +#'the default method is 'leave-k-out', other available methods are +#''retrospective', 'in-sample', 'hindcast-vs-forecast'. +#'@param k Positive integer. Default = 1. +#' In method 'leave-k-out', 'k' is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method 'retrospective', 'k' can be any positive integer greater than 1, +#' indicating when to start. #' #'@return An array containing the experimental data after applying the quantile #'mapping correction. @@ -146,6 +164,7 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', memb_dim = 'member', window_dim = NULL, method = 'QUANT', na.rm = FALSE, + eval.method = "leave-k-out", k = 1, ncores = NULL, ...) { # exp and obs obsdims <- names(dim(obs)) @@ -253,70 +272,111 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', return(qmaped) } +# Example +# exp <- as.numeric(1:prod(6,10,15)) +# dim(exp) <- c(member = 6, syear = 10, window = 15) +# obs <- as.numeric(rnorm(prod(1,10,15), 50)) +# dim(obs) <- c(member = 1, syear = 10, window = 15) +# fcst <- 100*(1:prod(8,1,1)) +# dim(fcst) <- c(member = 8, syear = 1, swindow = 1) +# +# +# res_sample <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "in-sample") +# +# res_leave <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "leave-k-out", k = 1) +# +# res_retro <- QuantileMapping(exp = exp, obs = obs, exp_cor = NULL, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "retrospective", +# k = 3) +# +# res_hind <- QuantileMapping(exp = exp, obs = obs, exp_cor = fcst, +# memb_dim = 'member', sdate_dim = 'syear', +# window_dim = 'window', eval.method = "hindcast-vs-forecast") +#' + .qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', + eval.method = 'leave-k-out', k = 1, method = 'QUANT', na.rm = FALSE, ...) { - - # exp: [memb (+ window), sdate] - # obs: [memb (+ window), sdate] - # exp_cor: NULL or [memb, sdate] - - if (is.null(exp_cor)) { - applied <- exp * NA - for (sd in 1:dim(exp)[sdate_dim]) { - if (na.rm) { - # select start date for cross-val - nas_pos <- which(!is.na(exp[, sd])) - obs2 <- as.vector(obs[, -sd]) - exp2 <- as.vector(exp[, -sd]) - exp_cor2 <- as.vector(exp[, sd]) - # remove NAs - obs2 <- obs2[!is.na(obs2)] - exp2 <- exp2[!is.na(exp2)] - exp_cor2 <- exp_cor2[!is.na(exp_cor2)] - tryCatch({ - adjust <- fitQmap(obs2, exp2, method = method, ...) - applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) - }, - error = function(error_message) { - return(applied[, sd]) - }) - } else { - # na.rm = FALSE shouldn't fail, just return NA - if (anyNA(obs[, -sd]) | anyNA(exp[, -sd])) { - applied[, sd] <- NA - } else { - adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]), - method = method, ...) - exp2 <- exp[, sd] - if (sum(is.na(exp2)) >= 1) { - app <- rep(NA, length(exp2)) - nas_pos <- which(is.na(exp2)) - exp2 <- exp2[!is.na(exp2)] - app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) - } else { - app <- doQmap(as.vector(exp2), adjust, ...) + + + if(!is.null(exp_cor) & eval.method != "hindcast-vs-forcast"){ + eval.method = 'hindcast-vs-forecast' + # warning("Defaulting to 'hindcast-vs-forcast' as exp_cor is not 'NULL'") + } + + if(eval.method == "retrospective" & k == 1){ + stop("k = 1 not expected, trainindex need at least two non-NA values to interpolate ") + } #can replace with tryCatch for smooth fail + + + index <- EvalTrainIndices(eval.method = eval.method, sample.length = dim(exp)[sdate_dim], k = k, + sample.length_cor = dim(exp_cor)[sdate_dim]) + applied <- exp * NA + + for(x in index){ + + + nas_pos <- which(!is.na(exp[, x$eval.dexes])) + obs2 <- as.vector(obs[, x$train.dexes]) + exp2 <- as.vector(exp[, x$train.dexes]) + exp_cor2 <- as.vector(exp[, x$eval.dexes]) + # remove NAs + obs2 <- obs2[!is.na(obs2)] + exp2 <- exp2[!is.na(exp2)] + exp_cor2 <- exp_cor2[!is.na(exp_cor2)] + + adjust <- fitQmap(obs2, exp2, method = 'QUANT', ...) + if(is.null(exp_cor)){ + if(na.rm){ + if(eval.method == "in-sample"){ + applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) + }else{ + applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) + } + }else{ + + if (anyNA(obs[, x$train.dexes]) | anyNA(exp[, x$train.dexes])) { + applied[, x$eval.dexes] <- NA + }else{ + if(eval.method == "in-sample"){ + applied[nas_pos] <- doQmap(exp_cor2, adjust, ...) + }else{ + applied[nas_pos, x$eval.dexes] <- doQmap(exp_cor2, adjust, ...) } - applied[, sd] <- app } } - } - } else { - applied <- exp_cor * NA - if (na.rm) { - tryCatch({ - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], - method = method, ...) - applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], - adjust, ...) + }else{ + applied <- exp_cor * NA + # add check if method is hindcast else give warning that since exp_cor is not null hindcast is selected + #hindcast vs forecast + if (na.rm) { + tryCatch({ + + adjust <- fitQmap(obs2, exp2, + method = method, ...) + + applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], + adjust, ...) + }, error = function(error_message) { - return(applied) + return(applied) }) - } else { - adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) - applied <- doQmap(as.vector(exp_cor), adjust, ...) + } else { + adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) + applied <- doQmap(as.vector(exp_cor), adjust, ...) + } + dim(applied) <- dim(exp_cor) } - dim(applied) <- dim(exp_cor) } - return(applied) + applied } + + + + diff --git a/R/EvalTrainIndices.R b/R/EvalTrainIndices.R new file mode 100644 index 0000000000000000000000000000000000000000..87ee316a948de28d58236339ff0daf0f9e524b58 --- /dev/null +++ b/R/EvalTrainIndices.R @@ -0,0 +1,93 @@ +#' Generate Training and Evaluation Indices for Cross-Validation +#' +#' This function generates training and evaluation indices based on different +#' cross-validation methods. +#' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} +#' +#' @param eval.method Character. The cross-validation method. Options include: +#' -\code{leave-k-out}: Leaves out \code{k} points at a time for evaluation. +#' -\code{retrospective}: Uses past data for training and a future point for evaluation. +#' -\code{in-sample}: Uses the entire dataset for both training and evaluation. +#' -\code{hindcast-vs-forecast}: Uses all years from the hindcast sdate dimension +#' as training and all years from the forecast sdate dimension will be corrected. +#' +#' @param sample.length Integer. Length of the dataset. +#' @param sample.length_cor Integer. Length of forecast dataset in +#' \code{hindcast-vs-forecast} method. +#' @param k Positive integer. Default = 1. +#' In method \code{leave-k-out}, \code{k} is expected to be odd integer, +#' indicating the number of points to leave out. +#' In method \code{retrospective}, \code{k} can be any positive integer, +#' indicating when to start. +#' @param tail.out Logical for method \code{leave-k-out}. Default = TRUE. +#' TRUE to remove both extremes keeping the same sample size for all k-folds +#' (e.g. sample.length=50, k=3, eval.dexes=1, train.dexes={3,49}). +#' FALSE to remove only the corresponding tail +#' (e.g. sample.length=50, k=3, eval.dexes=1, train.dexes={3,50}) +#' @return A list of lists, where each element contains: +#' \code{eval.dexes}: Indices of evaluation points. +#' \code{train.dexes}: Indices of training points. +#' +#' @examples +#' # Leave-k-out cross-validation +#' EvalTrainIndices("leave-k-out", sample.length = 10, sample.length_cor = 5, k = 3) +#' # Retrospective cross-validation +#' EvalTrainIndices("retrospective", sample.length = 10, sample.length_cor = 5, k = 3) +#' # In-sample validation +#' EvalTrainIndices("in-sample", sample.length = 10, sample.length_cor = 5) +#' # Hindcast vs. Forecast validation +#' EvalTrainIndices("hindcast-vs-forecast", sample.length = 10, sample.length_cor = 5) +#' +#' @export +EvalTrainIndices <- function(eval.method, sample.length, sample.length_cor, + k = 1, tail.out = TRUE) { + + if (eval.method == "leave-k-out") { + + if (is.null(k) || k >= sample.length || k < 1 || k %% 2 == 0 ) { + warning("k is expected to be a positive odd integer less than the sample.length") + } + + if (k == 1) { + tail.out == TRUE + } + amt.seq <- 1:sample.length + strike = (k - 1) / 2 + alt_amt.seq <- c(tail(amt.seq, strike), amt.seq, head(amt.seq, strike)) + + dexes.lst <- lapply(seq(1, sample.length), function(x, kfold = k) { + if (tail.out) { + ind = alt_amt.seq[x:(x + (kfold - 1))] + } else{ + ind = ((x - strike):(x + strike)) + ind = ind[ind > 0 & ind <= sample.length] + } + return(list(eval.dexes = x, train.dexes = amt.seq[!(amt.seq %in% ind)])) + }) + } else if (eval.method == "retrospective") { + # k can be any integer indicating the when to start + if(k >= sample.length){ + warning("k is expected to be less than the sample.length") + } + dexes.lst <- base::Filter(length, lapply(seq(1, sample.length), + function(x, mindata = k) { + if (x > k) { + eval.dexes <- x + train.dexes <- 1:(x-1) + return(list(eval.dexes = x, + train.dexes = 1:(x-1))) + }})) + + } else if (eval.method == "in-sample") { + dexes.lst <- list(list(eval.dexes = seq(1, sample.length), + train.dexes = seq(1, sample.length))) + } else if (eval.method == "hindcast-vs-forecast") { + dexes.lst <- list(list(eval.dexes = seq(1, sample.length_cor), + train.dexes = seq(1, sample.length))) + } else { + stop(paste0("unknown sampling method: ", eval.method)) + } + + return(dexes.lst) +} + diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 285e81c8907cd457219d6af3ea881577683baff9..29fb5f4cb68d71184527450a2fc7e46f38f371d2 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -158,8 +158,8 @@ test_that("1. Input checks", { ) expect_error( Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), - obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), - dat_dim = 'dataset'), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", " except 'memb_dim' and 'dat_dim'.") ) @@ -218,7 +218,7 @@ test_that("1. Input checks", { expect_error( Calibration(exp4, obs4, eval.method = 'biass'), paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "the sampling method used ('in-sample', 'leave-k-out', 'hindcast-vs-forecast' or ", "'retrospective')."), fixed = TRUE ) @@ -234,6 +234,7 @@ test_that("1. Input checks", { ) }) + ############################################## test_that("2. Output checks: dat1", { @@ -454,14 +455,14 @@ test_that("6. Output checks: dat4", { tolerance = 0.0001 ) expect_equal( - as.vector(Calibration(exp4, obs4, eval.method = "k-fold", k = 5, tail.out = T))[1:5], + as.vector(Calibration(exp4, obs4, eval.method = "leave-k-out", k = 5, tail.out = T))[1:5], c(-0.8149557, 0.2043942, -1.0781616, 1.9806657, 0.3879362), tolerance = 0.0001 ) expect_equal( as.vector(Calibration(exp4, obs4, eval.method = "retrospective", k = 3)[1,1:5]), - c(NA, NA, NA, 1.6267865, -0.1658674), + c(1.6267865, -0.1658674, 0.3998208, 3.3390213, 0.4102334), tolerance = 0.0001 ) diff --git a/tests/testthat/test-EvalTrainIndices.R b/tests/testthat/test-EvalTrainIndices.R new file mode 100644 index 0000000000000000000000000000000000000000..62a33707965a2f616c19804e11e0dc5546c0e0c1 --- /dev/null +++ b/tests/testthat/test-EvalTrainIndices.R @@ -0,0 +1,108 @@ + +############################################## + +test_that("1. Input checks", { + # s2dv_cube + expect_error( + EvalTrainIndices(eval.method = 1, 2), + paste0("unknown sampling method: 1") + ) + expect_warning( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 12), + paste0("k is expected to be a positive odd integer less than the sample.length") + ) + expect_warning( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 10), + paste0("k is expected to be less than the sample.length") + ) +}) + +############################################## + +test_that("2. Output checks: leave-k-out", { + expect_equal( + is.list(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + TRUE + ) + expect_equal( + is.list(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + TRUE + ) + expect_equal( + length(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)), + 10 + ) + expect_equal( + names(EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]]), + c("eval.dexes", "train.dexes") + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[1]], + list(eval.dexes = 1, + train.dexes = 4:8) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[2]], + list(eval.dexes = 2, + train.dexes = 5:9) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[3]], + list(eval.dexes = 3, + train.dexes = 6:10) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[9]], + list(eval.dexes = 9, + train.dexes = 2:6) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5)[[10]], + list(eval.dexes = 10, + train.dexes = 3:7) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[1]], + list(eval.dexes = 1, + train.dexes = 4:10) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[5]], + list(eval.dexes = 5, + train.dexes = c(1,2,8:10)) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 10, NULL, 5, tail.out = FALSE)[[10]], + list(eval.dexes = 10, + train.dexes = c(1:7)) + ) + expect_equal( + EvalTrainIndices(eval.method = 'leave-k-out', 20, NULL, 1)[[1]], + EvalTrainIndices(eval.method = 'leave-k-out', 20, NULL, 1, tail.out = FALSE)[[1]] + ) + +}) + +############################################## + +test_that("3. Output checks: retrospective", { + expect_equal( + length(EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)), + 7 + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[1]], + list(eval.dexes = 4, train.dexes = 1:3) + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[2]], + list(eval.dexes = 5, train.dexes = 1:4) + ) + expect_equal( + EvalTrainIndices(eval.method = 'retrospective', 10, NULL, 3)[[7]], + list(eval.dexes = 10, train.dexes = 1:9) + ) +}) + + + \ No newline at end of file diff --git a/tests/testthat/test-as.s2dv_cube.R b/tests/testthat/test-as.s2dv_cube.R index 8ff195827853e4e974a147ea75d2617786c9e22e..15d69a2dd52cc1a7ecada9cda8d519d8ce9743a0 100644 --- a/tests/testthat/test-as.s2dv_cube.R +++ b/tests/testthat/test-as.s2dv_cube.R @@ -332,44 +332,44 @@ test_that("7. Tests from Start()", { ############################################## -test_that("8. Tests from Start()", { - path <- paste0('/esarchive/exp/ecearth/a3t4/diags/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/$memb$/Omon/$var$/gn/', - 'v*/$var$_Omon_EC-Earth3-LR_piControl_$memb$_gn_$chunk$.nc') - suppressWarnings( - data7 <- Start(dat = list(list(name = 'a3t4', path = path)), - var = 'tosmean', - memb = paste0('r', 1:5, 'i1p1f1'), - region = c("ATL3", "Global_Ocean", "Nino3.4"), - time = indices(1:10), - chunk = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = 'chunk', region = NULL), - num_procs = 8, - retrieve = T) - ) - - res7 <- as.s2dv_cube(data7) - - # dimensions - expect_equal( - dim(res7$data), - c(dat = 1, var = 1, memb = 5, region = 3, time = 10) - ) - # elements - expect_equal( - names(res7), - c("data", "dims", "coords", "attrs") - ) - expect_equal( - res7$attrs$Variable$varName, - c('tosmean') - ) - # Dates - expect_equal( - dim(res7$attrs$Dates), - c(time = 10) - ) -}) +# test_that("8. Tests from Start()", { +# path <- paste0('/esarchive/exp/ecearth/a3t4/diags/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/$memb$/Omon/$var$/gn/', +# 'v*/$var$_Omon_EC-Earth3-LR_piControl_$memb$_gn_$chunk$.nc') +# suppressWarnings( +# data7 <- Start(dat = list(list(name = 'a3t4', path = path)), +# var = 'tosmean', +# memb = paste0('r', 1:5, 'i1p1f1'), +# region = c("ATL3", "Global_Ocean", "Nino3.4"), +# time = indices(1:10), +# chunk = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = 'chunk', region = NULL), +# num_procs = 8, +# retrieve = T) +# ) +# +# res7 <- as.s2dv_cube(data7) +# +# # dimensions +# expect_equal( +# dim(res7$data), +# c(dat = 1, var = 1, memb = 5, region = 3, time = 10) +# ) +# # elements +# expect_equal( +# names(res7), +# c("data", "dims", "coords", "attrs") +# ) +# expect_equal( +# res7$attrs$Variable$varName, +# c('tosmean') +# ) +# # Dates +# expect_equal( +# dim(res7$attrs$Dates), +# c(time = 10) +# ) +# }) ############################################## \ No newline at end of file diff --git a/tests/testthat/test-make-eval-train-dexes.R b/tests/testthat/test-make-eval-train-dexes.R deleted file mode 100644 index a74a06c0fa18506bf3c0c0751194f611543c711e..0000000000000000000000000000000000000000 --- a/tests/testthat/test-make-eval-train-dexes.R +++ /dev/null @@ -1,93 +0,0 @@ -############################################## - -test_that("1. Input checks", { - # s2dv_cube - expect_error( - .make.eval.train.dexes(eval.method = 1, 2), - paste0("unknown sampling method: 1") - ) - expect_error( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 12), - paste0("k needs to be a positive integer less than the amt.points") - ) - expect_error( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 10), - paste0("k needs to be a positive integer less than the amt.points") - ) -}) - -############################################## - -test_that("2. Output checks: k-fold", { - expect_equal( - is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), - TRUE - ) - expect_equal( - is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), - TRUE - ) - expect_equal( - length(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), - 10 - ) - expect_equal( - names(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), - c("eval.dexes", "train.dexes") - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]], - list(eval.dexes = 1, - train.dexes = 4:8) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[2]], - list(eval.dexes = 2, - train.dexes = 5:9) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[3]], - list(eval.dexes = 3, - train.dexes = 6:10) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[9]], - list(eval.dexes = 9, - train.dexes = 2:6) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[10]], - list(eval.dexes = 10, - train.dexes = 3:7) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 1), - .make.eval.train.dexes(eval.method = 'leave-one-out', 10, NULL) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'k-fold', 20, NULL, 1), - .make.eval.train.dexes(eval.method = 'leave-one-out', 20, NULL) - ) -}) - -############################################## - -test_that("3. Output checks: retrospective", { - expect_equal( - length(.make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)), - 7 - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[1]], - list(eval.dexes = 4, train.dexes = 1:3) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[2]], - list(eval.dexes = 5, train.dexes = 1:4) - ) - expect_equal( - .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[7]], - list(eval.dexes = 10, train.dexes = 1:9) - ) -}) -