diff --git a/.gitignore b/.gitignore
index 89a262feca684eb5c483695ed9261014aba892af..b2d7bbf988d665ea906815f3c6e84c94c628f08a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,5 @@ Rplots.pdf
 .nfs*
 *.RData
 !data/*.RData
+.Rproj.user/
 .Rproj.user
diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R
index 3ba22917679947eb71b1b6c2211b9e60bda804be..c4942eaade57a66320625ea5ad96f2aef4f0f1e7 100644
--- a/R/CST_Calibration.R
+++ b/R/CST_Calibration.R
@@ -149,7 +149,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min",
                             eval.method = "leave-one-out", multi.model = FALSE,
                             na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL,
                             memb_dim = 'member', sdate_dim = 'sdate',
-                            dat_dim = NULL, ncores = NULL) {
+                            dat_dim = NULL, ncores = NULL, k = 1, tail.out = TRUE) {
   # Check 's2dv_cube'
   if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) {
     stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube'.")
@@ -165,7 +165,7 @@
                              multi.model = multi.model, na.fill = na.fill,
                              na.rm = na.rm, apply_to = apply_to, alpha = alpha,
                              memb_dim = memb_dim, sdate_dim = sdate_dim,
-                             dat_dim = dat_dim, ncores = ncores)
+                             dat_dim = dat_dim, ncores = ncores, k = k, tail.out = tail.out)
 
   if (is.null(exp_cor)) {
     exp$data <- Calibration
@@ -305,7 +305,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, multi.model = FALSE,
                         na.fill = TRUE, na.rm = TRUE, apply_to = NULL,
                         alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate',
                         dat_dim = NULL,
-                        ncores = NULL) {
+                        ncores = NULL, k = 1, tail.out = TRUE) {
 
   # Check inputs
   ## exp, obs
@@ -478,10 +478,10 @@
     }
   }
   ## eval.method
-  if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast'))) {
+  if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold', 'retrospective'))) {
     stop(paste0("Parameter 'eval.method' must be a character string indicating ",
-                "the sampling method used ('in-sample', 'leave-one-out' or ",
-                "'hindcast-vs-forecast')."))
+                "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ",
+                "'retrospective')."))
   }
   ## multi.model
   if (!inherits(multi.model, "logical")) {
@@ -513,7 +513,7 @@
                         cal.method = cal.method, eval.method = eval.method, multi.model = multi.model,
                         na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha,
                         target_dims = list(exp = target_dims_exp, obs = target_dims_obs),
-                        ncores = ncores, fun = .cal)$output1
+                        ncores = ncores, k = k, tail.out = tail.out, fun = .cal)$output1
   } else {
     calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), dat_dim = dat_dim,
                         cal.method = cal.method, eval.method = eval.method,
@@ -521,7 +521,7 @@
                         multi.model = multi.model, na.fill = na.fill, na.rm = na.rm,
                         apply_to = apply_to, alpha = alpha,
                         target_dims = list(exp = target_dims_exp, obs = target_dims_obs,
                                            exp_cor = target_dims_cor),
-                        ncores = ncores, fun = .cal)$output1
+                        ncores = ncores, k = k, tail.out = tail.out, fun = .cal)$output1
   }
 
   if (!is.null(dat_dim)) {
@@ -567,10 +567,62 @@
     }
   }
 
-.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) {
+.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor,
+                                   k = 1, tail.out = TRUE) {
+  # eval.method: 'leave-one-out', 'k-fold', 'in-sample', 'hindcast-vs-forecast' or 'retrospective'
'in-sample', "hindcast-vs-forecast", "retrospective" +# amt.points: length of the sample +# amt.points_cor: only needed for hindcast-vs-forecast method +# k: the number of samples to leave-out +# tail_out: boolean for 'k-fold method; TRUE to remove both extremes of the sample when k is +# in the extreme keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +# eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +# amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) + + if (k >= amt.points && !is.null(k) | k < 1) { + stop("k needs to be a positive integer less than the amt.points") + } if (eval.method == "leave-one-out") { dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x]))) + } else if (eval.method == "k-fold") { + # k is a odd number + if (tail.out == TRUE){ + dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { + if (x >= ((kfold-1)/2) + 1 && x + ((kfold-1)/2) <= amt.points) { + ind <- (x-((kfold-1)/2)):(x+((kfold-1)/2)) + } else if (x < ((kfold-1)/2) + 1) { + ind <- c((amt.points - ((kfold-1)/2-x)):amt.points, 1:(x+(kfold-1)/2)) + } else if ((x+((kfold-1)/2)) > amt.points) { + ind <- c((x-(kfold-1)/2):amt.points, 1:(((kfold-1)/2)-amt.points+x)) + } else { + stop("Review make.eval.train.dexes function") + } + return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-ind])) + }) + } else { + if (k != 1){ + k <- (k-1)/2 + } + + dexes.lst <- lapply(seq(1, amt.points), function(x) { + start_idx <- max(1, x - k) + end_idx <- min(amt.points, x + k) + eval_range <- start_idx:end_idx + train_idx <- setdiff(seq(1, amt.points), eval_range) + return(list(eval.dexes = x, train.dexes = train_idx)) + }) + } + + } else if (eval.method == "retrospective") { + # k can be any integer indicating the when to start + dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), + function(x, mindata = k) { + if (x > k) { + eval.dexes <- x + train.dexes <- 1:(x-1) + return(list(eval.dexes = x, + train.dexes = 1:(x-1))) + }})) } else if (eval.method == "in-sample") { dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points))) @@ -580,12 +632,13 @@ Calibration <- function(exp, obs, exp_cor = NULL, } else { stop(paste0("unknown sampling method: ", eval.method)) } + return(dexes.lst) } .cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL) { + na.rm = TRUE, apply_to = NULL, alpha = NULL, k = 1, tail.out = TRUE) { # exp: [memb, sdate, (dat)] # obs: [sdate (dat)] @@ -642,7 +695,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, - amt.points_cor = sdate_cor) + amt.points_cor = sdate_cor, + k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { # defining training (tr) and evaluation (ev) subsets diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index 3c92d587efb6aa4be76bf7137bf68aa557bc4e40..f973cde79ff9a590438be36cee50d5c16f55c234 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -97,7 +97,7 @@ #'@export CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", eval.method = "leave-one-out", - amt.cat = 3, + amt.cat = 3, k = 1, tail.out = TRUE, ...) 
   # Check 's2dv_cube'
   if (!inherits(exp, "s2dv_cube") || !inherits(exp, "s2dv_cube")) {
@@ -113,7 +113,8 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool",
   exp$data <- CategoricalEnsCombination(fc = exp$data, obs = obs$data,
                                         cat.method = cat.method, eval.method = eval.method,
-                                        amt.cat = amt.cat, ...)
+                                        amt.cat = amt.cat, k = k,
+                                        tail.out = tail.out, ...)
 
   names.dim.tmp[which(names.dim.tmp == "member")] <- "category"
   names(dim(exp$data)) <- names.dim.tmp
@@ -175,7 +176,7 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool",
 #'@importFrom s2dv InsertDim
 #'@import abind
 #'@export
-CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat, ...) {
+CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat, k, tail.out, ...) {
 
   if (!all(c("member", "sdate") %in% names(dim(fc)))) {
     stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.")
@@ -205,6 +206,8 @@ CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat
                              cat.method = cat.method,
                              eval.method = eval.method,
                              amt.cat = amt.cat,
+                             k = k,
+                             tail.out = tail.out,
                              ...)
   return(cat_fc_out)
 }
@@ -243,7 +246,7 @@ comb.dims <- function(arr.in, dims.to.combine){
   return(arr.out)
 }
 
-.apply.obs.fc <- function(obs, fc, target.dims, FUN, return.feat, cat.method, eval.method, amt.cat, ...){
+.apply.obs.fc <- function(obs, fc, target.dims, FUN, return.feat, cat.method, eval.method, amt.cat, k, tail.out, ...){
   dimnames.tmp <- dimnames(fc)
   fc.dims.tmp <- dim(fc)
   dims.out.tmp <- return.feat$dim
@@ -260,6 +263,8 @@ comb.dims <- function(arr.in, dims.to.combine){
                              cat.method = cat.method,
                              eval.method = eval.method,
                              amt.cat = amt.cat,
+                             k = k,
+                             tail.out = tail.out,
                              ...)
   dims.tmp <- dim(arr.out)
   names.dims.tmp <- names(dim(arr.out))
@@ -280,7 +285,7 @@ comb.dims <- function(arr.in, dims.to.combine){
 
 }
 
-.cat_fc <- function(obs.fc, amt.cat, cat.method, eval.method) {
+.cat_fc <- function(obs.fc, amt.cat, cat.method, eval.method, k, tail.out) {
 
   dims.tmp=dim(obs.fc)
   amt.mbr <- dims.tmp["member"][]-1
@@ -299,7 +304,7 @@ comb.dims <- function(arr.in, dims.to.combine){
   amt.coeff <- amt.mdl + 1
   var.cat.fc <- array(NA, c(amt.cat, amt.sdate))
 
-  eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate)
+  eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out)
 
   amt.resamples <- length(eval.train.dexeses)
   for (i.sample in seq(1, amt.resamples)) {
diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd
index 491b727195638d98ef7f7a891825696fbbaa5f9e..6843f1c9a244212e8071a317c9fa0b0d994c0bb4 100644
--- a/man/CST_Calibration.Rd
+++ b/man/CST_Calibration.Rd
@@ -18,7 +18,9 @@ CST_Calibration(
   memb_dim = "member",
   sdate_dim = "sdate",
   dat_dim = NULL,
-  ncores = NULL
+  ncores = NULL,
+  k = 1,
+  tail.out = TRUE
 )
 }
 \arguments{
diff --git a/man/CST_CategoricalEnsCombination.Rd b/man/CST_CategoricalEnsCombination.Rd
index 85ebb7f8f00c0c393418d341af002cfee464a726..416ad7f82e4172f0f32dfd96660104b53e76b6e0 100644
--- a/man/CST_CategoricalEnsCombination.Rd
+++ b/man/CST_CategoricalEnsCombination.Rd
@@ -11,6 +11,8 @@ CST_CategoricalEnsCombination(
   cat.method = "pool",
   eval.method = "leave-one-out",
   amt.cat = 3,
+  k = 1,
+  tail.out = TRUE,
   ...
 )
 }
diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R
index 491aff29556fc0a67f4abc9fb5ad11500aca272d..285e81c8907cd457219d6af3ea881577683baff9 100644
--- a/tests/testthat/test-CST_Calibration.R
+++ b/tests/testthat/test-CST_Calibration.R
@@ -218,8 +218,8 @@ test_that("1. Input checks", {
   expect_error(
     Calibration(exp4, obs4, eval.method = 'biass'),
     paste0("Parameter 'eval.method' must be a character string indicating ",
-           "the sampling method used ('in-sample', 'leave-one-out' or ",
-           "'hindcast-vs-forecast')."),
+           "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ",
+           "'retrospective')."),
     fixed = TRUE
   )
   # multi.model
@@ -453,6 +453,18 @@ test_that("6. Output checks: dat4", {
     c(-0.7119142, 0.2626203, -0.9635483, 1.9607986, 0.4380930),
     tolerance = 0.0001
   )
+  expect_equal(
+    as.vector(Calibration(exp4, obs4, eval.method = "k-fold", k = 5, tail.out = T))[1:5],
+    c(-0.8149557, 0.2043942, -1.0781616, 1.9806657, 0.3879362),
+    tolerance = 0.0001
+  )
+
+  expect_equal(
+    as.vector(Calibration(exp4, obs4, eval.method = "retrospective", k = 3)[1, 1:5]),
+    c(NA, NA, NA, 1.6267865, -0.1658674),
+    tolerance = 0.0001
+  )
+
 })
 
 ##############################################
diff --git a/tests/testthat/test-make-eval-train-dexes.R b/tests/testthat/test-make-eval-train-dexes.R
new file mode 100644
index 0000000000000000000000000000000000000000..a74a06c0fa18506bf3c0c0751194f611543c711e
--- /dev/null
+++ b/tests/testthat/test-make-eval-train-dexes.R
@@ -0,0 +1,93 @@
+##############################################
+
+test_that("1. Input checks", {
+  # eval.method and k
+  expect_error(
+    .make.eval.train.dexes(eval.method = 1, 2),
+    paste0("unknown sampling method: 1")
+  )
+  expect_error(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 12),
+    paste0("k needs to be a positive integer less than the amt.points")
+  )
+  expect_error(
+    .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 10),
+    paste0("k needs to be a positive integer less than the amt.points")
+  )
+})
+
+##############################################
+
+test_that("2. Output checks: k-fold", {
+  expect_equal(
+    is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)),
+    TRUE
+  )
+  expect_equal(
+    is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]),
+    TRUE
+  )
+  expect_equal(
+    length(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)),
+    10
+  )
+  expect_equal(
+    names(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]),
+    c("eval.dexes", "train.dexes")
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]],
+    list(eval.dexes = 1,
+         train.dexes = 4:8)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[2]],
+    list(eval.dexes = 2,
+         train.dexes = 5:9)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[3]],
+    list(eval.dexes = 3,
+         train.dexes = 6:10)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[9]],
+    list(eval.dexes = 9,
+         train.dexes = 2:6)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[10]],
+    list(eval.dexes = 10,
+         train.dexes = 3:7)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 1),
+    .make.eval.train.dexes(eval.method = 'leave-one-out', 10, NULL)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'k-fold', 20, NULL, 1),
+    .make.eval.train.dexes(eval.method = 'leave-one-out', 20, NULL)
+  )
+})
+
+##############################################
+
+test_that("3. Output checks: retrospective", {
+  expect_equal(
+    length(.make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)),
+    7
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[1]],
+    list(eval.dexes = 4, train.dexes = 1:3)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[2]],
+    list(eval.dexes = 5, train.dexes = 1:4)
+  )
+  expect_equal(
+    .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[7]],
+    list(eval.dexes = 10, train.dexes = 1:9)
+  )
+})
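
Note (illustrative sketch, not part of the patch): the R snippet below shows how the two new eval.method options partition a hindcast with 10 start dates. It assumes the package internals are loaded (e.g. via devtools::load_all()) so that the internal helper .make.eval.train.dexes is visible; the index values quoted in the comments follow the unit tests above.

# 'k-fold' with k = 5 and tail.out = TRUE: each start date is evaluated once and
# trained on the start dates outside the 5-point window centred on it (the window
# wraps around the edges), so every fold keeps the same training size.
str(.make.eval.train.dexes(eval.method = "k-fold", amt.points = 10,
                           amt.points_cor = NULL, k = 5))
# e.g. element 1: eval.dexes = 1, train.dexes = 4:8

# 'retrospective' with k = 3: start dates 4 to 10 are each evaluated using only
# the start dates that precede them; the first k start dates are never evaluated.
str(.make.eval.train.dexes(eval.method = "retrospective", amt.points = 10,
                           amt.points_cor = NULL, k = 3))
# e.g. element 1: eval.dexes = 4, train.dexes = 1:3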
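
A second sketch (again an assumption-laden illustration, not part of the patch): calling the exported Calibration() function with the new arguments on small toy arrays. The array shapes are invented for the example; they only need the 'member' and 'sdate' dimensions that Calibration() already requires.

set.seed(1)
exp <- array(rnorm(10 * 30), dim = c(member = 10, sdate = 30))
obs <- array(rnorm(30), dim = c(sdate = 30))

# 5-point k-fold cross-validation of the calibration coefficients
cal_kfold <- Calibration(exp, obs, cal.method = "mse_min",
                         eval.method = "k-fold", k = 5, tail.out = TRUE)

# retrospective calibration: the first k start dates have no training data and stay NA
cal_retro <- Calibration(exp, obs, cal.method = "mse_min",
                         eval.method = "retrospective", k = 3)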