diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..3faa54d96dda3af8d4e29ab8b4c8101470f6ca41 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,66 @@ +### Questions and bug reports + +If you have questions about the usage of a function or would like to report a bug, please follow these steps: + +1. Use the search function in GitLab to see if a similar problem has already been reported and/or solved. + +2. Rather than including a long script, try to run your code in small blocks to narrow down where the problem is coming from as much as possible. + +3. Open an issue tagging the maintainers (@tkariyat), and include a descriptive title and a piece of code to reproduce your problem (if applicable). + +### How to contribute + +If you would like to add a bugfix, an enhancement or a new functions, follow these steps: + +1. Open an issue to ask for help or describe a feature to be integrated + +2. Agree with the maintainers (@tkariyat) on the requirements + +3. Create a new branch from master with a meaningful name + +4. Once the development is finished, open a merge request to merge the branch on master + +*Note: Remember to work with multidimensional arrays with named dimensions when possible and use multiApply (https://earth.bsc.es/gitlab/ces/multiApply)* + +### Adding a function + +To add a new function in this R package, follow these considerations: + +* Each function exposed to the users should be in its own separate file in the R folder +* The name of the function should match the name of the file (e.g.: `Function()` included in file **Function.R** +* The documentation should be in roxygen2 format as a header of the function +* Once the function and the documentation are finished, run the command `devtools::document()` in your R terminal to automatically generate the **Function.Rd** file +* When doing the development, please use an R version between R/4.1.2 and R/4.3.x + +### Style guide + +* Use `<-` for variable assignment +* Include spaces between operators (e.g. `+`, `-`, `&`), before `{`, and after `for`, `if`, `while`, `,` and `)`. +* When possible, maximum line length should be 100 characters (soft limit of 80 characters). +* Number of indentation spaces is 2, using tabs for indentation is forbidden. +* Double quotes are recommended for strings. When writing quotes within quoted text, use double quotes outside and single quotes inside. E.g.: `“Parameter 'na.rm' is missing.”` +* Self-explanatory names are preferred for variables. Try to be consistent with the variable naming style. Generally: avoid special characters (except underscores) and reserved words (ex: if, for, else, …) +* Remember to include short comments to make the code easier to understand. Comments should be in their own line and they should start with `#` followed by a space. Comments that start with `##` and a space are for details not needed to understand the general procedure but useful to make note of more technical aspects of the code. + +#### Examples: + +```r +# Proper spacing, indentation spaces and text quotes: +NewFunction <- function(text = "default", uppercase = TRUE) { + # Check uppercase parameter + if (!is.logical(uppercase)) { + stop("Parameter 'uppercase' should be TRUE or FALSE.") + } + # Only transform text if needed + if (uppercase && is.character(text)) { + text <- toupper(text) + } + return(text) +} + +# How to format line breaks to avoid long lines: +my_strings <- list(one = "one", + two = "two", + three = "three", + four = "four") +``` diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 3ba22917679947eb71b1b6c2211b9e60bda804be..86bd771291f2db6f59cf3afcdd3b4a5e0dcb7755 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -149,7 +149,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', - dat_dim = NULL, ncores = NULL) { + dat_dim = NULL, ncores = NULL, k = 1, tail.out = TRUE) { # Check 's2dv_cube' if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube'.") @@ -165,7 +165,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", multi.model = multi.model, na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, memb_dim = memb_dim, sdate_dim = sdate_dim, - dat_dim = dat_dim, ncores = ncores) + dat_dim = dat_dim, ncores = ncores, k = k, tail.out = tail.out) if (is.null(exp_cor)) { exp$data <- Calibration @@ -305,7 +305,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, - ncores = NULL) { + ncores = NULL, k = 1, tail.out = TRUE) { # Check inputs ## exp, obs @@ -478,10 +478,10 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } ## eval.method - if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast'))) { + if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold', 'retrospective'))) { stop(paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out' or ", - "'hindcast-vs-forecast').")) + "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "'retrospective').")) } ## multi.model if (!inherits(multi.model, "logical")) { @@ -513,7 +513,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, target_dims = list(exp = target_dims_exp, obs = target_dims_obs), - ncores = ncores, fun = .cal)$output1 + ncores = ncores, k = k, tail.out = tail.out, fun = .cal)$output1 } else { calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), dat_dim = dat_dim, cal.method = cal.method, eval.method = eval.method, @@ -521,7 +521,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, apply_to = apply_to, alpha = alpha, target_dims = list(exp = target_dims_exp, obs = target_dims_obs, exp_cor = target_dims_cor), - ncores = ncores, fun = .cal)$output1 + ncores = ncores, k = k, tail.out = tail.out, fun = .cal)$output1 } if (!is.null(dat_dim)) { @@ -567,10 +567,63 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) { +.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor, + k = 1, tail.out = TRUE) { +# eval.method: 'leave-one-out', 'k-fold', 'in-sample', "hindcast-vs-forecast", "retrospective" +# amt.points: length of the sample +# amt.points_cor: only needed for hindcast-vs-forecast method +# k: the number of samples to leave-out +# tail_out: boolean for 'k-fold method; TRUE to remove both extremes of the sample when k is +# in the extreme keeping the same sample size for all k-folds (e.g. amt.points=50, k=3, +# eval.dexes=1, train.dexes={3,49}). FALSE to remove only the corresponding tail (e.g.. +# amt.points=50, k=3, eval.dexes=1, train.dexes={3,50}) + + if (k >= amt.points && !is.null(k)) { + stop("k need to be smaller than the amt.points") + } + if (eval.method == "leave-one-out") { dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x]))) + } else if (eval.method == "k-fold") { + # k is a odd number + if (tail.out == TRUE){ + dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) { + if (x >= ((kfold-1)/2) + 1 && x + ((kfold-1)/2) <= amt.points) { + ind <- (x-((kfold-1)/2)):(x+((kfold-1)/2)) + } else if (x < ((kfold-1)/2) + 1) { + ind <- c((amt.points - ((kfold-1)/2-x)):amt.points, 1:(x+(kfold-1)/2)) + } else if ((x+((kfold-1)/2)) > amt.points) { + ind <- c((x-(kfold-1)/2):amt.points, 1:(((kfold-1)/2)-amt.points+x)) + } else { + stop("Review make.eval.train.dexes function") + } + return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-ind])) + }) + } else { + if (k != 1){ + k <- (k-1)/2 + } + + dexes.lst <- lapply(seq(1, amt.points), function(x) { + start_idx <- max(1, x - k) + end_idx <- min(amt.points, x + k) + eval_range <- start_idx:end_idx + train_idx <- setdiff(seq(1, amt.points), eval_range) + return(list(eval.dexes = x, train.dexes = train_idx)) + }) + } + + } else if (eval.method == "retrospective") { + # k can be any integer indicating the when to start + dexes.lst <- base::Filter(length, lapply(seq(1, amt.points), + function(x, mindata = k) { + if (x > k) { + eval.dexes <- x + train.dexes <- 1:(x-1) + return(list(eval.dexes = x, + train.dexes = 1:(x-1))) + }})) } else if (eval.method == "in-sample") { dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points))) @@ -580,12 +633,13 @@ Calibration <- function(exp, obs, exp_cor = NULL, } else { stop(paste0("unknown sampling method: ", eval.method)) } + return(dexes.lst) } .cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL) { + na.rm = TRUE, apply_to = NULL, alpha = NULL, k = 1, tail.out = TRUE) { # exp: [memb, sdate, (dat)] # obs: [sdate (dat)] @@ -642,7 +696,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, - amt.points_cor = sdate_cor) + amt.points_cor = sdate_cor, + k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { # defining training (tr) and evaluation (ev) subsets diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index 3c92d587efb6aa4be76bf7137bf68aa557bc4e40..f973cde79ff9a590438be36cee50d5c16f55c234 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -97,7 +97,7 @@ #'@export CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", eval.method = "leave-one-out", - amt.cat = 3, + amt.cat = 3, k = 1, tail.out = TRUE, ...) { # Check 's2dv_cube' if (!inherits(exp, "s2dv_cube") || !inherits(exp, "s2dv_cube")) { @@ -113,7 +113,8 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", exp$data <- CategoricalEnsCombination(fc = exp$data, obs = obs$data, cat.method = cat.method, eval.method = eval.method, - amt.cat = amt.cat, ...) + amt.cat = amt.cat, k = k, + tail.out = tail.out, ...) names.dim.tmp[which(names.dim.tmp == "member")] <- "category" names(dim(exp$data)) <- names.dim.tmp @@ -175,7 +176,7 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", #'@importFrom s2dv InsertDim #'@import abind #'@export -CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat, ...) { +CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat, k, tail.out, ...) { if (!all(c("member", "sdate") %in% names(dim(fc)))) { stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.") @@ -205,6 +206,8 @@ CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat cat.method = cat.method, eval.method = eval.method, amt.cat = amt.cat, + k = k, + tail.out = tail.out, ...) return(cat_fc_out) } @@ -243,7 +246,7 @@ comb.dims <- function(arr.in, dims.to.combine){ return(arr.out) } -.apply.obs.fc <- function(obs, fc, target.dims, FUN, return.feat, cat.method, eval.method, amt.cat, ...){ +.apply.obs.fc <- function(obs, fc, target.dims, FUN, return.feat, cat.method, eval.method, amt.cat, k, tail.out, ...){ dimnames.tmp <- dimnames(fc) fc.dims.tmp <- dim(fc) dims.out.tmp <- return.feat$dim @@ -260,6 +263,8 @@ comb.dims <- function(arr.in, dims.to.combine){ cat.method = cat.method, eval.method = eval.method, amt.cat = amt.cat, + k = k, + tail.out = tail.out, ...) dims.tmp <- dim(arr.out) names.dims.tmp <- names(dim(arr.out)) @@ -280,7 +285,7 @@ comb.dims <- function(arr.in, dims.to.combine){ } -.cat_fc <- function(obs.fc, amt.cat, cat.method, eval.method) { +.cat_fc <- function(obs.fc, amt.cat, cat.method, eval.method, k, tail.out) { dims.tmp=dim(obs.fc) amt.mbr <- dims.tmp["member"][]-1 @@ -299,7 +304,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.coeff <- amt.mdl + 1 var.cat.fc <- array(NA, c(amt.cat, amt.sdate)) - eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate) + eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = amt.sdate, k = k, tail.out = tail.out) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 491b727195638d98ef7f7a891825696fbbaa5f9e..6843f1c9a244212e8071a317c9fa0b0d994c0bb4 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -18,7 +18,9 @@ CST_Calibration( memb_dim = "member", sdate_dim = "sdate", dat_dim = NULL, - ncores = NULL + ncores = NULL, + k = 1, + tail.out = TRUE ) } \arguments{ diff --git a/man/CST_CategoricalEnsCombination.Rd b/man/CST_CategoricalEnsCombination.Rd index 85ebb7f8f00c0c393418d341af002cfee464a726..416ad7f82e4172f0f32dfd96660104b53e76b6e0 100644 --- a/man/CST_CategoricalEnsCombination.Rd +++ b/man/CST_CategoricalEnsCombination.Rd @@ -11,6 +11,8 @@ CST_CategoricalEnsCombination( cat.method = "pool", eval.method = "leave-one-out", amt.cat = 3, + k = 1, + tail.out = TRUE, ... ) } diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 491aff29556fc0a67f4abc9fb5ad11500aca272d..285e81c8907cd457219d6af3ea881577683baff9 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -218,8 +218,8 @@ test_that("1. Input checks", { expect_error( Calibration(exp4, obs4, eval.method = 'biass'), paste0("Parameter 'eval.method' must be a character string indicating ", - "the sampling method used ('in-sample', 'leave-one-out' or ", - "'hindcast-vs-forecast')."), + "the sampling method used ('in-sample', 'leave-one-out', 'hindcast-vs-forecast', 'k-fold' or ", + "'retrospective')."), fixed = TRUE ) # multi.model @@ -453,6 +453,18 @@ test_that("6. Output checks: dat4", { c(-0.7119142, 0.2626203, -0.9635483, 1.9607986, 0.4380930), tolerance = 0.0001 ) + expect_equal( + as.vector(Calibration(exp4, obs4, eval.method = "k-fold", k = 5, tail.out = T))[1:5], + c(-0.8149557, 0.2043942, -1.0781616, 1.9806657, 0.3879362), + tolerance = 0.0001 + ) + + expect_equal( + as.vector(Calibration(exp4, obs4, eval.method = "retrospective", k = 3)[1,1:5]), + c(NA, NA, NA, 1.6267865, -0.1658674), + tolerance = 0.0001 + ) + }) ############################################## diff --git a/tests/testthat/test-make-eval-train-dexes.R b/tests/testthat/test-make-eval-train-dexes.R new file mode 100644 index 0000000000000000000000000000000000000000..53e5dc88072c54c2612c865469aa87d887b4fbfe --- /dev/null +++ b/tests/testthat/test-make-eval-train-dexes.R @@ -0,0 +1,93 @@ +############################################## + +test_that("1. Input checks", { + # s2dv_cube + expect_error( + .make.eval.train.dexes(eval.method = 1, 2), + paste0("unknown sampling method: 1") + ) + expect_error( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 12), + paste0("k need to be smaller than the amt.points") + ) + expect_error( + .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 10), + paste0("k need to be smaller than the amt.points") + ) +}) + +############################################## + +test_that("2. Output checks: k-fold", { + expect_equal( + is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), + TRUE + ) + expect_equal( + is.list(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), + TRUE + ) + expect_equal( + length(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)), + 10 + ) + expect_equal( + names(.make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]]), + c("eval.dexes", "train.dexes") + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[1]], + list(eval.dexes = 1, + train.dexes = 4:8) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[2]], + list(eval.dexes = 2, + train.dexes = 5:9) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[3]], + list(eval.dexes = 3, + train.dexes = 6:10) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[9]], + list(eval.dexes = 9, + train.dexes = 2:6) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 5)[[10]], + list(eval.dexes = 10, + train.dexes = 3:7) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 10, NULL, 1), + .make.eval.train.dexes(eval.method = 'leave-one-out', 10, NULL) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'k-fold', 20, NULL, 1), + .make.eval.train.dexes(eval.method = 'leave-one-out', 20, NULL) + ) +}) + +############################################## + +test_that("3. Output checks: retrospective", { + expect_equal( + length(.make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)), + 7 + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[1]], + list(eval.dexes = 4, train.dexes = 1:3) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[2]], + list(eval.dexes = 5, train.dexes = 1:4) + ) + expect_equal( + .make.eval.train.dexes(eval.method = 'retrospective', 10, NULL, 3)[[7]], + list(eval.dexes = 10, train.dexes = 1:9) + ) +}) +