diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 874c351eab65459da83bd6e705e1902b8cbb91bf..3fa68d9596a7b0d4d7eaf135fb898fe81db92d28 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -10,6 +10,8 @@ #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. #' @@ -36,16 +38,12 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, - na.fill = TRUE, ncores = 1) { + na.fill = TRUE, memb_dim = 'member', + sdate_dim = 'sdate', ncores = 1) { if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - - if (dim(obs$data)["member"] != 1) { - stop("The length of the dimension 'member' in the component 'data' ", - "of the parameter 'obs' must be equal to 1.") - } if(!missing(multi.model) & !(cal.method == "mse_min")){ warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) @@ -55,6 +53,7 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = eval.method, multi.model = multi.model, na.fill = na.fill, + memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) exp$Datasets <- c(exp$Datasets, obs$Datasets) @@ -81,11 +80,15 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. #' #'@importFrom s2dv InsertDim #'@import abind +#'@import multiApply +#'@importFrom s2dverification Subset #' #'@seealso \code{\link{CST_Load}} #' @@ -97,8 +100,10 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'a <- Calibration(exp = mod1, obs = obs1) #'str(a) #'@export -Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", - multi.model = FALSE, na.fill = TRUE, ncores = 1) { +Calibration <- function(exp, obs, cal.method = "mse_min", + eval.method = "leave-one-out", + multi.model = FALSE, na.fill = TRUE, + memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { dim.exp <- dim(exp) amt.dims.exp <- length(dim.exp) @@ -106,16 +111,36 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o amt.dims.obs <- length(dim.obs) dim.names.exp <- names(dim.exp) dim.names.obs <- names(dim.obs) + if (is.null(memb_dim) || !is.character(memb_dim)) { + stop("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") + } + if (length(memb_dim) > 1) { + memb_dim <- memb_dim[1] + warning("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") + } - target.dim.names.exp <- c("member", "sdate") - target.dim.names.obs <- c("sdate") + if (is.null(sdate_dim) || !is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' should be a character string indicating the", + "name of the dimension where start dates are stored in 'exp'.") + } + if (length(sdate_dim) > 1) { + sdate_dim <- sdate_dim[1] + warning("Parameter 'sdate_dim' has length greater than 1 and only", + " the first element will be used.") + } + target.dim.names.exp <- c(memb_dim, sdate_dim) + target.dim.names.obs <- sdate_dim if (!all(target.dim.names.exp %in% dim.names.exp)) { - stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.") + stop("Parameter 'exp' must have the dimensions defined in memb_dim ", + "and sdate_dim.") } - if (!all(c("sdate") %in% dim.names.obs)) { - stop("Parameter 'obs' must have the dimension 'sdate'.") + if (!all(c(sdate_dim) %in% dim.names.obs)) { + stop("Parameter 'obs' must have the dimension defined in sdate_dim ", + "parameter.") } if (any(is.na(exp))) { @@ -126,12 +151,9 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o warning("Parameter 'obs' contains NA values.") } - if (dim(obs)['member']!=1){ - stop("Parameter 'obs' must have a member dimension with length=1") + if (memb_dim %in% names(dim(obs))) { + obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") } - - obs <- Subset(obs, along = "member", indices = 1, drop = "selected") - data.set.sufficiently.large.out <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), @@ -140,21 +162,21 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o if(!all(data.set.sufficiently.large.out)){ if(na.fill){ - warning("Some forecast data could not be corrected due to data lack and is replaced with NA values") + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with NA values") } else { - warning("Some forecast data could not be corrected due to data lack and is replaced with uncorrected values") + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with uncorrected values") } } - calibrated <- Apply(data = list(exp = exp, obs = obs), cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, na.fill = na.fill, target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, + ncores = ncores, output_dims = target.dim.names.exp, fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) calibrated <- aperm(calibrated, dexes) dimnames(calibrated) <- dimnames(exp)[dexes] @@ -188,7 +210,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o amt.mbr <- dims.fc[1] amt.sdate <- dims.fc[2] var.cor.fc <- NA * exp - names(dim(var.cor.fc)) <- c("member", "sdate") + names(dim(var.cor.fc)) <- dims.fc if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ if(na.fill){ diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 76812a438ce7f4cd0b42975e7feef911d6f9b48c..f15d0e2fa81e18986d25052602778bd4054a39d6 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -11,6 +11,8 @@ CST_Calibration( eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + memb_dim = "member", + sdate_dim = "sdate", ncores = 1 ) } @@ -27,6 +29,10 @@ CST_Calibration( \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} + +\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} + \item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} } \value{ diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 64452279fce1be4719bcdfdaa18864398fd01ee2..df91fd071ffffd81a2e58c8474e58660b866b6d7 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -11,6 +11,8 @@ Calibration( eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + memb_dim = "member", + sdate_dim = "sdate", ncores = 1 ) } @@ -27,6 +29,10 @@ Calibration( \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} + +\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} + \item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} } \value{ diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index a4491719ce63e224965a60000926c18799bffe7f..f169e2e3fbed6437b72846dfe0740afcd8daf892 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -17,10 +17,10 @@ test_that("Sanity checks", { expect_equal(cal$lat, obs$lat) expect_equal(cal$lon, exp$lon) expect_equal(cal$lon, obs$lon) - expect_error( - CST_Calibration(exp = exp, obs = exp), - "The length of the dimension 'member' in the component 'data' " - ) + # expect_error( + # CST_Calibration(exp = exp, obs = exp), + # "The length of the dimension 'member' in the component 'data' " + # ) exp2 <- exp exp2$data[1, 2, 1, 1, 1, 1] <- NA