diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 20b51082ccc7a164213e26d4844b36ea05dafe48..da368db00cffe396531ef55d449c3d04650ff7ea 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -1,4 +1,4 @@ -#' Bias Correction based on the mean and standard deviation adjustment +#'Bias Correction based on the mean and standard deviation adjustment #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@description This function applies the simple bias adjustment technique @@ -7,23 +7,35 @@ #' #'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} #' function, containing the seasonal forecast experiment data in the element -#' named \code{$data} +#' named \code{$data} with at least time and member dimensions. #'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. +#' function, containing the observed data in the element named \code{$data} +#' with at least time dimension. #'@param exp_cor An object of class \code{s2dv_cube} as returned by -#' \code{CST_Load} function, containing the seasonl forecast experiment to be -#' corrected. If it is NULL, the 'exp' forecast will be corrected. +#' \code{CST_Load} function, containing the seasonal forecast experiment to be +#' corrected with at least time dimension. If it is NULL, the 'exp' forecast +#' will be corrected. If there is only one corrected dataset, it should not +#' have dataset dimension. If there is a corresponding corrected dataset for +#' each 'exp' forecast, the dataset dimension must have the same length as in +#' 'exp'. The default value is NULL. #'@param na.rm A logical value indicating whether missing values should be #' stripped before the computation proceeds, by default it is set to FALSE. #'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #'@return An object of class \code{s2dv_cube} containing the bias corrected -#'forecasts with the same dimensions of the experimental data. -#' +#'forecasts with the dimensions nexp, nobs and same dimensions as in the 'exp' +#'object. nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is +#'the number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp +#'and nobs are omitted. If 'exp_cor' is provided the returned array will be with +#'the same dimensions as 'exp_cor'. +#' #'@references Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. #'Davis (2017). Seasonal climate prediction: a new source of information for #'the management of wind energy resources. Journal of Applied Meteorology and @@ -45,8 +57,8 @@ #'@import multiApply #'@export CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, - memb_dim = 'member', sdate_dim = 'sdate', - ncores = NULL) { + memb_dim = 'member', sdate_dim = 'sdate', + dat_dim = NULL, ncores = NULL) { if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") @@ -54,19 +66,13 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, if (!is.null(exp_cor)) { if (!inherits(exp_cor, 's2dv_cube')) { stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") + "as output by CSTools::CST_Load.") } - dimnames <- names(dim(exp_cor$data)) - } else { - dimnames <- names(dim(exp$data)) } BiasCorrected <- BiasCorrection(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, - memb_dim = memb_dim, sdate_dim = sdate_dim, + memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores) - - pos <- match(dimnames, names(dim(BiasCorrected))) - BiasCorrected <- aperm(BiasCorrected, pos) if (is.null(exp_cor)) { exp$data <- BiasCorrected @@ -84,7 +90,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } } -#' Bias Correction based on the mean and standard deviation adjustment +#'Bias Correction based on the mean and standard deviation adjustment #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@description This function applies the simple bias adjustment technique @@ -92,24 +98,33 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #'standard deviation and mean to that of the reference dataset. #' #'@param exp A multidimensional array with named dimensions containing the -#' seasonal forecast experiment data with at least 'member' and 'sdate' -#' dimensions. +#' seasonal forecast experiment data with at least time and member dimensions. #'@param obs A multidimensional array with named dimensions containing the -#' observed data with at least 'sdate' dimension. +#' observed data with at least time dimension. #'@param exp_cor A multidimensional array with named dimensions containing the -#' seasonl forecast experiment to be corrected. If it is NULL, the 'exp' -#' forecast will be corrected. +#' seasonal forecast experiment to be corrected with at least time and member +#' dimension. If it is NULL, the 'exp' forecast will be corrected. If there is +#' only one corrected dataset, it should not have dataset dimension. If there +#' is a corresponding corrected dataset for each 'exp' forecast, the dataset +#' dimension must have the same length as in 'exp'. The default value is NULL. #'@param na.rm A logical value indicating whether missing values should be #' stripped before the computation proceeds, by default it is set to FALSE. #'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #' -#'@return An array containing the bias corrected forecasts with the same -#'dimensions of the experimental data. +#'@return An array containing the bias corrected forecasts with the dimensions +#'nexp, nobs and same dimensions as in the 'exp' object. nexp is the number of +#'experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +#''exp_cor' is provided the returned array will be with the same dimensions as +#''exp_cor'. #' #'@references Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. #'Davis (2017). Seasonal climate prediction: a new source of information for the @@ -127,7 +142,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #'@export BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, memb_dim = 'member', sdate_dim = 'sdate', - ncores = NULL) { + dat_dim = NULL, ncores = NULL) { # Check inputs ## exp, obs if (!is.array(exp) || !is.numeric(exp)) { @@ -174,6 +189,68 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, if (dim(obs)[memb_dim] != 1) { stop("If parameter 'obs' has dimension 'memb_dim' its length must be equal to 1.") } + } else { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = memb_dim) + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + exp_cor <- InsertDim(exp_cor, posdim = 1, lendim = 1, name = memb_dim) + exp_cor_remove_memb <- TRUE + } else { + exp_cor_remove_memb <- FALSE + } + } else { + exp_cor_remove_memb <- FALSE + } + + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and exp_cor (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(exp_cor)) { + name_exp_cor <- sort(names(dim(exp_cor))) + name_exp <- sort(names(dim(exp))) + if (!is.null(dat_dim)) { + if (dat_dim %in% exp_cordims) { + if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { + stop("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] + target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + name_exp <- name_exp[-which(name_exp %in% c(memb_dim, sdate_dim, dat_dim))] + name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] + if (!identical(length(name_exp), length(name_exp_cor)) | + !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + stop("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") + } } ## na.rm if (!is.logical(na.rm)) { @@ -191,57 +268,106 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - - target_dims_obs <- sdate_dim - if (memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(memb_dim, target_dims_obs) - } if (is.null(exp_cor)) { BiasCorrected <- Apply(data = list(var_obs = obs, var_exp = exp), - target_dims = list(target_dims_obs, - c(memb_dim, sdate_dim)), - fun = .sbc, + target_dims = list(c(memb_dim, sdate_dim, dat_dim), + c(memb_dim, sdate_dim, dat_dim)), + fun = .sbc, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores)$output1 } else { BiasCorrected <- Apply(data = list(var_obs = obs, var_exp = exp, var_cor = exp_cor), - target_dims = list(target_dims_obs, - c(memb_dim, sdate_dim), - c(memb_dim, sdate_dim)), - fun = .sbc, - output_dims = c(memb_dim, sdate_dim), + target_dims = list(c(memb_dim, sdate_dim, dat_dim), + c(memb_dim, sdate_dim, dat_dim), + target_dims_cor), + fun = .sbc, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores)$output1 } + if (!is.null(dat_dim)) { + pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), + names(dim(BiasCorrected))) + BiasCorrected <- aperm(BiasCorrected, pos) + } else { + pos <- match(c(names(dim(exp))), names(dim(BiasCorrected))) + BiasCorrected <- aperm(BiasCorrected, pos) + } + + if (exp_cor_remove_memb) { + dim(BiasCorrected) <- dim(BiasCorrected)[-which(names(dim(BiasCorrected)) == memb_dim)] + } + return(BiasCorrected) } -.sbc <- function(var_obs, var_exp , var_cor = NULL, na.rm = FALSE) { +.sbc <- function(var_obs, var_exp, var_cor = NULL, dat_dim = NULL, na.rm = FALSE) { - ntime <- dim(var_exp)[2] - corrected <- NA * var_exp + # exp: [memb, sdate, (dat)] + # obs: [memb, sdate, (dat)] + # ref: [memb, sdate, (dat)] or NULL - if (is.null(var_cor)) { - for (t in 1:ntime) { - # parameters - sd_obs <- sd(var_obs[-t], na.rm = na.rm) - sd_exp <- sd(var_exp[, -t], na.rm = na.rm) - clim_exp <- mean(var_exp[, -t], na.rm = na.rm) - clim_obs <- mean(var_obs[-t], na.rm = na.rm) - - # bias corrected forecast - corrected[, t] <- ((var_exp[, t] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + var_exp <- InsertDim(var_exp, posdim = 3, lendim = 1, name = 'dataset') + var_obs <- InsertDim(var_obs, posdim = 3, lendim = 1, name = 'dataset') + if (!is.null(var_cor)) { + var_cor <- InsertDim(var_cor, posdim = 3, lendim = 1, name = 'dataset') } } else { - # parameters - sd_obs <- sd(var_obs, na.rm = na.rm) - sd_exp <- sd(var_exp, na.rm = na.rm) - clim_exp <- mean(var_exp, na.rm = na.rm) - clim_obs <- mean(var_obs, na.rm = na.rm) - - # bias corrected forecast - corrected <- ((var_cor - clim_exp) * (sd_obs / sd_exp)) + clim_obs + nexp <- as.numeric(dim(var_exp)[dat_dim]) + nobs <- as.numeric(dim(var_obs)[dat_dim]) + } + + if (!is.null(var_cor)) { + if (length(dim(var_cor)) == 2) { # ref: [memb, sdate] + cor_dat_dim <- FALSE + } else { + cor_dat_dim <- TRUE + } + corrected <- array(dim = c(dim(var_cor)[1:2], nexp = nexp, nobs = nobs)) + } else { + ntime <- dim(var_exp)[2] + corrected <- array(dim = c(dim(var_exp)[1:2], nexp = nexp, nobs = nobs)) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + if (is.null(var_cor)) { + for (t in 1:ntime) { + # parameters + sd_obs <- sd(var_obs[, -t, j], na.rm = na.rm) + sd_exp <- sd(var_exp[, -t, i], na.rm = na.rm) + clim_exp <- mean(var_exp[, -t, i], na.rm = na.rm) + clim_obs <- mean(var_obs[, -t, j], na.rm = na.rm) + + # bias corrected forecast + corrected[, t, i, j] <- ((var_exp[, t, i] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } + } else { + # parameters + sd_obs <- sd(var_obs[, , j], na.rm = na.rm) + sd_exp <- sd(var_exp[, , i], na.rm = na.rm) + clim_exp <- mean(var_exp[, , i], na.rm = na.rm) + clim_obs <- mean(var_obs[, , j], na.rm = na.rm) + + # bias corrected forecast + if (isTRUE(cor_dat_dim)) { + corrected[, , i, j] <- ((var_cor[, , i] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } else { + corrected[, , i, j] <- ((var_cor - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } + } + } + } + + if (is.null(dat_dim)) { + if (!is.null(var_cor)) { + dim(corrected) <- dim(var_cor)[1:2] + } else { + dim(corrected) <- dim(var_exp)[1:2] + } } return(corrected) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index cccd7394a0c488958454ab7f65f313399bc2db5e..d0b547d1af9b98e2a5d6cbfd4c282d4872ddb743 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -2,65 +2,112 @@ #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Equivalent to function \code{Calibration} but for objects of -#'class \code{s2dv_cube}. +#'@description Five types of member-by-member bias correction can be performed. +#'The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +#'applies a variance inflation technique to ensure the correction of the bias +#'and the correspondence of variance between forecast and observation (Van +#'Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +#'\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +#'variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +#'and Van Schaeybroeck and Vannitsem (2015), respectively. While the +#'\code{"mse_min"} method minimizes a constrained mean-squared error using three +#'parameters, the \code{"crps_min"} method features four parameters and +#'minimizes the Continuous Ranked Probability Score (CRPS). The +#'\code{"rpc-based"} method adjusts the forecast variance ensuring that the +#'ratio of predictable components (RPC) is equal to one, as in Eade et al. +#'(2014). It is equivalent to function \code{Calibration} but for objects +#'of class \code{s2dv_cube}. #' #'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the seasonal hindcast experiment data in the element -#' named \code{$data}. The hindcast is used to calibrate the forecast in case -#' the forecast is provided; if not, the same hindcast will be calibrated -#' instead. +#' function with at least 'sdate' and 'member' dimensions, containing the +#' seasonal hindcast experiment data in the element named \code{data}. The +#' hindcast is used to calibrate the forecast in case the forecast is provided; +#' if not, the same hindcast will be calibrated instead. #'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. +#' function with at least 'sdate' dimension, containing the observed data in +#' the element named \code{$data}. #'@param exp_cor An optional object of class \code{s2dv_cube} as returned by -#' \code{CST_Load} function, containing the seasonal forecast experiment data -#' in the element named \code{$data}. If the forecast is provided, it will be -#' calibrated using the hindcast and observations; if not, the hindcast will be -#' calibrated instead. -#'@param cal.method Is the calibration method used, can be either \code{bias}, -#' \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default -#' value is \code{mse_min}. -#'@param eval.method Is the sampling method used, can be either \code{in-sample} -#' or \code{leave-one-out}. Default value is the \code{leave-one-out} cross -#' validation. In case the forecast is provided, any chosen eval.method is -#' over-ruled and a third option is used. -#'@param multi.model Is a boolean that is used only for the \code{mse_min} +#' \code{CST_Load} function with at least 'sdate' and 'member' dimensions, +#' containing the seasonal forecast experiment data in the element named +#' \code{data}. If the forecast is provided, it will be calibrated using the +#' hindcast and observations; if not, the hindcast will be calibrated instead. +#' If there is only one corrected dataset, it should not have dataset dimension. +#' If there is a corresponding corrected dataset for each 'exp' forecast, the +#' dataset dimension must have the same length as in 'exp'. The default value +#' is NULL. +#'@param cal.method A character string indicating the calibration method used, +#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or +#' \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample} or \code{leave-one-out}. Default value is the +#' \code{leave-one-out} cross validation. In case the forecast is provided, any +#' chosen eval.method is over-ruled and a third option is used. +#'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences #' between the two approaches are generally small but may become large when #' using small ensemble sizes. Using multi.model when the calibration method is #' \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill Is a boolean that indicates what happens in case calibration is +#'@param na.fill A boolean that indicates what happens in case calibration is #' not possible or will yield unreliable results. This happens when three or #' less forecasts-observation pairs are available to perform the training phase #' of the calibration. By default \code{na.fill} is set to true such that NA #' values will be returned. If \code{na.fill} is set to false, the uncorrected #' data will be returned. -#'@param na.rm Is a boolean that indicates whether to remove the NA values or -#' not. The default value is \code{TRUE}. See Details section for further +#'@param na.rm A boolean that indicates whether to remove the NA values or not. +#' The default value is \code{TRUE}. See Details section for further #' information about its use and compatibility with \code{na.fill}. -#'@param apply_to Is a character string that indicates whether to apply the +#'@param apply_to A character string that indicates whether to apply the #' calibration to all the forecast (\code{"all"}) or only to those where the -#' correlation between the ensemble mean and the observations is statistically +#' correlation between the ensemble mean and the observations is statistically #' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha Is a numeric value indicating the significance level for the -#' correlation test. Only useful if \code{cal.method == "rpc-based" & -#' apply_to == "sign"}. -#'@param memb_dim Is a character string indicating the name of the member -#' dimension. By default, it is set to 'member'. -#'@param sdate_dim Is a character string indicating the name of the start date +#'@param alpha A numeric value indicating the significance level for the +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to +#' == "sign"}. +#'@param memb_dim A character string indicating the name of the member dimension. +#' By default, it is set to 'member'. +#'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. -#'@param ncores Is an integer that indicates the number of cores for parallel +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is one. +#' #'@return An object of class \code{s2dv_cube} containing the calibrated -#'forecasts in the element \code{$data} with the same dimensions as the one in -#'the exp object. -#' -#'@importFrom s2dv InsertDim -#'@import abind +#'forecasts in the element \code{data} with the dimensions nexp, nobs and same +#'dimensions as in the 'exp' object. nexp is the number of experiment +#'(i.e., 'dat_dim' in exp), and nobs is the number of observation (i.e., +#''dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If 'exp_cor' +#'is provided the returned array will be with the same dimensions as 'exp_cor'. #' +#'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to +#'indicate how the function has to handle the NA values. The \code{na.fill} +#'parameter checks whether there are more than three forecast-observations pairs +#'to perform the computation. In case there are three or less pairs, the +#'computation is not carried out, and the value returned by the function depends +#'on the value of this parameter (either NA if \code{na.fill == TRUE} or the +#'uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +#'is used to indicate the function whether to remove the missing values during +#'the computation of the parameters needed to perform the calibration. +#' +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +#'success of multi-model ensembles in seasonal forecasting-II calibration and +#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +#'predictions underestimate the predictability of the read world? Geophysical +#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +#'through linear regression. Nonlinear Processes in Geophysics, 18(2), +#'147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +#'post-processing using member-by-member approaches: theoretical aspects. +#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +#'doi:10.1002/qj.2397 +#' #'@seealso \code{\link{CST_Load}} -#' +#' #'@examples #'# Example 1: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -73,9 +120,7 @@ #'obs <- list(data = obs1, lat = lat, lon = lon) #'attr(exp, 'class') <- 's2dv_cube' #'attr(obs, 'class') <- 's2dv_cube' -#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", -#' eval.method = "in-sample") -#'str(a) +#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") #' #'# Example 2: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -92,61 +137,58 @@ #'attr(exp, 'class') <- 's2dv_cube' #'attr(obs, 'class') <- 's2dv_cube' #'attr(exp_cor, 'class') <- 's2dv_cube' -#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, -#' cal.method = "evmos") -#'str(a) +#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") +#' +#'@importFrom s2dv InsertDim Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - if(!missing(multi.model) & !(cal.method == "mse_min")){ - warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) - } - - if(is.null(exp_cor)){ #exp will be used to calibrate and will also be calibrated: "calibrate hindcast" - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { + memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) { + + if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") + "as output by CSTools::CST_Load.") + } + if (!is.null(exp_cor)) { + if (!inherits(exp_cor, "s2dv_cube")) { + stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") } - exp$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = NULL, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) + # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" + # if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the + # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; + # no need for "leave-one-out" or "in-sample") + eval.method = "hindcast-vs-forecast" + } + + if (!missing(multi.model) & !(cal.method == "mse_min")) { + warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) + } + + Calibration <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, + memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, ncores = ncores) + + if (is.null(exp_cor)) { + exp$data <- Calibration exp$Datasets <- c(exp$Datasets, obs$Datasets) exp$source_files <- c(exp$source_files, obs$source_files) - - return(exp) + + return(exp) - }else{ #if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" - eval.method = "hindcast-vs-forecast" #if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; no need for "leave-one-out" or "in-sample") - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube") || !inherits(exp_cor, "s2dv_cube")) { - stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - exp_cor$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) - exp_cor$Datasets <- c(exp_cor$Datasets, obs$Datasets) - exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - - return(exp_cor) + } else { + exp_cor$data <- Calibration + exp_cor$Datasets <- c(exp_cor$Datasets, exp$Datasets, obs$Datasets) + exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - } + return(exp_cor) + } } - - #'Forecast Calibration #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} @@ -163,24 +205,9 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'parameters, the \code{"crps_min"} method features four parameters and #'minimizes the Continuous Ranked Probability Score (CRPS). The #'\code{"rpc-based"} method adjusts the forecast variance ensuring that the -#'ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). -#'@description Both in-sample or our out-of-sample (leave-one-out cross +#'ratio of predictable components (RPC) is equal to one, as in Eade et al. +#'(2014). Both in-sample or our out-of-sample (leave-one-out cross #'validation) calibration are possible. -#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the -#'success of multi-model ensembles in seasonal forecasting-II calibration and -#'combination. Tellus A. 2005;57:234-252. -#'doi:10.1111/j.1600-0870.2005.00104.x -#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., -#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate -#'predictions underestimate the predictability of the read world? Geophysical -#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing -#'through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. -#'doi:10.5194/npg-18-147-2011 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble -#'post-processing using member-by-member approaches: theoretical aspects. -#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. -#'doi:10.1002/qj.2397 #' #'@param exp A multidimensional array with named dimensions (at least 'sdate' #' and 'member') containing the seasonal hindcast experiment data. The hindcast @@ -189,153 +216,246 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@param obs A multidimensional array with named dimensions (at least 'sdate') #' containing the observed data. #'@param exp_cor An optional multidimensional array with named dimensions (at -#' least 'sdate' and 'member') containing the seasonal forecast experiment data. -#' If the forecast is provided, it will be calibrated using the hindcast and -#' observations; if not, the hindcast will be calibrated instead. -#'@param cal.method Is the calibration method used, can be either \code{bias}, -#' \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default -#' value is \code{mse_min}. -#'@param eval.method Is the sampling method used, can be either \code{in-sample} -#' or \code{leave-one-out}. Default value is the \code{leave-one-out} cross -#' validation. In case the forecast is provided, any chosen eval.method is -#' over-ruled and a third option is used. -#'@param multi.model Is a boolean that is used only for the \code{mse_min} +#' least 'sdate' and 'member') containing the seasonal forecast experiment +#' data. If the forecast is provided, it will be calibrated using the hindcast +#' and observations; if not, the hindcast will be calibrated instead. If there +#' is only one corrected dataset, it should not have dataset dimension. If there +#' is a corresponding corrected dataset for each 'exp' forecast, the dataset +#' dimension must have the same length as in 'exp'. The default value is NULL. +#'@param cal.method A character string indicating the calibration method used, +#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} +#' or \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method A character string indicating the sampling method used, +#' can be either \code{in-sample} or \code{leave-one-out}. Default value is +#' the \code{leave-one-out} cross validation. In case the forecast is +#' provided, any chosen eval.method is over-ruled and a third option is +#' used. +#'@param multi.model A boolean that is used only for the \code{mse_min} #' method. If multi-model ensembles or ensembles of different sizes are used, #' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences #' between the two approaches are generally small but may become large when -#' using small ensemble sizes. Using multi.model when the calibration method is -#' \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill Is a boolean that indicates what happens in case calibration is +#' using small ensemble sizes. Using multi.model when the calibration method +#' is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. +#'@param na.fill A boolean that indicates what happens in case calibration is #' not possible or will yield unreliable results. This happens when three or -#' less forecasts-observation pairs are available to perform the training phase +#' less forecasts-observation pairs are available to perform the training phase #' of the calibration. By default \code{na.fill} is set to true such that NA #' values will be returned. If \code{na.fill} is set to false, the uncorrected #' data will be returned. -#'@param na.rm Is a boolean that indicates whether to remove the NA values or +#'@param na.rm A boolean that indicates whether to remove the NA values or #' not. The default value is \code{TRUE}. -#'@param apply_to Is a character string that indicates whether to apply the +#'@param apply_to A character string that indicates whether to apply the #' calibration to all the forecast (\code{"all"}) or only to those where the #' correlation between the ensemble mean and the observations is statistically #' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha Is a numeric value indicating the significance level for the -#' correlation test. Only useful if \code{cal.method == "rpc-based" & -#' apply_to == "sign"}. -#'@param memb_dim Is a character string indicating the name of the member +#'@param alpha A numeric value indicating the significance level for the +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == +#' "sign"}. +#'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. -#'@param sdate_dim Is a character string indicating the name of the start date +#'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. -#'@param ncores Is an integer that indicates the number of cores for parallel -#' computations using multiApply function. The default value is one. -#'@return An array containing the calibrated forecasts with the same dimensions -#'as the \code{exp} array. -#' -#'@importFrom s2dv InsertDim MeanDims Reorder -#'@import abind -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#' -#'@seealso \code{\link{CST_Load}} -#' -#'@details -#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate -#'how the function has to handle the NA values. The \code{na.fill} parameter -#'checks whether there are more than three forecast-observations pairs to -#'perform the computation. In case there are three or less pairs, the +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param ncores An integer that indicates the number of cores for parallel +#' computation using multiApply function. The default value is NULL (one core). +#' +#'@return An array containing the calibrated forecasts with the dimensions +#'nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of +#'experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'If 'exp_cor' is provided the returned array will be with the same dimensions as +#''exp_cor'. +#' +#'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to +#'indicate how the function has to handle the NA values. The \code{na.fill} +#'parameter checks whether there are more than three forecast-observations pairs +#'to perform the computation. In case there are three or less pairs, the #'computation is not carried out, and the value returned by the function depends #'on the value of this parameter (either NA if \code{na.fill == TRUE} or the #'uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} #'is used to indicate the function whether to remove the missing values during #'the computation of the parameters needed to perform the calibration. #' +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +#'success of multi-model ensembles in seasonal forecasting-II calibration and +#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +#'predictions underestimate the predictability of the read world? Geophysical +#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +#'through linear regression. Nonlinear Processes in Geophysics, 18(2), +#'147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +#'post-processing using member-by-member approaches: theoretical aspects. +#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +#'doi:10.1002/qj.2397 +#' +#'@seealso \code{\link{CST_Load}} +#' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) #'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) #'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) #'a <- Calibration(exp = mod1, obs = obs1) -#'str(a) +#' +#'@importFrom s2dv InsertDim Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export -Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", - eval.method = "leave-one-out", +Calibration <- function(exp, obs, exp_cor = NULL, + cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - dim.exp <- dim(exp) - amt.dims.exp <- length(dim.exp) - dim.obs <- dim(obs) - amt.dims.obs <- length(dim.obs) - dim.names.exp <- names(dim.exp) - dim.names.obs <- names(dim.obs) - if(!is.null(exp_cor)){ - dim.exp_cor <- dim(exp_cor) - amt.dims.exp_cor <- length(dim.exp_cor) - dim.names.exp_cor <- names(dim.exp_cor) - } - if (is.null(memb_dim) || !is.character(memb_dim)) { - stop("Parameter 'memb_dim' should be a character string indicating the", - "name of the dimension where members are stored in 'exp'.") + memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) { + + # Check inputs + ## exp, obs + if (!is.array(exp) || !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") } - if (length(memb_dim) > 1) { - memb_dim <- memb_dim[1] - warning("Parameter 'memb_dim' has length greater than 1 and only", - " the first element will be used.") - } - - if (is.null(sdate_dim) || !is.character(sdate_dim)) { + if (!is.array(obs) || !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + expdims <- names(dim(exp)) + obsdims <- names(dim(obs)) + if (is.null(expdims)) { + stop("Parameter 'exp' must have dimension names.") + } + if (is.null(obsdims)) { + stop("Parameter 'obs' must have dimension names.") + } + if (any(is.na(exp))) { + warning("Parameter 'exp' contains NA values.") + } + if (any(is.na(obs))) { + warning("Parameter 'obs' contains NA values.") + } + ## exp_cor + if (!is.null(exp_cor)) { + eval.method = "hindcast-vs-forecast" + expcordims <- names(dim(exp_cor)) + if (is.null(expcordims)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (any(is.na(exp_cor))) { + warning("Parameter 'exp_cor' contains NA values.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## sdate_dim and memb_dim + if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' should be a character string indicating the", - "name of the dimension where start dates are stored in 'exp'.") + "name of the dimension where start dates are stored in 'exp'.") } if (length(sdate_dim) > 1) { sdate_dim <- sdate_dim[1] warning("Parameter 'sdate_dim' has length greater than 1 and only", " the first element will be used.") } - target.dim.names.exp <- c(memb_dim, sdate_dim) - target.dim.names.obs <- sdate_dim - - if (!all(target.dim.names.exp %in% dim.names.exp)) { - stop("Parameter 'exp' must have the dimensions defined in memb_dim ", - "and sdate_dim.") + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") } - - if(!is.null(exp_cor)){ - if (!all(target.dim.names.exp %in% dim.names.exp_cor)) { - stop("Parameter 'exp_cor' must have the dimensions defined in memb_dim ", - "and sdate_dim.") - } + if (length(memb_dim) > 1) { + memb_dim <- memb_dim[1] + warning("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") } - if (!all(c(sdate_dim) %in% dim.names.obs)) { + target_dims_exp <- c(memb_dim, sdate_dim, dat_dim) + target_dims_obs <- c(sdate_dim, dat_dim) + + if (!all(target_dims_exp %in% expdims)) { + stop("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + } + if (!all(target_dims_obs %in% obsdims)) { stop("Parameter 'obs' must have the dimension defined in sdate_dim ", "parameter.") } - - if (any(is.na(exp))) { - warning("Parameter 'exp' contains NA values.") + if (memb_dim %in% obsdims) { + if (dim(obs)[memb_dim] != 1) { + warning("Parameter 'obs' has dimension 'memb_dim' with length larger", + " than 1. Only the first member dimension will be used.") + } + obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") } - - if(!is.null(exp_cor)){ - if (any(is.na(exp_cor))) { - warning("Parameter 'exp_cor' contains NA values.") + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + exp_cor <- InsertDim(exp_cor, posdim = 1, lendim = 1, name = memb_dim) + exp_cor_remove_memb <- TRUE + } else { + exp_cor_remove_memb <- FALSE } + } else { + exp_cor_remove_memb <- FALSE } - - if (any(is.na(obs))) { - warning("Parameter 'obs' contains NA values.") + ## exp, obs, and exp_cor (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] } - - if (memb_dim %in% names(dim(obs))) { - obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of all ", + "dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(exp_cor)) { + name_exp_cor <- sort(names(dim(exp_cor))) + name_exp <- sort(names(dim(exp))) + if (!is.null(dat_dim)) { + if (dat_dim %in% expcordims) { + if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { + stop("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] + target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + name_exp <- name_exp[-which(name_exp %in% target_dims_exp)] + name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] + if (!identical(length(name_exp), length(name_exp_cor)) | + !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + stop("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") + } } - data.set.sufficiently.large.out <- - Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, - fun = .data.set.sufficiently.large)$output1 + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + ## data.set.sufficiently.large + data.set.sufficiently.large.out <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + ncores = ncores, + fun = .data.set.sufficiently.large)$output1 - if(!all(data.set.sufficiently.large.out)){ - if(na.fill){ + if (!all(data.set.sufficiently.large.out)) { + if (na.fill) { warning("Some forecast data could not be corrected due to data lack", " and is replaced with NA values") } else { @@ -343,10 +463,18 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", " and is replaced with uncorrected values") } } - + ## na.rm if (!na.rm %in% c(TRUE,FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE.") } + if (length(na.rm) > 1) { + na.rm <- na.rm[1] + warning("Paramter 'na.rm' has length greater than 1, and only the fist element is used.") + } + ## cal.method, apply_to, alpha + if (!any(cal.method %in% c('bias', 'evmos', 'mse_min', 'crps_min', 'rpc-based'))) { + stop("Parameter 'cal.method' must be a character string indicating the calibration method used.") + } if (cal.method == 'rpc-based') { if (is.null(apply_to)) { apply_to <- 'sign' @@ -363,158 +491,305 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", } } } - - if(is.null(exp_cor)){ - calibrated <- Apply(data = list(exp = exp, obs = obs), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp)[dexes] - }else{ + ## eval.method + if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast'))) { + stop("Parameter 'eval.method' must be a character string indicating the sampling method used ('in-sample', 'leave-one-out' or 'hindcast-vs-forecast').") + } + ## multi.model + if (!multi.model %in% c(TRUE,FALSE)) { + stop("Parameter 'multi.model' must be TRUE or FALSE.") + } + + if (is.null(exp_cor)) { + calibrated <- Apply(data = list(exp = exp, obs = obs), dat_dim = dat_dim, + cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), + ncores = ncores, fun = .cal)$output1 + } else { calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs, exp_cor = target.dim.names.exp), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp_cor)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp_cor)[dexes] - } + dat_dim = dat_dim, cal.method = cal.method, eval.method = eval.method, + multi.model = multi.model, na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, + exp_cor = target_dims_cor), + ncores = ncores, fun = .cal)$output1 + } + if (!is.null(dat_dim)) { + pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), + names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) + } else { + pos <- match(c(names(dim(exp))), names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) + } + + if (exp_cor_remove_memb) { + dim(calibrated) <- dim(calibrated)[-which(names(dim(calibrated)) == memb_dim)] + } + return(calibrated) } -.data.set.sufficiently.large <- function(exp, obs){ +.data.set.sufficiently.large <- function(exp, obs) { amt.min.samples <- 3 amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) return(amt.good.pts > amt.min.samples) } -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor){ - if(eval.method == "leave-one-out"){ +.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) { + if (eval.method == "leave-one-out") { dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "in-sample"){ + } else if (eval.method == "in-sample") { dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast"){ + } else if (eval.method == "hindcast-vs-forecast") { dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), train.dexes = seq(1, amt.points))) } else { - stop(paste0("unknown sampling method: ",eval.method)) + stop(paste0("unknown sampling method: ", eval.method)) } return(dexes.lst) } -.cal <- function(exp, obs, exp_cor = NULL, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { - if(is.null(exp_cor)){ - exp_cor <- exp ## generate a copy of exp so that the same function can run - ## when exp_cor is provided and when it's not - } - obs <- as.vector(obs) - dims.fc <- dim(exp) - dims.fc_cor <- dim(exp_cor) ## new line - amt.mbr <- dims.fc[1] - amt.sdate <- dims.fc[2] - amt.sdate_cor <- dims.fc_cor[2] ## new line - var.cor.fc <- NA * exp_cor ## modified line (exp_cor instead of exp); - ## in case of exp_cor not provided, at this point exp_cor - ## is already the same as exp so no change - names(dim(var.cor.fc)) <- dims.fc - - if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } - } - eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate, - amt.points_cor = amt.sdate_cor) - amt.resamples <- length(eval.train.dexeses) - for (i.sample in seq(1, amt.resamples)) { - # defining training (tr) and evaluation (ev) subsets - eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes - train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes - - fc.ev <- exp_cor[ , eval.dexes, drop = FALSE] ## modified line (exp_cor instead of exp) - ## fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) - fc.tr <- exp[ , train.dexes] - obs.tr <- obs[train.dexes , drop = FALSE] - - if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) - } else if(cal.method == "evmos"){ # forecast correction implemented - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "mse_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "crps_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate initial value for regression parameters - init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) - init.par[3] <- sqrt(init.par[3]) - #calculate regression parameters on training dataset - optim.tmp <- optim(par = init.par, - fn = .calc.crps.opt, - gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, - na.rm = na.rm, - method = "BFGS") - - mbm.par <- optim.tmp$par - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) - } else if (cal.method == 'rpc-based') { - ens_mean.ev <- multiApply::Apply(data = fc.ev, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_mean.tr <- multiApply::Apply(data = fc.tr, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread - exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) - var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance - var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) - var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations - r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean - if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { - ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, - order = names(dims.fc)) - dim(var.cor.fc) <- dims.fc - } else { ## no significant -> replacing with observed climatology - var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) - } +.cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", + eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL) { + + # exp: [memb, sdate, (dat)] + # obs: [sdate (dat)] + # exp_cor: [memb, sdate, (dat)] or NULL + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + exp <- InsertDim(exp, posdim = 3, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(exp_cor)) { + # generate a copy of exp + exp_cor <- exp + cor_dat_dim <- TRUE + } else { + if (length(dim(exp_cor)) == 2) { # ref: [memb, sdate] + cor_dat_dim <- FALSE } else { - stop("unknown calibration method: ",cal.method) + cor_dat_dim <- TRUE + } + } + + expdims <- dim(exp) + expdims_cor <- dim(exp_cor) + memb <- expdims[1] # memb + sdate <- expdims[2] # sdate + sdate_cor <- expdims_cor[2] + + return_data_na <- FALSE + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + if (!.data.set.sufficiently.large(exp = exp[, , i, drop = FALSE], obs = obs[, j, drop = FALSE])) { + return_data_na <- TRUE + if (cor_dat_dim) { + var.cor.fc[, , i, j] <- NA * exp_cor[, , i] + } else { + var.cor.fc[, , i, j] <- NA * exp_cor[, i] + } + if (!na.fill) { + var.cor.fc[, , i] <- exp[, , i] + } + } + obs_data <- as.vector(obs[, j]) + exp_data <- exp[, , i] + dim(exp_data) <- dim(exp)[1:2] + if (cor_dat_dim) { + expcor_data <- exp_cor[, , i] + dim(expcor_data) = dim(exp_cor)[1:2] + } else { + expcor_data <- exp_cor + } + eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, + amt.points_cor = sdate_cor) + amt.resamples <- length(eval.train.dexeses) + for (i.sample in seq(1, amt.resamples)) { + # defining training (tr) and evaluation (ev) subsets + # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes + fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] + fc.tr <- exp_data[ , train.dexes] + obs.tr <- obs_data[train.dexes , drop = FALSE] + + if (cal.method == "bias") { + var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + # forecast correction implemented + } else if (cal.method == "evmos") { + # forecast correction implemented + # ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + # calculate value for regression parameters + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + # correct evaluation subset + var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + var.cor.fc[ , eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "crps_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) + init.par[3] <- sqrt(init.par[3]) + # calculate regression parameters on training dataset + optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, + quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") + mbm.par <- optim.tmp$par + var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + # Ensemble mean + ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + # Ensemble spread + ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 + # Mean (climatology) + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) + # Ensemble mean variance + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) + # Variance of ensemble members about ensemble mean (= spread) + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) + # Variance in the observations + var_obs.tr <- var(obs.tr, na.rm = na.rm) + # Correlation between observations and the ensemble mean + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', + use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) + if ((apply_to == 'all') || (apply_to == 'sign' && + cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes, i, j] <- Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), + target_dims = names(sdate), fun = .CalibrationMembersRPC, + var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(expdims)[1:2]) + } else { + # no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) + } + } else { + stop("unknown calibration method: ", cal.method) + } + } + } + } + + + + + if (!return_data_na) { + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + obs_data <- as.vector(obs[, j]) + exp_data <- exp[, , i] + dim(exp_data) <- dim(exp)[1:2] + if (cor_dat_dim) { + expcor_data <- exp_cor[, , i] + dim(expcor_data) = dim(exp_cor)[1:2] + } else { + expcor_data <- exp_cor + } + eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, + amt.points_cor = sdate_cor) + amt.resamples <- length(eval.train.dexeses) + for (i.sample in seq(1, amt.resamples)) { + # defining training (tr) and evaluation (ev) subsets + # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes + fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] + fc.tr <- exp_data[ , train.dexes] + obs.tr <- obs_data[train.dexes , drop = FALSE] + + if (cal.method == "bias") { + var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + # forecast correction implemented + } else if (cal.method == "evmos") { + # forecast correction implemented + # ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + # calculate value for regression parameters + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + # correct evaluation subset + var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + var.cor.fc[ , eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "crps_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) + init.par[3] <- sqrt(init.par[3]) + # calculate regression parameters on training dataset + optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, + quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") + mbm.par <- optim.tmp$par + var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + # Ensemble mean + ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + # Ensemble spread + ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 + # Mean (climatology) + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) + # Ensemble mean variance + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) + # Variance of ensemble members about ensemble mean (= spread) + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) + # Variance in the observations + var_obs.tr <- var(obs.tr, na.rm = na.rm) + # Correlation between observations and the ensemble mean + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', + use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) + if ((apply_to == 'all') || (apply_to == 'sign' && + cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes, i, j] <- Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), + target_dims = names(sdate), fun = .CalibrationMembersRPC, + var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(expdims)[1:2]) + } else { + # no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) + } + } else { + stop("unknown calibration method: ", cal.method) + } + } + } } } + + if (is.null(dat_dim)) { + dim(var.cor.fc) <- dim(exp_cor)[1:2] + } + return(var.cor.fc) } -.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations +# Function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant <- function(obs, fc, na.rm) { + if (is.null(dim(fc))) { + dim(fc) <- c(length(fc), 1) + } amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") obs.av <- mean(obs, na.rm = na.rm) @@ -532,9 +807,10 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +# Extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") obs.av <- mean(obs, na.rm = na.rm) @@ -553,13 +829,13 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } - -.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts +# Function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant <- function(fc, na.rm) { amt.mbr <- dim(fc)[1] fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) - fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) + fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) fc.dev <- fc - fc.ens.av.per.ens @@ -582,13 +858,13 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts - +# Extended function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant.ext <- function(fc, na.rm){ amt.mbr <- dim(fc)[1] - repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) + repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) - spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) + spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') return( append(.calc.fc.quant(fc, na.rm = na.rm), @@ -596,11 +872,10 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -#Below are the core or elementary functions to calculate the regression parameters for the different methods -.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ +# Below are the core or elementary functions to calculate the regression parameters for the different methods +.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm) { par.out <- rep(NA, 3) - - if(multi.model){ + if (multi.model) { par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt) } else { par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) @@ -610,13 +885,15 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", return(par.out) } + .calc.evmos.par <- function(quant.obs.fc, na.rm){ par.out <- rep(NA, 2) par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } -#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps + +# Below are the core or elementary functions to calculate the functions necessary for the minimization of crps .calc.crps.opt <- function(par, quant.obs.fc, na.rm){ return( with(quant.obs.fc, @@ -627,10 +904,9 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ - sgn1 <- with(quant.obs.fc,sign(obs.per.ens - - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) +.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm) { + sgn1 <- with(quant.obs.fc,sign(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) deriv.par1 <- mean(sgn1, na.rm = na.rm) @@ -644,8 +920,8 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) } -#Below are the core or elementary functions to correct the evaluation set based on the regression parameters -.correct.evmos.fc <- function(fc, par, na.rm){ +# Below are the core or elementary functions to correct the evaluation set based on the regression parameters +.correct.evmos.fc <- function(fc, par, na.rm) { quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc)) } diff --git a/man/BiasCorrection.Rd b/man/BiasCorrection.Rd index d944a32fa7f61e738e907bdc3c95e1971912f707..233639afc593a7fcbadde0986adf48389585beec 100644 --- a/man/BiasCorrection.Rd +++ b/man/BiasCorrection.Rd @@ -11,20 +11,23 @@ BiasCorrection( na.rm = FALSE, memb_dim = "member", sdate_dim = "sdate", + dat_dim = NULL, ncores = NULL ) } \arguments{ \item{exp}{A multidimensional array with named dimensions containing the -seasonal forecast experiment data with at least 'member' and 'sdate' -dimensions.} +seasonal forecast experiment data with at least time and member dimensions.} \item{obs}{A multidimensional array with named dimensions containing the -observed data with at least 'sdate' dimension.} +observed data with at least time dimension.} \item{exp_cor}{A multidimensional array with named dimensions containing the -seasonl forecast experiment to be corrected. If it is NULL, the 'exp' -forecast will be corrected.} +seasonal forecast experiment to be corrected with at least time and member +dimension. If it is NULL, the 'exp' forecast will be corrected. If there is +only one corrected dataset, it should not have dataset dimension. If there +is a corresponding corrected dataset for each 'exp' forecast, the dataset +dimension must have the same length as in 'exp'. The default value is NULL.} \item{na.rm}{A logical value indicating whether missing values should be stripped before the computation proceeds, by default it is set to FALSE.} @@ -35,12 +38,20 @@ dimension. By default, it is set to 'member'.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + \item{ncores}{An integer that indicates the number of cores for parallel computations using multiApply function. The default value is NULL.} } \value{ -An array containing the bias corrected forecasts with the same -dimensions of the experimental data. +An array containing the bias corrected forecasts with the dimensions +nexp, nobs and same dimensions as in the 'exp' object. nexp is the number of +experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +'exp_cor' is provided the returned array will be with the same dimensions as +'exp_cor'. } \description{ This function applies the simple bias adjustment technique diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index fb96babd17b91ea27c8b4c4c0d794f33d8c5a734..48c593d00fc96caea8e8d50da236d99e66062c58 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -11,20 +11,26 @@ CST_BiasCorrection( na.rm = FALSE, memb_dim = "member", sdate_dim = "sdate", + dat_dim = NULL, ncores = NULL ) } \arguments{ \item{exp}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element -named \code{$data}} +named \code{$data} with at least time and member dimensions.} \item{obs}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the observed data in the element named \code{$data}.} +function, containing the observed data in the element named \code{$data} +with at least time dimension.} \item{exp_cor}{An object of class \code{s2dv_cube} as returned by -\code{CST_Load} function, containing the seasonl forecast experiment to be -corrected. If it is NULL, the 'exp' forecast will be corrected.} +\code{CST_Load} function, containing the seasonal forecast experiment to be +corrected with at least time dimension. If it is NULL, the 'exp' forecast +will be corrected. If there is only one corrected dataset, it should not +have dataset dimension. If there is a corresponding corrected dataset for +each 'exp' forecast, the dataset dimension must have the same length as in +'exp'. The default value is NULL.} \item{na.rm}{A logical value indicating whether missing values should be stripped before the computation proceeds, by default it is set to FALSE.} @@ -35,12 +41,20 @@ dimension. By default, it is set to 'member'.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + \item{ncores}{An integer that indicates the number of cores for parallel computations using multiApply function. The default value is NULL.} } \value{ An object of class \code{s2dv_cube} containing the bias corrected -forecasts with the same dimensions of the experimental data. +forecasts with the dimensions nexp, nobs and same dimensions as in the 'exp' +object. nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is +the number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp +and nobs are omitted. If 'exp_cor' is provided the returned array will be with +the same dimensions as 'exp_cor'. } \description{ This function applies the simple bias adjustment technique diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 5d79ae41a34bc994c0bbe69560a9c5e61ddb6cc6..2e8861e78fe5b6d9e8e9d5d0c1bdd1982ff87bee 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -17,78 +17,115 @@ CST_Calibration( alpha = NULL, memb_dim = "member", sdate_dim = "sdate", - ncores = 1 + dat_dim = NULL, + ncores = NULL ) } \arguments{ \item{exp}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the seasonal hindcast experiment data in the element -named \code{$data}. The hindcast is used to calibrate the forecast in case -the forecast is provided; if not, the same hindcast will be calibrated -instead.} +function with at least 'sdate' and 'member' dimensions, containing the +seasonal hindcast experiment data in the element named \code{data}. The +hindcast is used to calibrate the forecast in case the forecast is provided; +if not, the same hindcast will be calibrated instead.} \item{obs}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the observed data in the element named \code{$data}.} +function with at least 'sdate' dimension, containing the observed data in +the element named \code{$data}.} \item{exp_cor}{An optional object of class \code{s2dv_cube} as returned by -\code{CST_Load} function, containing the seasonal forecast experiment data -in the element named \code{$data}. If the forecast is provided, it will be -calibrated using the hindcast and observations; if not, the hindcast will be -calibrated instead.} +\code{CST_Load} function with at least 'sdate' and 'member' dimensions, +containing the seasonal forecast experiment data in the element named +\code{data}. If the forecast is provided, it will be calibrated using the +hindcast and observations; if not, the hindcast will be calibrated instead. +If there is only one corrected dataset, it should not have dataset dimension. +If there is a corresponding corrected dataset for each 'exp' forecast, the +dataset dimension must have the same length as in 'exp'. The default value +is NULL.} -\item{cal.method}{Is the calibration method used, can be either \code{bias}, -\code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default -value is \code{mse_min}.} +\item{cal.method}{A character string indicating the calibration method used, +can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or +\code{rpc-based}. Default value is \code{mse_min}.} -\item{eval.method}{Is the sampling method used, can be either \code{in-sample} -or \code{leave-one-out}. Default value is the \code{leave-one-out} cross -validation. In case the forecast is provided, any chosen eval.method is -over-ruled and a third option is used.} +\item{eval.method}{A character string indicating the sampling method used, it +can be either \code{in-sample} or \code{leave-one-out}. Default value is the +\code{leave-one-out} cross validation. In case the forecast is provided, any +chosen eval.method is over-ruled and a third option is used.} -\item{multi.model}{Is a boolean that is used only for the \code{mse_min} +\item{multi.model}{A boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} -\item{na.fill}{Is a boolean that indicates what happens in case calibration is +\item{na.fill}{A boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} -\item{na.rm}{Is a boolean that indicates whether to remove the NA values or -not. The default value is \code{TRUE}. See Details section for further +\item{na.rm}{A boolean that indicates whether to remove the NA values or not. +The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}.} -\item{apply_to}{Is a character string that indicates whether to apply the +\item{apply_to}{A character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the -correlation between the ensemble mean and the observations is statistically +correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} -\item{alpha}{Is a numeric value indicating the significance level for the -correlation test. Only useful if \code{cal.method == "rpc-based" & -apply_to == "sign"}.} +\item{alpha}{A numeric value indicating the significance level for the +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to +== "sign"}.} -\item{memb_dim}{Is a character string indicating the name of the member -dimension. By default, it is set to 'member'.} +\item{memb_dim}{A character string indicating the name of the member dimension. +By default, it is set to 'member'.} -\item{sdate_dim}{Is a character string indicating the name of the start date +\item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} -\item{ncores}{Is an integer that indicates the number of cores for parallel +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{ncores}{An integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} } \value{ An object of class \code{s2dv_cube} containing the calibrated -forecasts in the element \code{$data} with the same dimensions as the one in -the exp object. +forecasts in the element \code{data} with the dimensions nexp, nobs and same +dimensions as in the 'exp' object. nexp is the number of experiment +(i.e., 'dat_dim' in exp), and nobs is the number of observation (i.e., +'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If 'exp_cor' +is provided the returned array will be with the same dimensions as 'exp_cor'. } \description{ -Equivalent to function \code{Calibration} but for objects of -class \code{s2dv_cube}. +Five types of member-by-member bias correction can be performed. +The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +applies a variance inflation technique to ensure the correction of the bias +and the correspondence of variance between forecast and observation (Van +Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +and Van Schaeybroeck and Vannitsem (2015), respectively. While the +\code{"mse_min"} method minimizes a constrained mean-squared error using three +parameters, the \code{"crps_min"} method features four parameters and +minimizes the Continuous Ranked Probability Score (CRPS). The +\code{"rpc-based"} method adjusts the forecast variance ensuring that the +ratio of predictable components (RPC) is equal to one, as in Eade et al. +(2014). It is equivalent to function \code{Calibration} but for objects +of class \code{s2dv_cube}. +} +\details{ +Both the \code{na.fill} and \code{na.rm} parameters can be used to +indicate how the function has to handle the NA values. The \code{na.fill} +parameter checks whether there are more than three forecast-observations pairs +to perform the computation. In case there are three or less pairs, the +computation is not carried out, and the value returned by the function depends +on the value of this parameter (either NA if \code{na.fill == TRUE} or the +uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +is used to indicate the function whether to remove the missing values during +the computation of the parameters needed to perform the calibration. } \examples{ # Example 1: @@ -102,9 +139,7 @@ exp <- list(data = mod1, lat = lat, lon = lon) obs <- list(data = obs1, lat = lat, lon = lon) attr(exp, 'class') <- 's2dv_cube' attr(obs, 'class') <- 's2dv_cube' -a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", - eval.method = "in-sample") -str(a) +a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") # Example 2: mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -121,9 +156,27 @@ exp_cor <- list(data = mod2, lat = lat, lon = lon) attr(exp, 'class') <- 's2dv_cube' attr(obs, 'class') <- 's2dv_cube' attr(exp_cor, 'class') <- 's2dv_cube' -a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, - cal.method = "evmos") -str(a) +a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") + +} +\references{ +Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +success of multi-model ensembles in seasonal forecasting-II calibration and +combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x + +Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +predictions underestimate the predictability of the read world? Geophysical +Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 + +Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +through linear regression. Nonlinear Processes in Geophysics, 18(2), +147. doi:10.5194/npg-18-147-2011 + +Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +post-processing using member-by-member approaches: theoretical aspects. +Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +doi:10.1002/qj.2397 } \seealso{ \code{\link{CST_Load}} diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 5e2e0e5bda915e19f03ffacdd01547b1a03a49a2..2248834bfe5f12a8fe904506172f6bdc5c99cd4e 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -17,7 +17,8 @@ Calibration( alpha = NULL, memb_dim = "member", sdate_dim = "sdate", - ncores = 1 + dat_dim = NULL, + ncores = NULL ) } \arguments{ @@ -30,57 +31,69 @@ the same hindcast will be calibrated instead.} containing the observed data.} \item{exp_cor}{An optional multidimensional array with named dimensions (at -least 'sdate' and 'member') containing the seasonal forecast experiment data. -If the forecast is provided, it will be calibrated using the hindcast and -observations; if not, the hindcast will be calibrated instead.} - -\item{cal.method}{Is the calibration method used, can be either \code{bias}, -\code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default -value is \code{mse_min}.} - -\item{eval.method}{Is the sampling method used, can be either \code{in-sample} -or \code{leave-one-out}. Default value is the \code{leave-one-out} cross -validation. In case the forecast is provided, any chosen eval.method is -over-ruled and a third option is used.} - -\item{multi.model}{Is a boolean that is used only for the \code{mse_min} +least 'sdate' and 'member') containing the seasonal forecast experiment +data. If the forecast is provided, it will be calibrated using the hindcast +and observations; if not, the hindcast will be calibrated instead. If there +is only one corrected dataset, it should not have dataset dimension. If there +is a corresponding corrected dataset for each 'exp' forecast, the dataset +dimension must have the same length as in 'exp'. The default value is NULL.} + +\item{cal.method}{A character string indicating the calibration method used, +can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} +or \code{rpc-based}. Default value is \code{mse_min}.} + +\item{eval.method}{A character string indicating the sampling method used, +can be either \code{in-sample} or \code{leave-one-out}. Default value is +the \code{leave-one-out} cross validation. In case the forecast is +provided, any chosen eval.method is over-ruled and a third option is +used.} + +\item{multi.model}{A boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when -using small ensemble sizes. Using multi.model when the calibration method is -\code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} +using small ensemble sizes. Using multi.model when the calibration method +is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} -\item{na.fill}{Is a boolean that indicates what happens in case calibration is +\item{na.fill}{A boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or -less forecasts-observation pairs are available to perform the training phase +less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} -\item{na.rm}{Is a boolean that indicates whether to remove the NA values or +\item{na.rm}{A boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}.} -\item{apply_to}{Is a character string that indicates whether to apply the +\item{apply_to}{A character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} -\item{alpha}{Is a numeric value indicating the significance level for the -correlation test. Only useful if \code{cal.method == "rpc-based" & -apply_to == "sign"}.} +\item{alpha}{A numeric value indicating the significance level for the +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == +"sign"}.} -\item{memb_dim}{Is a character string indicating the name of the member +\item{memb_dim}{A character string indicating the name of the member dimension. By default, it is set to 'member'.} -\item{sdate_dim}{Is a character string indicating the name of the start date +\item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} -\item{ncores}{Is an integer that indicates the number of cores for parallel -computations using multiApply function. The default value is one.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{ncores}{An integer that indicates the number of cores for parallel +computation using multiApply function. The default value is NULL (one core).} } \value{ -An array containing the calibrated forecasts with the same dimensions -as the \code{exp} array. +An array containing the calibrated forecasts with the dimensions +nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of +experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +If 'exp_cor' is provided the returned array will be with the same dimensions as +'exp_cor'. } \description{ Five types of member-by-member bias correction can be performed. @@ -95,16 +108,15 @@ and Van Schaeybroeck and Vannitsem (2015), respectively. While the parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the -ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). - -Both in-sample or our out-of-sample (leave-one-out cross +ratio of predictable components (RPC) is equal to one, as in Eade et al. +(2014). Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. } \details{ -Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate -how the function has to handle the NA values. The \code{na.fill} parameter -checks whether there are more than three forecast-observations pairs to -perform the computation. In case there are three or less pairs, the +Both the \code{na.fill} and \code{na.rm} parameters can be used to +indicate how the function has to handle the NA values. The \code{na.fill} +parameter checks whether there are more than three forecast-observations pairs +to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} @@ -117,13 +129,12 @@ dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) a <- Calibration(exp = mod1, obs = obs1) -str(a) + } \references{ Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and -combination. Tellus A. 2005;57:234-252. -doi:10.1111/j.1600-0870.2005.00104.x +combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate @@ -131,8 +142,8 @@ predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing -through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. -doi:10.5194/npg-18-147-2011 +through linear regression. Nonlinear Processes in Geophysics, 18(2), +147. doi:10.5194/npg-18-147-2011 Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. diff --git a/tests/testthat/test-CST_BiasCorrection.R b/tests/testthat/test-CST_BiasCorrection.R index f16592532646ccd4068f8bd047fd2cd61ed507ad..0a590ce40df6b240442beb52ca53745f0a43c8bf 100644 --- a/tests/testthat/test-CST_BiasCorrection.R +++ b/tests/testthat/test-CST_BiasCorrection.R @@ -18,16 +18,12 @@ attr(obs, 'class') <- 's2dv_cube' exp1 <- list(data = array(1:20, dim = c(time = 20))) class(exp1) <- 's2dv_cube' - obs1 <- list(data = array(1:20, dim = c(time = 20))) class(obs1) <- 's2dv_cube' - exp1_2 <- list(data = array(1:20, dim = c(20))) class(exp1_2) <- 's2dv_cube' - obs1_2 <- list(data = array(1:20, dim = c(20))) class(obs1_2) <- 's2dv_cube' - exp_cor1 <- list(data = array(1:20, dim = c(20))) class(exp_cor1) <- 's2dv_cube' @@ -49,6 +45,31 @@ obs4 <- array(1:200, dim = c(time = 5, members = 1, lat = 2, lon = 5)) obs4_1 <- obs4 obs4_1[1,1,1,1] <- NA +# dat5 +set.seed(1) +exp5 <- array(rnorm(80), dim = c(member = 2, sdate = 10, lat = 2, dataset = 2)) +set.seed(2) +obs5 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) +set.seed(3) +exp_cor5 <- array(rnorm(20), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat6 +set.seed(1) +exp6 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +exp6_1 <- array(exp6, dim = c(member = 2, sdate = 10, dataset = 1)) +exp6_2 <- exp6_1 +exp6_2[1] <- NA +set.seed(2) +obs6 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) +obs6_1 <- array(obs6, dim = c(member = 1, sdate = 10, dataset = 1)) +obs6_2 <- obs6_1 +obs6_2[c(1, 3)] <- NA +set.seed(3) +exp_cor6 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + +# dat7 +exp_cor7 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) + ############################################## test_that("1. Input checks", { @@ -121,6 +142,40 @@ test_that("1. Input checks", { BiasCorrection(exp = exp3, obs = obs3_3), paste0("If parameter 'obs' has dimension 'memb_dim' its length must be equal to 1.") ) + ## dat_dim + expect_error( + BiasCorrection(exp = exp3, obs = obs3, dat_dim = 1), + paste0("Parameter 'dat_dim' must be a character string.") + ) + expect_error( + BiasCorrection(exp = exp3, obs = obs3, dat_dim = 'dataset'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) + ## exp, obs, and exp_cor (2) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + dat_dim = 'dataset'), + paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + ) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 3)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'exp_cor' must have the same length of all common dimensions", + " except 'dat_dim', 'sdate_dim' and 'memb_dim'.") + ) ## na.rm expect_warning( CST_BiasCorrection(exp = exp, obs = obs, na.rm = 1), @@ -159,15 +214,15 @@ test_that("2. Output checks: dat1", { ) expect_equal( round(BiasCorrection(exp = exp3, obs = obs3, exp_cor = exp3), 2), - array(c(2.66, 4.27, 3.2, 4.8, 3.73, 5.34), c(member = 2, sdate = 3)) + array(c(2.66, 3.2, 3.73, 4.27, 4.8, 5.34), c(sdate = 3, member = 2)) ) expect_equal( round(BiasCorrection(exp = exp3, obs = obs3_2, exp_cor = exp3), 2), - array(c(2.66, 4.27, 3.2, 4.8, 3.73, 5.34), c(member = 2, sdate = 3)) + array(c(2.66, 3.2, 3.73, 4.27, 4.8, 5.34), c(sdate = 3, member = 2)) ) expect_equal( dim(BiasCorrection(exp = exp4, obs = obs4, sdate_dim = 'time', memb_dim = 'members')), - c(members = 5, time = 5, lat = 2, lon = 5) + c(time = 5, members = 5, lat = 2, lon = 5) ) suppressWarnings( expect_equal( @@ -182,3 +237,99 @@ test_that("2. Output checks: dat1", { ) ) }) + +############################################## + +test_that("3. Output checks: dat5", { + expect_equal( + dim(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 2, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + dim(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 2, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.1466060, -0.9764600, 0.6914021, 0.9330733, 0.6567210, -0.3036642), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.21682367, 0.03815268, 0.09778966, 1.20997987, -1.30893321, 1.37258011), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp5[, , , 1], obs5[, , 1], memb_dim = 'member'))[1:5], + as.vector(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(BiasCorrection(exp5[, , , 1], obs5[, , 1], exp_cor5, memb_dim = 'member'))[1:5], + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset', na.rm = TRUE))[1:5], + c(-1.0318284, -0.3098404, 0.2847780, -1.2369666, 0.2168237), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("4. Output checks: dat6", { + expect_equal( + dim(BiasCorrection(exp6, obs6)), + c(member = 2, sdate = 10) + ) + expect_equal( + as.vector(BiasCorrection(exp6, obs6))[1:5], + c(-0.5430181, 0.2807323, -0.9954539, 1.9298249, 0.1466060), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp6, obs6, exp_cor6))[1:5], + c(-1.0318284, -0.3098404, 0.2847780, -1.2369666, 0.2168237), + tolerance = 0.0001 + ) + expect_equal( + dim(BiasCorrection(exp6_1, obs6_1, dat_dim = 'dataset')), + c(member = 2, sdate = 10, nexp = 1, nobs = 1) + ) + expect_equal( + as.vector(BiasCorrection(exp6_1, obs6_1, dat_dim = 'dataset')), + as.vector(BiasCorrection(exp6, obs6)), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp6_1, obs6_1, exp_cor6, dat_dim = 'dataset')), + as.vector(BiasCorrection(exp6, obs6, exp_cor6)), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_1, obs6_2, dat_dim = 'dataset')) + ), + rep(as.numeric(NA), 20) + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_1, obs6_2, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.2644706, -0.8392515, 0.6458045, 0.8511290, 0.5959483, -0.2908764), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_2, obs6_2, exp_cor6, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.14077312, -0.02076059, 0.03315629, 1.03867041, -1.23864029, 1.18567478), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("6. Output checks: dat4", { + expect_equal( + dim(BiasCorrection(exp5, obs5, exp_cor7, dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) +}) diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 8a14acd9704b8f6ddea9fc4dfed23bf2a238a413..35eccbe9bb67f9fef5aa74658cc9451d703dc988 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -1,25 +1,203 @@ context("CSTools::CST_Calibration tests") ############################################## -# dat1 + +# dat exp_obs <- lonlat_temp -exp <- exp_obs[[1]] -obs <- exp_obs[[2]] -exp$data <- ClimProjDiags::Subset(exp_obs[[1]]$data, c('member', 'lon', 'lat'), list(1:2, 1:4, 1:4)) -obs$data <- ClimProjDiags::Subset(exp_obs[[2]]$data, c('lon', 'lat'), list(1:4, 1:4)) -exp$lon <- exp$lon[1:4] -exp$lat <- exp$lat[1:4] -obs$lon <- obs$lon[1:4] -obs$lat <- obs$lat[1:4] +exp <- exp_obs$exp +obs <- exp_obs$obs +exp$data <- ClimProjDiags::Subset(exp$data, c('lat', 'lon'), list(1:5, 1:5)) +obs$data <- ClimProjDiags::Subset(obs$data, c('lat', 'lon'), list(1:5, 1:5)) + +# dat1 +exp1 <- list(data = array(1:20, dim = c(time = 20))) +class(exp1) <- 's2dv_cube' +obs1 <- list(data = array(1:20, dim = c(time = 20))) +class(obs1) <- 's2dv_cube' +exp1_2 <- list(data = array(1:20, dim = c(20))) +class(exp1_2) <- 's2dv_cube' +obs1_2 <- list(data = array(1:20, dim = c(20))) +class(obs1_2) <- 's2dv_cube' +exp_cor1 <- list(data = array(1:20, dim = c(20))) +class(exp_cor1) <- 's2dv_cube' # dat2 exp2 <- exp exp2$data[1, 2, 1, 1, 1, 1] <- NA obs2 <- obs obs2$data[1, 1, 2, 1, 1, 1] <- NA + +# dat3 +set.seed(1) +exp3 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) +set.seed(2) +obs3 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) +set.seed(3) +exp_cor3 <- array(rnorm(200), dim = c(member = 10, sdate = 10, lat = 2)) + +# dat4 +set.seed(1) +exp4 <- array(rnorm(200), dim = c(member = 10, sdate = 20)) +exp4_1 <- array(exp4, dim = c(member = 10, sdate = 20, dataset = 1)) +exp4_2 <- exp4_1 +exp4_2[1] <- NA +set.seed(2) +obs4 <- array(rnorm(20), dim = c(sdate = 20)) +obs4_1 <- array(obs4, dim = c(sdate = 20, dataset = 1)) +obs4_2 <- obs4_1 +obs4_2[c(1, 3)] <- NA +set.seed(3) +exp_cor4 <- array(rnorm(200), dim = c(member = 10, sdate = 20)) + +# dat5 +exp_cor5 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) + ############################################## test_that("1. Input checks", { + # s2dv_cube + expect_error( + CST_Calibration(exp = 1), + paste0("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + ) + expect_error( + CST_Calibration(exp = exp, obs = 1), + paste0("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + ) + expect_error( + CST_Calibration(exp = exp1), + 'argument "obs" is missing, with no default' + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, exp_cor = 1), + paste0("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + ) + # exp and obs + expect_error( + Calibration(exp = 1, obs = obs1), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + CST_Calibration(exp = exp1_2, obs = obs1), + "Parameter 'exp' must have dimension names." + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1_2), + "Parameter 'obs' must have dimension names." + ) + expect_warning( + CST_Calibration(exp = exp2, obs = obs2, exp_cor = exp2), + "Parameter 'obs' contains NA values", + "Parameter 'exp' contains NA values.", + "Parameter 'exp_cor' contains NA values." + ) + # exp_cor + expect_error( + CST_Calibration(exp = exp1, obs = obs1, exp_cor = exp_cor1, sdate_dim = 'time'), + "Parameter 'exp_cor' must have dimension names." + ) + # dat_dim + expect_error( + Calibration(exp = exp3, obs = obs3, dat_dim = 1), + paste0("Parameter 'dat_dim' must be a character string.") + ) + expect_error( + CST_Calibration(exp = exp, obs = obs, dat_dim = 'dat'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) + # sdate_dim, memb_dim + expect_error( + CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 1), + paste0("Parameter 'sdate_dim' should be a character string indicating the", + "name of the dimension where start dates are stored in 'exp'.") + ) + expect_warning( + CST_Calibration(exp = exp, obs = obs, sdate_dim = c('sdate', 'time')), + paste0("Parameter 'sdate_dim' has length greater than 1 and only", + " the first element will be used.") + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, memb_dim = 1), + paste0("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") + ) + expect_error( + CST_Calibration(exp = exp, obs = obs, sdate_dim = 'time'), + paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + ) + expect_warning( + CST_Calibration(exp = exp, obs = obs, memb_dim = c('member', 'memb')), + paste0("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 'time'), + paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + ) + expect_error( + Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), + obs = array(1:20, dim = c(member = 1)), sdate_dim = 'time'), + paste0("Parameter 'obs' must have the dimension defined in sdate_dim ", + "parameter.") + ) + expect_warning( + Calibration(exp = array(abs(rnorm(400)), dim = c(sdate = 4, member = 10)), + obs = array(abs(rnorm(8)), dim = c(sdate = 4, member = 2))), + paste0("Parameter 'obs' has dimension 'memb_dim' with length larger", + " than 1. Only the first member dimension will be used.") + ) + # exp, obs, and exp_cor (2) + expect_error( + Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), + obs = array(1:20, dim = c(time = 2, member = 1)), sdate_dim = 'time'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + dat_dim = 'dataset'), + paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + ) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 2, lon = 2)), dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") + ) + # ncores + expect_error( + CST_Calibration(exp = exp, obs = obs, ncores = TRUE), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + # na.rm + expect_warning( + CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), + "Paramter 'na.rm' has length greater than 1, and only the fist element is used." + ) + # cal.method + expect_error( + as.vector(Calibration(exp4, obs4, cal.method = 'biass')), + "Parameter 'cal.method' must be a character string indicating the calibration method used." + ) +}) +############################################## + +test_that("2. Output checks: dat1", { expect_error( CST_Calibration(exp = 1), "Parameter 'exp' and 'obs' must be of the class 's2dv_cube', " @@ -30,7 +208,7 @@ test_that("1. Input checks", { ) cal <- CST_Calibration(exp = exp, obs = obs) expect_equal( - length(cal), + length(cal), 9 ) expect_equal( @@ -38,33 +216,194 @@ test_that("1. Input checks", { as.numeric(dim(exp$data)) ) expect_equal( - cal$lat, + cal$lat, exp$lat ) expect_equal( - cal$lat, + cal$lat, obs$lat ) expect_equal( - cal$lon, + cal$lon, exp$lon ) expect_equal( - cal$lon, + cal$lon, obs$lon ) - expect_warning( - CST_Calibration(exp = exp2, obs = obs), - "Parameter 'exp' contains NA values." + expect_equal( + dim(cal$data), + c(dataset = 1, member =15, sdate = 6, ftime = 3, lat = 5, lon = 5) ) - expect_warning( - CST_Calibration(exp = exp, obs = obs2), - "Parameter 'obs' contains NA values." + expect_equal( + as.vector(cal$data)[1:5], + c(280.8678, 281.1716, 280.3992, 282.6034, 281.6749), + tolerance = 0.0001 ) - expect_warning( - CST_Calibration(exp = exp2, obs = obs2), - "Parameter 'obs' contains NA values", "Parameter 'exp' contains NA values." +}) + +############################################## + +test_that("3. Output checks: dat3", { + expect_equal( + dim(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + dim(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.5462052, -0.6882557, 0.7157284, 0.9850565, 0.8105716, -0.1353346), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.4908992, 0.3054365, 0.3673404, 1.5218074, -1.0928551, 1.6905885), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3[, , , 1], obs3[, , 1], memb_dim = 'member'))[1:5], + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3[, , , 1], obs3[, , 1], exp_cor3, memb_dim = 'member'))[1:5], + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', na.rm = TRUE))[1:5], + c(-0.80521700, -0.05578454, 0.56143657, -1.01815286, 0.49089916), + tolerance = 0.0001 + ) +}) + +############################################## + +test_that("4. Output checks: dat3", { + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'bias'))[1:7], + c(-0.3984805, 0.4116167, -0.6076553, 1.8232541, 0.5574811, -0.5924950, 0.7154024), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'evmos'))[1:7], + c(-0.9631369, -0.2290479, 0.3755366, -1.1717133, 0.3064434, 0.1247777, 0.1854143), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'mse_min', multi.model = TRUE))[5:10], + c(0.5364620, -0.6412113, 0.6981868, 0.9551252, 0.7886670, -0.1137257), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'crps_min'))[5:10], + c(0.4975732, 0.3078346, 0.3717602, 1.5607014, -1.1259947, 1.7316233), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'rpc-based', alpha = 0.05, apply_to = 'all'))[5:10], + c(0.4178384, 0.2323757, 0.2942796, 1.4487467, -1.1659159, 1.6175277), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("5. Output checks: dat4", { + expect_equal( + dim(Calibration(exp4, obs4)), + c(member = 10, sdate = 20) + ) + expect_equal( + as.vector(Calibration(exp4, obs4))[1:5], + c(-0.6372505, 0.3261267, -0.8860036, 2.0048627, 0.4995904), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, exp_cor4, eval.method = "hindcast-vs-forecast"))[1:5], + c(-0.88657225, -0.08128503, 0.58193721, -1.11537810, 0.50614269), + tolerance = 0.0001 + ) + expect_equal( + dim(Calibration(exp4_1, obs4_1, dat_dim = 'dataset')), + c(member = 10, sdate = 20, nexp = 1, nobs = 1) + ) + expect_equal( + as.vector(Calibration(exp4_1, obs4_1, dat_dim = 'dataset')), + as.vector(Calibration(exp4, obs4)), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4_1, obs4_1, exp_cor4, dat_dim = 'dataset')), + as.vector(Calibration(exp4, obs4, exp_cor4)), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_1, obs4_2, dat_dim = 'dataset', na.rm = FALSE)) + ), + rep(as.numeric(NA), 200) + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_1, obs4_2, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.4343443, -0.9018916, 0.6178439, 0.9093767, 0.7205064, -0.3033850), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_2, obs4_2, exp_cor4, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.4583975, 0.2645130, 0.3292279, 1.5361189, -1.1972745, 1.7125642), + tolerance = 0.0001 + ) +}) +############################################## +test_that("6. Output checks: dat4", { + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'bias'))[1:7], + c(-0.4039514, 0.4061457, -0.6131262, 1.8177832, 0.5520101, -0.5979660, 0.7099314), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'evmos'))[1:7], + c(-0.4860875, 0.4252999, -0.7214164, 2.0134410, 0.5894025, -0.7043606, 0.7670694), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'mse_min', multi.model = TRUE))[1:7], + c(-0.5932137, 0.3231407, -0.8298251, 1.9199372, 0.4881377, -0.8126764, 0.6667729), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'crps_min'))[1:7], + c(-0.6381684, 0.3261889, -0.8871746, 2.0066329, 0.4998291, -0.8691275, 0.6878220), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign'))[1:7], + c(-0.8597528, 0.1036243, -1.1085060, 1.7823603, 0.2770880, -1.0904773, 0.4648899), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, exp_cor4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign', eval.method = "hindcast-vs-forecast"))[1:7], + c(-1.0464936, -0.2412064, 0.4220158, -1.2752995, 0.3462213, 0.1469362, 0.2134538), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, eval.method = "in-sample"))[1:5], + c(-0.7119142, 0.2626203, -0.9635483, 1.9607986, 0.4380930), + tolerance = 0.0001 ) }) ############################################## +test_that("6. Output checks: dat4", { + expect_equal( + dim(Calibration(exp3, obs3, exp_cor5, dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) +})