Newer
Older
#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
#'@description Equivalent to function \code{Calibration} but for objects of
#'class \code{s2dv_cube}.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load}
#' function, containing the seasonal hindcast experiment data in the element
#' named \code{$data}. The hindcast is used to calibrate the forecast in case
#' the forecast is provided; if not, the same hindcast will be calibrated
#' instead.
#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load}
#' function, containing the observed data in the element named \code{$data}.
#'@param exp_cor An optional object of class \code{s2dv_cube} as returned by
#' \code{CST_Load} function, containing the seasonal forecast experiment data
#' in the element named \code{$data}. If the forecast is provided, it will be
#' calibrated using the hindcast and observations; if not, the hindcast will
#' be calibrated instead. The dimensions must be the same as 'exp' except
#' dataset dimension. If there is only one corrected dataset, it should not
#' have dataset dimension. If there is a corresponding corrected dataset for
#' each 'exp' forecast, the dataset dimension must have the same length as in
#' 'exp'. The default value is NULL.
#'@param cal.method A character string indicating the calibration method used,
#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or
#' \code{rpc-based}. Default value is \code{mse_min}.
#'@param eval.method A character string indicating the sampling method used, it
#' can be either \code{in-sample} or \code{leave-one-out}. Default value is the
#' \code{leave-one-out} cross validation. In case the forecast is provided, any
#' chosen eval.method is over-ruled and a third option is used.
#'@param multi.model A boolean that is used only for the \code{mse_min}
#' method. If multi-model ensembles or ensembles of different sizes are used,
#' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences
#' between the two approaches are generally small but may become large when
#' using small ensemble sizes. Using multi.model when the calibration method is
#' \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
#'@param na.fill A boolean that indicates what happens in case calibration is
#' not possible or will yield unreliable results. This happens when three or
#' less forecasts-observation pairs are available to perform the training phase
#' of the calibration. By default \code{na.fill} is set to true such that NA
#' values will be returned. If \code{na.fill} is set to false, the uncorrected
#' data will be returned.
#'@param na.rm A boolean that indicates whether to remove the NA values or not.
#' The default value is \code{TRUE}. See Details section for further
#' information about its use and compatibility with \code{na.fill}.
#'@param apply_to A character string that indicates whether to apply the
#' calibration to all the forecast (\code{"all"}) or only to those where the
#' correlation between the ensemble mean and the observations is statistically
#' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.
#'@param alpha A numeric value indicating the significance level for the
#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.
#'@param memb_dim A character string indicating the name of the member dimension.
#' By default, it is set to 'member'.
#'@param sdate_dim A character string indicating the name of the start date
#' dimension. By default, it is set to 'sdate'.
#'@param dat_dim A character string indicating the name of dataset dimension.
#' The length of this dimension can be different between 'exp' and 'obs'.
#' The default value is NULL.
#'@param ncores An integer that indicates the number of cores for parallel
#' computations using multiApply function. The default value is one.
#'@return An object of class \code{s2dv_cube} containing the calibrated forecasts
#' in the element \code{$data} with the same dimensions as the one in the exp
#' object.
Bert Van schaeybroeck
committed
#'@examples
#'# Example 1:
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'lon <- seq(0, 30, 5)
#'lat <- seq(0, 25, 5)
#'exp <- list(data = mod1, lat = lat, lon = lon)
#'obs <- list(data = obs1, lat = lat, lon = lon)
#'attr(exp, 'class') <- 's2dv_cube'
#'attr(obs, 'class') <- 's2dv_cube'
#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample")
#'
#'# Example 2:
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'lon <- seq(0, 30, 5)
#'lat <- seq(0, 25, 5)
#'exp <- list(data = mod1, lat = lat, lon = lon)
#'obs <- list(data = obs1, lat = lat, lon = lon)
#'exp_cor <- list(data = mod2, lat = lat, lon = lon)
#'attr(exp, 'class') <- 's2dv_cube'
#'attr(obs, 'class') <- 's2dv_cube'
#'attr(exp_cor, 'class') <- 's2dv_cube'
#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos")
#'str(a)
CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min",
eval.method = "leave-one-out", multi.model = FALSE,
Carlos Delgado Torres
committed
na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL,
memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) {
if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) {
stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
"as output by CSTools::CST_Load.")
}
if (!is.null(exp_cor)) {
if (!inherits(exp_cor, "s2dv_cube")) {
stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ",
"as output by CSTools::CST_Load.")
dimnames <- names(dim(exp_cor$data))
# if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast"
# if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the
# train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor";
# no need for "leave-one-out" or "in-sample")
eval.method = "hindcast-vs-forecast"
} else {
dimnames <- names(dim(exp$data))
}
if (!missing(multi.model) & !(cal.method == "mse_min")) {
warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method))
}
Calibration <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data,
cal.method = cal.method, eval.method = eval.method, multi.model = multi.model,
na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha,
memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, ncores = ncores)
pos <- match(dimnames, names(dim(Calibration)))
Calibration <- aperm(Calibration, pos)
if (is.null(exp_cor)) {
exp$data <- Calibration
exp$Datasets <- c(exp$Datasets, obs$Datasets)
exp$source_files <- c(exp$source_files, obs$source_files)
} else {
exp_cor$data <- Calibration
exp_cor$Datasets <- c(exp_cor$Datasets, exp$Datasets, obs$Datasets)
exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files)
Bert Van schaeybroeck
committed
#'Forecast Calibration
#'
#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#'@description Five types of member-by-member bias correction can be performed.
#'The \code{"bias"} method corrects the bias only, the \code{"evmos"} method
#'applies a variance inflation technique to ensure the correction of the bias
#'and the correspondence of variance between forecast and observation (Van
#'Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods
#'\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast
#'variance and the ensemble spread as described in Doblas-Reyes et al. (2005)
#'and Van Schaeybroeck and Vannitsem (2015), respectively. While the
#'\code{"mse_min"} method minimizes a constrained mean-squared error using three
#'parameters, the \code{"crps_min"} method features four parameters and
#'minimizes the Continuous Ranked Probability Score (CRPS). The
#'\code{"rpc-based"} method adjusts the forecast variance ensuring that the
#'ratio of predictable components (RPC) is equal to one, as in Eade et al.
#'(2014).
#'@description Both in-sample or our out-of-sample (leave-one-out cross
#'validation) calibration are possible.
#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the
#'success of multi-model ensembles in seasonal forecasting-II calibration and
#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x
#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N.,
#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate
#'predictions underestimate the predictability of the read world? Geophysical
#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing
#'through linear regression. Nonlinear Processes in Geophysics, 18(2),
#'147. doi:10.5194/npg-18-147-2011
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble
#'post-processing using member-by-member approaches: theoretical aspects.
#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818.
#'doi:10.1002/qj.2397
Bert Van schaeybroeck
committed
#'
#'@param exp A multidimensional array with named dimensions (at least 'sdate'
#' and 'member') containing the seasonal hindcast experiment data. The hindcast
#' is used to calibrate the forecast in case the forecast is provided; if not,
#' the same hindcast will be calibrated instead.
#'@param obs A multidimensional array with named dimensions (at least 'sdate')
#' containing the observed data.
#'@param exp_cor An optional multidimensional array with named dimensions (at
#' least 'sdate' and 'member') containing the seasonal forecast experiment
#' data. If the forecast is provided, it will be calibrated using the hindcast
#' and observations; if not, the hindcast will be calibrated instead. If there
#' is only one corrected dataset, it should not have dataset dimension. If there
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#' is a corresponding corrected dataset for each 'exp' forecast, the dataset
#' dimension must have the same length as in 'exp'. The default value is NULL.
#'@param cal.method A character string indicating the calibration method used,
#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min}
#' or \code{rpc-based}. Default value is \code{mse_min}.
#'@param eval.method A character string indicating the sampling method used,
#' can be either \code{in-sample} or \code{leave-one-out}. Default value is
#' the \code{leave-one-out} cross validation. In case the forecast is
#' provided, any chosen eval.method is over-ruled and a third option is
#' used.
#'@param multi.model A boolean that is used only for the \code{mse_min}
#' method. If multi-model ensembles or ensembles of different sizes are used,
#' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences
#' between the two approaches are generally small but may become large when
#' using small ensemble sizes. Using multi.model when the calibration method
#' is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
#'@param na.fill A boolean that indicates what happens in case calibration is
#' not possible or will yield unreliable results. This happens when three or
#' less forecasts-observation pairs are available to perform the training phase
#' of the calibration. By default \code{na.fill} is set to true such that NA
#' values will be returned. If \code{na.fill} is set to false, the uncorrected
#' data will be returned.
#'@param na.rm A boolean that indicates whether to remove the NA values or
#' not. The default value is \code{TRUE}.
#'@param apply_to A character string that indicates whether to apply the
#' calibration to all the forecast (\code{"all"}) or only to those where the
#' correlation between the ensemble mean and the observations is statistically
#' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.
#'@param alpha A numeric value indicating the significance level for the
#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.
#'@param memb_dim A character string indicating the name of the member
#' dimension. By default, it is set to 'member'.
#'@param sdate_dim A character string indicating the name of the start date
#' dimension. By default, it is set to 'sdate'.
#'@param dat_dim A character string indicating the name of dataset dimension.
#' The length of this dimension can be different between 'exp' and 'obs'.
#' The default value is NULL.
#'@param ncores An integer that indicates the number of cores for parallel
#' computation using multiApply function. The default value is NULL (one core).
#'
#'@return An array containing the calibrated forecasts with the same dimensions
#'as the \code{exp} array. If \code{exp_cor} is provided the returned array will
#'be with the same dimensions as \code{exp_cor}.
Bert Van schaeybroeck
committed
#'
#'@importFrom s2dv InsertDim MeanDims Reorder
Bert Van schaeybroeck
committed
#'@import abind
#'@importFrom ClimProjDiags Subset
Bert Van schaeybroeck
committed
#'
#'@seealso \code{\link{CST_Load}}
#'
#'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to
#'indicate how the function has to handle the NA values. The \code{na.fill}
#'parameter checks whether there are more than three forecast-observations pairs
#'to perform the computation. In case there are three or less pairs, the
#'computation is not carried out, and the value returned by the function depends
#'on the value of this parameter (either NA if \code{na.fill == TRUE} or the
#'uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm}
#'is used to indicate the function whether to remove the missing values during
#'the computation of the parameters needed to perform the calibration.
Carlos Delgado Torres
committed
#'
#'@examples
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'a <- Calibration(exp = mod1, obs = obs1)
#'str(a)
#'@export
Calibration <- function(exp, obs, exp_cor = NULL,
cal.method = "mse_min", eval.method = "leave-one-out",
Carlos Delgado Torres
committed
multi.model = FALSE, na.fill = TRUE,
na.rm = TRUE, apply_to = NULL, alpha = NULL,
memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) {
# Check inputs
## exp, obs
if (!is.array(exp) || !is.numeric(exp)) {
stop("Parameter 'exp' must be a numeric array.")
if (!is.array(obs) || !is.numeric(obs)) {
stop("Parameter 'obs' must be a numeric array.")
}
expdims <- names(dim(exp))
obsdims <- names(dim(obs))
if (is.null(expdims)) {
stop("Parameter 'exp' must have dimension names.")
}
if (is.null(obsdims)) {
stop("Parameter 'obs' must have dimension names.")
}
if (any(is.na(exp))) {
warning("Parameter 'exp' contains NA values.")
}
if (any(is.na(obs))) {
warning("Parameter 'obs' contains NA values.")
}
## exp_cor
if (!is.null(exp_cor)) {
eval.method = "hindcast-vs-forecast"
expcordims <- names(dim(exp_cor))
if (is.null(expcordims)) {
stop("Parameter 'exp_cor' must have dimension names.")
}
if (any(is.na(exp_cor))) {
warning("Parameter 'exp_cor' contains NA values.")
}
}
## dat_dim
if (!is.null(dat_dim)) {
if (!is.character(dat_dim) | length(dat_dim) > 1) {
stop("Parameter 'dat_dim' must be a character string.")
}
if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
" Set it as NULL if there is no dataset dimension.")
}
}
## sdate_dim and memb_dim
if (!is.character(sdate_dim)) {
stop("Parameter 'sdate_dim' should be a character string indicating the",
"name of the dimension where start dates are stored in 'exp'.")
sdate_dim <- sdate_dim[1]
warning("Parameter 'sdate_dim' has length greater than 1 and only",
" the first element will be used.")
}
if (!is.character(memb_dim)) {
stop("Parameter 'memb_dim' should be a character string indicating the",
"name of the dimension where members are stored in 'exp'.")
Bert Van schaeybroeck
committed
}
if (length(memb_dim) > 1) {
memb_dim <- memb_dim[1]
warning("Parameter 'memb_dim' has length greater than 1 and only",
" the first element will be used.")
target_dims_exp <- c(memb_dim, sdate_dim, dat_dim)
target_dims_obs <- c(sdate_dim, dat_dim)
if (!all(target_dims_exp %in% expdims)) {
stop("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.")
}
if (!all(target_dims_obs %in% obsdims)) {
stop("Parameter 'obs' must have the dimension defined in sdate_dim ",
"parameter.")
Bert Van schaeybroeck
committed
}
if (memb_dim %in% obsdims) {
if (dim(obs)[memb_dim] != 1) {
warning("Parameter 'obs' has dimension 'memb_dim' with length larger than 1. Only the first member dimension will be used.")
obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected")
## exp, obs, and exp_cor (2)
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
name_exp <- name_exp[-which(name_exp == memb_dim)]
if (!is.null(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim)]
name_obs <- name_obs[-which(name_obs == dat_dim)]
Bert Van schaeybroeck
committed
}
if (!identical(length(name_exp), length(name_obs)) |
!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions",
" except 'memb_dim' and 'dat_dim'."))
Bert Van schaeybroeck
committed
}
if (!is.null(exp_cor)) {
name_exp_cor <- sort(names(dim(exp_cor)))
name_exp <- sort(names(dim(exp)))
if (!is.null(dat_dim)) {
if (dat_dim %in% expcordims) {
if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) {
stop(paste0("If parameter 'exp_cor' has dataset dimension, it must be",
" equal to dataset dimension of 'exp'."))
}
name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)]
target_dims_cor <- c(memb_dim, sdate_dim, dat_dim)
} else {
target_dims_cor <- c(memb_dim, sdate_dim)
}
} else {
target_dims_cor <- c(memb_dim, sdate_dim)
}
name_exp <- name_exp[-which(name_exp %in% target_dims_exp)]
name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)]
if (!identical(length(name_exp), length(name_exp_cor)) |
!identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) {
stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ",
"all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'."))
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be either NULL or a positive integer.")
}
}
## data.set.sufficiently.large
data.set.sufficiently.large.out <- Apply(data = list(exp = exp, obs = obs),
target_dims = list(exp = target_dims_exp,
obs = target_dims_obs),
ncores = ncores,
fun = .data.set.sufficiently.large)$output1
if (!all(data.set.sufficiently.large.out)) {
if (na.fill) {
Carlos Delgado Torres
committed
warning("Some forecast data could not be corrected due to data lack",
" and is replaced with NA values")
} else {
warning("Some forecast data could not be corrected due to data lack",
" and is replaced with uncorrected values")
}
}
Carlos Delgado Torres
committed
if (!na.rm %in% c(TRUE,FALSE)) {
stop("Parameter 'na.rm' must be TRUE or FALSE.")
Bert Van schaeybroeck
committed
}
if (length(na.rm) > 1) {
na.rm <- na.rm[1]
warning("Paramter 'na.rm' has length greater than 1, and only the fist element is used.")
}
## cal.method, apply_to, alpha
if (!any(cal.method %in% c('bias', 'evmos', 'mse_min', 'crps_min', 'rpc-based'))) {
stop("Parameter 'cal.method' must be a character string indicating the calibration method used.")
}
Carlos Delgado Torres
committed
if (cal.method == 'rpc-based') {
if (is.null(apply_to)) {
apply_to <- 'sign'
warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).")
Carlos Delgado Torres
committed
} else if (!apply_to %in% c('all','sign')) {
stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.")
}
if (apply_to == 'sign') {
if (is.null(alpha)) {
alpha <- 0.1
warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. (2014).")
Carlos Delgado Torres
committed
} else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) {
stop("'alpha' must be a number between 0 and 1.")
}
}
}
## eval.method
if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast'))) {
stop("Parameter 'eval.method' must be a character string indicating the sampling method used ('in-sample', 'leave-one-out' or 'hindcast-vs-forecast').")
}
## multi.model
if (!multi.model %in% c(TRUE,FALSE)) {
stop("Parameter 'multi.model' must be TRUE or FALSE.")
}
if (is.null(exp_cor)) {
calibrated <- Apply(data = list(exp = exp, obs = obs), dat_dim = dat_dim,
cal.method = cal.method, eval.method = eval.method, multi.model = multi.model,
na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha,
target_dims = list(exp = target_dims_exp, obs = target_dims_obs),
ncores = ncores, fun = .cal)$output1
} else {
calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor),
dat_dim = dat_dim, cal.method = cal.method, eval.method = eval.method,
multi.model = multi.model, na.fill = na.fill, na.rm = na.rm,
apply_to = apply_to, alpha = alpha,
target_dims = list(exp = target_dims_exp, obs = target_dims_obs,
exp_cor = target_dims_cor),
ncores = ncores, fun = .cal)$output1
}
if (!is.null(dat_dim)) {
pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'),
names(dim(calibrated)))
calibrated <- aperm(calibrated, pos)
}
Bert Van schaeybroeck
committed
Bert Van schaeybroeck
committed
return(calibrated)
}
Bert Van schaeybroeck
committed
.data.set.sufficiently.large <- function(exp, obs) {
Bert Van schaeybroeck
committed
amt.min.samples <- 3
amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x))))
return(amt.good.pts > amt.min.samples)
}
.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) {
if (eval.method == "leave-one-out") {
dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x,
train.dexes = seq(1, amt.points)[-x])))
dexes.lst <- list(list(eval.dexes = seq(1, amt.points),
train.dexes = seq(1, amt.points)))
} else if (eval.method == "hindcast-vs-forecast") {
dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor),
train.dexes = seq(1, amt.points)))
stop(paste0("unknown sampling method: ", eval.method))
.cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", eval.method = "leave-one-out",
multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL) {
# exp: [memb_dim, sdate_dim, (dat)]
# obs: [sdate_dim (dat)]
# exp_cor: [memb, sdate, (dat)] or NULL
if (is.null(dat_dim)) {
nexp <- 1
nobs <- 1
exp <- InsertDim(exp, posdim = 3, lendim = 1, name = 'dataset')
obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset')
} else {
nexp <- as.numeric(dim(exp)[dat_dim])
nobs <- as.numeric(dim(obs)[dat_dim])
}
if (is.null(exp_cor)) {
exp_cor <- exp
cor_dat_dim <- TRUE
} else {
if (length(dim(exp_cor)) == 2) { # ref: [memb, sdate]
cor_dat_dim <- FALSE
cor_dat_dim <- TRUE
}
}
return_data_na <- FALSE
for (i in 1:nexp) {
for (j in 1:nobs) {
if (!.data.set.sufficiently.large(exp = exp[, , i], obs = obs[, j])) {
return_data_na <- TRUE
if (cor_dat_dim) {
var.cor.fc <- NA * exp_cor[, , i]
} else {
var.cor.fc <- NA * exp_cor[, i]
}
if (!na.fill) {
var.cor.fc[, , i] <- exp[, , i]
}
}
}
}
expdims <- dim(exp)
expdims_cor <- dim(exp_cor)
memb <- expdims[1] # memb
sdate <- expdims[2] # sdate
sdate_cor <- expdims_cor[2]
if (!return_data_na) {
var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs))
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
for (i in 1:nexp) {
for (j in 1:nobs) {
obs_data <- as.vector(obs[, j])
exp_data <- exp[, , i]
dim(exp_data) <- dim(exp)[1:2]
if (cor_dat_dim) {
expcor_data <- exp_cor[, , i]
dim(expcor_data) = dim(exp_cor)[1:2]
} else {
expcor_data <- exp_cor
}
eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate,
amt.points_cor = sdate_cor)
amt.resamples <- length(eval.train.dexeses)
for (i.sample in seq(1, amt.resamples)) {
eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes
train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes
fc.ev <- expcor_data[ , eval.dexes, drop = FALSE]
fc.tr <- exp_data[ , train.dexes]
obs.tr <- obs_data[train.dexes , drop = FALSE]
if (cal.method == "bias") {
var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm)
} else if (cal.method == "evmos") {
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm))
var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm)
} else if (cal.method == "mse_min") {
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm)
var.cor.fc[ , eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm)
} else if (cal.method == "crps_min") {
quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001)
init.par[3] <- sqrt(init.par[3])
optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt,
quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS")
mbm.par <- optim.tmp$par
var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm)
} else if (cal.method == 'rpc-based') {
ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean
ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean
ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 ## Ensemble spread
exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology)
var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance
var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread)
var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations
r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson',
use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything"))
## Correlation between observations and the ensemble mean
if ((apply_to == 'all') || (apply_to == 'sign' &&
cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) {
ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr
var.cor.fc[ , eval.dexes, i, j] <- s2dv::Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev,
ens_mean_cal = ens_mean_cal),
target_dims = names(sdate), fun = .CalibrationMembersRPC,
var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1,
order = names(expdims)[1:2])
} else {
var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev))
}
} else {
stop("unknown calibration method: ", cal.method)
}
}
}
dim(var.cor.fc) <- dim(exp_cor)[1:2]
#function to calculate different quantities of a series of ensemble forecasts and corresponding observations
.calc.obs.fc.quant <- function(obs, fc, na.rm) {
obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
Carlos Delgado Torres
committed
fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
Carlos Delgado Torres
committed
obs.av <- mean(obs, na.rm = na.rm)
obs.sd <- sd(obs, na.rm = na.rm)
.calc.fc.quant(fc = fc, na.rm = na.rm),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
#extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations
.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){
obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
Carlos Delgado Torres
committed
fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
Carlos Delgado Torres
committed
obs.av <- mean(obs, na.rm = na.rm)
obs.sd <- sd(obs, na.rm = na.rm)
.calc.fc.quant.ext(fc = fc, na.rm = na.rm),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
#function to calculate different quantities of a series of ensemble forecasts
.calc.fc.quant <- function(fc, na.rm) {
Carlos Delgado Torres
committed
fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm)
fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm)
fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr)
Carlos Delgado Torres
committed
fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm)
fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm))
Carlos Delgado Torres
committed
fc.dev.sd <- sd(fc.dev, na.rm = na.rm)
fc.av <- mean(fc, na.rm = na.rm)
fc.sd <- sd(fc, na.rm = na.rm)
return(
list(
fc.ens.av = fc.ens.av,
fc.ens.av.av = fc.ens.av.av,
fc.ens.av.sd = fc.ens.av.sd,
fc.ens.av.per.ens = fc.ens.av.per.ens,
fc.ens.sd = fc.ens.sd,
fc.ens.var.av.sqrt = fc.ens.var.av.sqrt,
fc.av = fc.av,
fc.sd = fc.sd
)
)
}
#extended function to calculate different quantities of a series of ensemble forecasts
.calc.fc.quant.ext <- function(fc, na.rm){
repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr)
repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3))
Carlos Delgado Torres
committed
spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm)
spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr)
append(.calc.fc.quant(fc, na.rm = na.rm),
Bert Van schaeybroeck
committed
list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens))
#Below are the core or elementary functions to calculate the regression parameters for the different methods
.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm) {
par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt)
} else {
par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd)
}
par.out[2] <- with(quant.obs.fc, abs(cor.obs.fc) * obs.sd / fc.ens.av.sd)
Carlos Delgado Torres
committed
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm)
.calc.evmos.par <- function(quant.obs.fc, na.rm){
par.out <- rep(NA, 2)
par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd)
Carlos Delgado Torres
committed
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm)
#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps
.calc.crps.opt <- function(par, quant.obs.fc, na.rm){
return(
with(quant.obs.fc,
mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens +
Carlos Delgado Torres
committed
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) -
mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm)
.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm) {
sgn1 <- with(quant.obs.fc,sign(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens +
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)))
sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens))
sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4]))
Carlos Delgado Torres
committed
deriv.par1 <- mean(sgn1, na.rm = na.rm)
deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm))
deriv.par3 <- with(quant.obs.fc,
Carlos Delgado Torres
committed
mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) -
mean(spr.abs * sgn3, na.rm = na.rm) / 2.)
deriv.par4 <- with(quant.obs.fc,
Carlos Delgado Torres
committed
mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) -
mean(sgn3, na.rm = na.rm) / 2.)
return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4))
}
#Below are the core or elementary functions to correct the evaluation set based on the regression parameters
.correct.evmos.fc <- function(fc, par, na.rm) {
quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm)
return(with(quant.fc.mp, par[1] + par[2] * fc))
}
.correct.mse.min.fc <- function(fc, par, na.rm){
quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3]))
}
.correct.crps.min.fc <- function(fc, par, na.rm){
quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs)))
}
# Function to calibrate the individual members with the RPC-based method
.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){
member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal
return(member_cal)