Newer
Older
#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
Bert Van schaeybroeck
committed
#'@description Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}.
#'
#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.
#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.
Bert Van schaeybroeck
committed
#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.
#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.
#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
Bert Van schaeybroeck
committed
#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.
#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'.
#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.
#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.
Bert Van schaeybroeck
committed
#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object.
Bert Van schaeybroeck
committed
#'@examples
#'# Example 1:
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'lon <- seq(0, 30, 5)
#'lat <- seq(0, 25, 5)
#'exp <- list(data = mod1, lat = lat, lon = lon)
#'obs <- list(data = obs1, lat = lat, lon = lon)
#'attr(exp, 'class') <- 's2dv_cube'
#'attr(obs, 'class') <- 's2dv_cube'
#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample")
CST_Calibration <- function(exp, obs, cal.method = "mse_min",
eval.method = "leave-one-out", multi.model = FALSE,
na.fill = TRUE, memb_dim = 'member',
sdate_dim = 'sdate', ncores = 1) {
if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) {
stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
"as output by CSTools::CST_Load.")
Bert Van schaeybroeck
committed
if(!missing(multi.model) & !(cal.method == "mse_min")){
warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method))
}
Bert Van schaeybroeck
committed
exp$data <- Calibration(exp = exp$data, obs = obs$data,
cal.method = cal.method,
eval.method = eval.method,
multi.model = multi.model,
na.fill = na.fill,
memb_dim = memb_dim, sdate_dim = sdate_dim,
Bert Van schaeybroeck
committed
ncores = ncores)
Bert Van schaeybroeck
committed
exp$Datasets <- c(exp$Datasets, obs$Datasets)
exp$source_files <- c(exp$source_files, obs$source_files)
return(exp)
Bert Van schaeybroeck
committed
Bert Van schaeybroeck
committed
#'Forecast Calibration
#'
#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
#'@description Four types of member-by-member bias correction can be performed. The \code{bias} method corrects the bias only, the \code{evmos} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS).
#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible.
#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397
#'
#'@param exp an array containing the seasonal forecast experiment data.
#'@param obs an array containing the observed data.
#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.
#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.
#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.
#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'.
#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.
#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.
Bert Van schaeybroeck
committed
#'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array.
#'
Bert Van schaeybroeck
committed
#'@import abind
#'@import multiApply
#'@importFrom s2dverification Subset
Bert Van schaeybroeck
committed
#'
#'@seealso \code{\link{CST_Load}}
#'
#'@examples
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'a <- Calibration(exp = mod1, obs = obs1)
#'str(a)
#'@export
Calibration <- function(exp, obs, cal.method = "mse_min",
eval.method = "leave-one-out",
multi.model = FALSE, na.fill = TRUE,
memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) {
Bert Van schaeybroeck
committed
dim.exp <- dim(exp)
amt.dims.exp <- length(dim.exp)
dim.obs <- dim(obs)
amt.dims.obs <- length(dim.obs)
dim.names.exp <- names(dim.exp)
dim.names.obs <- names(dim.obs)
if (is.null(memb_dim) || !is.character(memb_dim)) {
stop("Parameter 'memb_dim' should be a character string indicating the",
"name of the dimension where members are stored in 'exp'.")
}
if (length(memb_dim) > 1) {
memb_dim <- memb_dim[1]
warning("Parameter 'memb_dim' has length greater than 1 and only",
" the first element will be used.")
}
Bert Van schaeybroeck
committed
if (is.null(sdate_dim) || !is.character(sdate_dim)) {
stop("Parameter 'sdate_dim' should be a character string indicating the",
"name of the dimension where start dates are stored in 'exp'.")
}
if (length(sdate_dim) > 1) {
sdate_dim <- sdate_dim[1]
warning("Parameter 'sdate_dim' has length greater than 1 and only",
" the first element will be used.")
}
target.dim.names.exp <- c(memb_dim, sdate_dim)
target.dim.names.obs <- sdate_dim
Bert Van schaeybroeck
committed
if (!all(target.dim.names.exp %in% dim.names.exp)) {
stop("Parameter 'exp' must have the dimensions defined in memb_dim ",
"and sdate_dim.")
Bert Van schaeybroeck
committed
}
if (!all(c(sdate_dim) %in% dim.names.obs)) {
stop("Parameter 'obs' must have the dimension defined in sdate_dim ",
"parameter.")
Bert Van schaeybroeck
committed
}
if (any(is.na(exp))) {
warning("Parameter 'exp' contains NA values.")
}
if (any(is.na(obs))) {
warning("Parameter 'obs' contains NA values.")
}
if (memb_dim %in% names(dim(obs))) {
obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected")
Bert Van schaeybroeck
committed
}
Bert Van schaeybroeck
committed
data.set.sufficiently.large.out <-
Apply(data = list(exp = exp, obs = obs),
target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs),
ncores = ncores,
Bert Van schaeybroeck
committed
fun = .data.set.sufficiently.large)$output1
if(!all(data.set.sufficiently.large.out)){
if(na.fill){
warning("Some forecast data could not be corrected due to data lack",
" and is replaced with NA values")
Bert Van schaeybroeck
committed
} else {
warning("Some forecast data could not be corrected due to data lack",
" and is replaced with uncorrected values")
Bert Van schaeybroeck
committed
}
}
Bert Van schaeybroeck
committed
calibrated <- Apply(data = list(exp = exp, obs = obs),
Bert Van schaeybroeck
committed
cal.method = cal.method,
eval.method = eval.method,
multi.model = multi.model,
Bert Van schaeybroeck
committed
na.fill = na.fill,
Bert Van schaeybroeck
committed
target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs),
ncores = ncores, output_dims = target.dim.names.exp,
Bert Van schaeybroeck
committed
fun = .cal)$output1
Bert Van schaeybroeck
committed
dexes <- match(names(dim(exp)), names(dim(calibrated)))
calibrated <- aperm(calibrated, dexes)
dimnames(calibrated) <- dimnames(exp)[dexes]
Bert Van schaeybroeck
committed
return(calibrated)
}
Bert Van schaeybroeck
committed
.data.set.sufficiently.large <- function(exp, obs){
amt.min.samples <- 3
amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x))))
return(amt.good.pts > amt.min.samples)
}
.make.eval.train.dexes <- function(eval.method, amt.points){
if(eval.method == "leave-one-out"){
dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x])))
} else if (eval.method == "in-sample"){
dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points)))
} else {
stop(paste0("unknown sampling method: ",eval.method))
}
return(dexes.lst)
}
Bert Van schaeybroeck
committed
.cal <- function(exp, obs, cal.method, eval.method, multi.model, na.fill) {
Bert Van schaeybroeck
committed
Bert Van schaeybroeck
committed
obs <- as.vector(obs)
dims.fc <- dim(exp)
Bert Van schaeybroeck
committed
amt.mbr <- dims.fc[1]
amt.sdate <- dims.fc[2]
Bert Van schaeybroeck
committed
var.cor.fc <- NA * exp
Bert Van schaeybroeck
committed
Bert Van schaeybroeck
committed
if(!.data.set.sufficiently.large(exp = exp, obs = obs)){
Bert Van schaeybroeck
committed
if(na.fill){
return(var.cor.fc)
} else {
Bert Van schaeybroeck
committed
var.cor.fc[] <- exp[]
Bert Van schaeybroeck
committed
return(var.cor.fc)
}
}
Bert Van schaeybroeck
committed
eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate)
amt.resamples <- length(eval.train.dexeses)
for (i.sample in seq(1, amt.resamples)) {
# defining training (tr) and evaluation (ev) subsets
eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes
train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes
Bert Van schaeybroeck
committed
fc.ev <- exp[ , eval.dexes, drop = FALSE]
fc.tr <- exp[ , train.dexes]
obs.tr <- obs[train.dexes , drop = FALSE]
if(cal.method == "bias"){
var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE)
} else if(cal.method == "evmos"){
#calculate ensemble and observational characteristics
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr)
#calculate value for regression parameters
init.par <- c(.calc.evmos.par(quant.obs.fc.tr))
#correct evaluation subset
var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par)
} else if (cal.method == "mse_min"){
#calculate ensemble and observational characteristics
Bert Van schaeybroeck
committed
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr)
#calculate value for regression parameters
init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model)
var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par)
} else if (cal.method == "crps_min"){
#calculate ensemble and observational characteristics
Bert Van schaeybroeck
committed
quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr)
#calculate initial value for regression parameters
init.par <- c(.calc.mse.min.par(quant.obs.fc.tr), 0.001)
init.par[3] <- sqrt(init.par[3])
#calculate regression parameters on training dataset
optim.tmp <- optim(par = init.par,
fn = .calc.crps.opt,
gr = .calc.crps.grad.opt,
quant.obs.fc = quant.obs.fc.tr,
method = "BFGS")
Bert Van schaeybroeck
committed
mbm.par <- optim.tmp$par
#correct evaluation subset
var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par)
} else {
stop("unknown calibration method: ",cal.method)
}
.calc.obs.fc.quant <- function(obs, fc){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations
obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
obs.av <- mean(obs, na.rm = TRUE)
obs.sd <- sd(obs, na.rm = TRUE)
return(
append(
Bert Van schaeybroeck
committed
.calc.fc.quant(fc = fc),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
.calc.obs.fc.quant.ext <- function(obs, fc){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations
obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
obs.av <- mean(obs, na.rm = TRUE)
obs.sd <- sd(obs, na.rm = TRUE)
Bert Van schaeybroeck
committed
.calc.fc.quant.ext(fc = fc),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
.calc.fc.quant <- function(fc){ #function to calculate different quantities of a series of ensemble forecasts
amt.mbr <- dim(fc)[1]
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
fc.ens.av.av <- mean(fc.ens.av, na.rm = TRUE)
fc.ens.av.sd <- sd(fc.ens.av, na.rm = TRUE)
fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr)
fc.ens.sd <- apply(fc, c(2), sd, na.rm = TRUE)
fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = TRUE))
fc.dev.sd <- sd(fc.dev, na.rm = TRUE)
fc.av <- mean(fc, na.rm = TRUE)
fc.sd <- sd(fc, na.rm = TRUE)
return(
list(
fc.ens.av = fc.ens.av,
fc.ens.av.av = fc.ens.av.av,
fc.ens.av.sd = fc.ens.av.sd,
fc.ens.av.per.ens = fc.ens.av.per.ens,
fc.ens.sd = fc.ens.sd,
fc.ens.var.av.sqrt = fc.ens.var.av.sqrt,
fc.av = fc.av,
fc.sd = fc.sd
)
)
}
.calc.fc.quant.ext <- function(fc){ #extended function to calculate different quantities of a series of ensemble forecasts
repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr)
repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3))
spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = TRUE)
spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr)
append(.calc.fc.quant(fc),
Bert Van schaeybroeck
committed
list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens))
#Below are the core or elementary functions to calculate the regression parameters for the different methods
.calc.mse.min.par <- function(quant.obs.fc, multi.model = F){
if(multi.model){
par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt)
} else {
par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd)
}
par.out[2] <- with(quant.obs.fc, cor.obs.fc * obs.sd / fc.ens.av.sd)
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE)
.calc.evmos.par <- function(quant.obs.fc){
par.out <- rep(NA, 2)
par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd)
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE)
return(par.out)
#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps
Bert Van schaeybroeck
committed
.calc.crps.opt <- function(par, quant.obs.fc){
return(
with(quant.obs.fc,
mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens +
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = TRUE) -
mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = TRUE)
)
)
}
.calc.crps.grad.opt <- function(par, quant.obs.fc){
sgn1 <- with(quant.obs.fc,sign(obs.per.ens -
(par[1] + par[2] * fc.ens.av.per.ens +
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)))
sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens))
sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4]))
deriv.par1 <- mean(sgn1, na.rm = TRUE)
deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = TRUE))
deriv.par3 <- with(quant.obs.fc,
mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = TRUE) -
mean(spr.abs * sgn3, na.rm = TRUE) / 2.)
deriv.par4 <- with(quant.obs.fc,
mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = TRUE) -
mean(sgn3, na.rm = TRUE) / 2.)
return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4))
}
#Below are the core or elementary functions to correct the evaluation set based on the regression parameters
.correct.evmos.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc))
}
.correct.mse.min.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3]))
}
.correct.crps.min.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant.ext(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs)))
}