Newer
Older
#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
#'@description Three types of member-by-member bias correction can be performed. The \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2015). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS).
#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible.
#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011
#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post‐processing using member‐by‐member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397
#'
#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.
#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.
#'@param cal.method is the calibration method used, can be either \code{"evmos"}, \code{"mse_min"} or \code{"crps_min"}. Default value is \code{"mse_min"}.
#'@param eval.method is the sampling method used, can be either \code{"in-sample"} or \code{"leave-one-out"}. Default value is the \code{"leave-one-out"} cross validation.
#'@param ... other parameters to be passed on to the calibration procedure.
#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions of the experimental data.
#'
#'@import s2dverification
#'# Example
#'# Creation of sample s2dverification objects. These are not complete
#'# s2dverification objects though. The Load function returns complete objects.
#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
#'lon <- seq(0, 30, 5)
#'lat <- seq(0, 25, 5)
#'exp <- list(data = mod1, lat = lat, lon = lon)
#'obs <- list(data = obs1, lat = lat, lon = lon)
#'attr(exp, 'class') <- 's2dv_cube'
#'attr(obs, 'class') <- 's2dv_cube'
#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample")
CST_Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", ...) {
if (!inherits(exp, "s2dv_cube") || !inherits(exp, "s2dv_cube")) {
stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
"as output by CSTools::CST_Load.")
stop("The length of the dimension 'member' in the component 'data' ",
"of the parameter 'obs' must be equal to 1.")
Bert Van schaeybroeck
committed
exp$data <- .calibration.wrap(exp = exp$data, obs = obs$data, cal.method = cal.method, eval.method = eval.method, ...)
exp$Datasets <- c(exp$Datasets, obs$Datasets)
exp$source_files <- c(exp$source_files, obs$source_files)
return(exp)
.make.eval.train.dexes <- function(eval.method, amt.points){
if(amt.points < 10 & eval.method != "in-sample"){
#cat("Too few points, so sample method will necessarily be in-sample")
eval.method <- "in-sample"
if(eval.method == "leave-one-out"){
dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x])))
} else if (eval.method == "in-sample"){
dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points)))
} else {
stop(paste0("unknown sampling method: ",eval.method))
}
return(dexes.lst)
}
Bert Van schaeybroeck
committed
.cal <- function(obs.fc, cal.method, eval.method, ...) {
dims.tmp=dim(obs.fc)
amt.mbr <- dims.tmp["member"][] - 1
amt.sdate <- dims.tmp["sdate"][]
pos <- match(c("member","sdate"), names(dims.tmp))
obs.fc <- aperm(obs.fc, pos)
var.obs <- asub(obs.fc, list(1),1)
var.fc <- asub(obs.fc, list(1+seq(1, amt.mbr)),1)
dims.fc <- dim(var.fc)
var.cor.fc <- NA * var.fc
Bert Van schaeybroeck
committed
eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate)
amt.resamples <- length(eval.train.dexeses)
for (i.sample in seq(1, amt.resamples)) {
# defining training (tr) and evaluation (ev) subsets
eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes
train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes
fc.ev <- var.fc[ , eval.dexes, drop = FALSE]
fc.tr <- var.fc[ , train.dexes]
obs.tr <- var.obs[train.dexes , drop = FALSE]
if(cal.method == "bias"){
var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE)
} else if(cal.method == "evmos"){
#calculate ensemble and observational characteristics
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr)
#calculate value for regression parameters
init.par <- c(.calc.evmos.par(quant.obs.fc.tr))
#correct evaluation subset
var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par)
} else if (cal.method == "mse_min"){
#calculate ensemble and observational characteristics
Bert Van schaeybroeck
committed
quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr)
#calculate value for regression parameters
init.par <- c(.calc.mse.min.par(quant.obs.fc.tr))
var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par)
} else if (cal.method == "crps_min"){
#calculate ensemble and observational characteristics
Bert Van schaeybroeck
committed
quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr)
#calculate initial value for regression parameters
init.par <- c(.calc.mse.min.par(quant.obs.fc.tr), 0.001)
init.par[3] <- sqrt(init.par[3])
#calculate regression parameters on training dataset
Bert Van schaeybroeck
committed
optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt,
quant.obs.fc = quant.obs.fc.tr, method = "BFGS")
mbm.par <- optim.tmp$par
#correct evaluation subset
var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par)
} else {
stop("unknown calibration method: ",cal.method)
}
names(dim(var.cor.fc)) <- c("member", "sdate")
Bert Van schaeybroeck
committed
.calibration.wrap <- function(exp, obs, cal.method, eval.method, ...) {
target.dims <- c("member", "sdate")
if (!all(target.dims %in% names(dim(exp)))) {
stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.")
}
if (!all(c("sdate") %in% names(dim(obs)))) {
stop("Parameter 'obs' must have the dimension 'sdate'.")
}
if (any(is.na(exp))) {
warning("Parameter 'exp' contains NA values.")
if (any(is.na(obs))) {
warning("Parameter 'obs' contains NA values.")
target_dims_obs <- "sdate"
if ("member" %in% names(dim(obs))) {
target_dims_obs <- c("member", target_dims_obs)
amt.member=dim(exp)["member"]
amt.sdate=dim(exp)["sdate"]
target.dims <- c("member", "sdate")
return.feat <- list(dim = c(amt.member, amt.sdate))
return.feat$name <- c("member", "sdate")
return.feat$dim.name <- list(dimnames(exp)[["member"]],dimnames(exp)[["sdate"]])
calibrated <- .apply.obs.fc(obs = obs,
fc = exp,
target.dims = target.dims,
return.feat = return.feat,
Bert Van schaeybroeck
committed
FUN = .cal,
cal.method = cal.method,
eval.method = eval.method,
...)
.apply.obs.fc <- function(obs, fc, target.dims, FUN, return.feat, ...){ #wrapper function that combines obs and fc into one array and applies a function on it
dimnames.tmp <- dimnames(fc)
fc.dims.tmp <- dim(fc)
dims.out.tmp <- return.feat$dim
obs.fc <- .combine.obs.fc(obs, fc)
names.dim <- names(dim(obs.fc))
amt.dims <- length(names.dim)
margin.all <- seq(1, amt.dims)
matched.dims <- match(target.dims, names.dim)
margin.to.use <- margin.all[-matched.dims]
arr.out <- apply(X = obs.fc,
MARGIN = margin.to.use,
FUN = FUN,
...)
dims.tmp <- dim(arr.out)
names.dims.tmp <- names(dim(arr.out))
if(prod(return.feat$dim) != dims.tmp[1]){
stop("apply.obs.fc: returned dimensions not as expected: ", prod(return.feat$dim), " and ", dims.tmp[1])
dim(arr.out) <- c(dims.out.tmp, dims.tmp[-1])
names(dim(arr.out)) <- c(return.feat$name, names.dims.tmp[c(-1)])
names.dim[matched.dims] <- return.feat$name
pos <- match(names.dim, names(dim(arr.out)))
pos_inv <- match(names(dim(arr.out)), names.dim)
arr.out <- aperm(arr.out, pos)
for (i.item in seq(1,length(return.feat$name))){
dimnames.tmp[[pos_inv[i.item]]] <- return.feat$dim.name[[i.item]]
dimnames(arr.out) <- dimnames.tmp
return(arr.out)
}
.combine.obs.fc <- function(obs,fc){ #function to combine two arrays (obs and fc) into one, in order to be able to use with the apply function
names.dim.tmp <- names(dim(obs))
members.dim <- which(names.dim.tmp == "member")
arr.out <- abind(obs, fc, along = members.dim)
dimnames.tmp <- dimnames(arr.out)
names(dim(arr.out)) <- names.dim.tmp
dimnames(arr.out) <- dimnames.tmp
names(dimnames(arr.out)) <- names.dim.tmp
return(arr.out)
}
.calc.obs.fc.quant <- function(obs, fc){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations
amt.mbr <- dim(fc)[1]
obs.per.ens <- .spr(obs, amt.mbr)
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
obs.av <- mean(obs, na.rm = TRUE)
obs.sd <- sd(obs, na.rm = TRUE)
return(
append(
Bert Van schaeybroeck
committed
.calc.fc.quant(fc = fc),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
.calc.obs.fc.quant.ext <- function(obs, fc){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations
amt.mbr <- dim(fc)[1]
obs.per.ens <- .spr(obs, amt.mbr)
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
obs.av <- mean(obs, na.rm = TRUE)
obs.sd <- sd(obs, na.rm = TRUE)
return(
append(
Bert Van schaeybroeck
committed
.calc.fc.quant.ext(fc = fc),
list(
obs.per.ens = obs.per.ens,
cor.obs.fc = cor.obs.fc,
obs.av = obs.av,
obs.sd = obs.sd
)
)
)
}
.calc.fc.quant <- function(fc){ #function to calculate different quantities of a series of ensemble forecasts
amt.mbr <- dim(fc)[1]
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
fc.ens.av.av <- mean(fc.ens.av, na.rm = TRUE)
fc.ens.av.sd <- sd(fc.ens.av, na.rm = TRUE)
fc.ens.av.per.ens <- .spr(fc.ens.av, amt.mbr)
fc.ens.sd <- apply(fc, c(2), sd, na.rm = TRUE)
fc.ens.sd.av <- sqrt(mean(fc.ens.sd^2,na.rm = TRUE))
fc.dev <- fc - fc.ens.av.per.ens
fc.av <- mean(fc, na.rm = TRUE)
fc.sd <- sd(fc, na.rm = TRUE)
return(
list(
fc.ens.av = fc.ens.av,
fc.ens.av.av = fc.ens.av.av,
fc.ens.av.sd = fc.ens.av.sd,
fc.ens.av.per.ens = fc.ens.av.per.ens,
fc.ens.sd = fc.ens.sd,
fc.ens.sd.av = fc.ens.sd.av,
fc.dev = fc.dev,
fc.av = fc.av,
fc.sd = fc.sd
)
)
}
.calc.fc.quant.ext <- function(fc){ #extended function to calculate different quantities of a series of ensemble forecasts
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
amt.mbr <- dim(fc)[1]
fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE)
fc.ens.av.av <- mean(fc.ens.av, na.rm = TRUE)
fc.ens.av.sd <- sd(fc.ens.av, na.rm = TRUE)
fc.ens.av.per.ens <- .spr(fc.ens.av, amt.mbr)
fc.ens.sd <- apply(fc, c(2), sd, na.rm = TRUE)
fc.ens.sd.av <- sqrt(mean(fc.ens.sd^2, na.rm = TRUE))
fc.dev <- fc - fc.ens.av.per.ens
repmat1.tmp <- .spr(fc, amt.mbr)
repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3))
spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = TRUE)
spr.abs.per.ens <- .spr(spr.abs, amt.mbr)
fc.av <- mean(fc, na.rm = TRUE)
fc.sd <- sd(fc, na.rm = TRUE)
return(
list(
fc.ens.av = fc.ens.av,
fc.ens.av.av = fc.ens.av.av,
fc.ens.av.sd = fc.ens.av.sd,
fc.ens.av.per.ens = fc.ens.av.per.ens,
fc.ens.sd = fc.ens.sd,
fc.ens.sd.av = fc.ens.sd.av,
fc.dev = fc.dev,
spr.abs = spr.abs,
spr.abs.per.ens = spr.abs.per.ens,
fc.av = fc.av,
fc.sd = fc.sd
)
)
}
#Below are the core or elementary functions to calculate the regression parameters for the different methods
.calc.mse.min.par <- function(quant.obs.fc){
par.out <- rep(NA, 3)
par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1 - cor.obs.fc^2) / fc.ens.sd.av)
par.out[2] <- with(quant.obs.fc, cor.obs.fc * obs.sd / fc.ens.av.sd)
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE)
return(par.out)
}
.calc.evmos.par <- function(quant.obs.fc){
par.out <- rep(NA, 2)
par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd)
par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE)
return(par.out)
#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps
Bert Van schaeybroeck
committed
.calc.crps.opt <- function(par, quant.obs.fc){
return(
with(quant.obs.fc,
mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens +
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = TRUE) -
mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = TRUE)
)
)
}
Bert Van schaeybroeck
committed
.calc.crps.grad.opt <- function(par, quant.obs.fc){
sgn1 <- with(quant.obs.fc,sign(obs.per.ens -
(par[1] + par[2] * fc.ens.av.per.ens +
((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)))
sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens))
sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4]))
deriv.par1 <- mean(sgn1, na.rm = TRUE)
Bert Van schaeybroeck
committed
deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = TRUE))
deriv.par3 <- with(quant.obs.fc,
mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = TRUE) -
mean(spr.abs * sgn3, na.rm = TRUE) / 2.)
deriv.par4 <- with(quant.obs.fc,
mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = TRUE) -
mean(sgn3, na.rm = TRUE) / 2.)
return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4))
}
#Below are the core or elementary functions to correct the evaluation set based on the regression parameters
.correct.evmos.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc))
}
.correct.mse.min.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3]))
}
.correct.crps.min.fc <- function(fc, par){
quant.fc.mp <- .calc.fc.quant.ext(fc = fc)
return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs)))
}
#Auxiliary functions:
.calc.crps <- function(obs, fc){ #function to calculate the crps taking observation and forecast as input
quant.obs.fc <- .calc.obs.fc.quant.ext(obs = obs, fc = fc)
return(with(quant.obs.fc,
mean(apply(abs(obs.per.ens - fc), c(2), mean, na.rm = TRUE) - spr.abs / 2., na.rm = TRUE)))
}
.spr <- function(x, amt.spr, dim = 1) { #function to extend an array along one dimension (dim), amt.spr times
if(is.vector(x)){
amt.dims <- 1
if(dim == 2){
arr.out <- array(rep(x, amt.spr), c(length(x), amt.spr))
} else if(dim == 1){
arr.out <- t(array(rep(x, amt.spr), c(length(x), amt.spr)))
} else {
stop(paste0("error in .spr: amt.dims = ",amt.dims," while dim = ",dim))
}
} else if(is.array(x)) {
amt.dims <- length(dim(x))
if(dim > amt.dims + 1){
stop(paste0("error in .spr: amt.dims = ",amt.dims," while dim = ",dim))
}
arr.out <- array(rep(as.vector(x), amt.spr), c(dim(x), amt.spr))
if(dim != amt.dims + 1){
amt.dims.out <- amt.dims + 1
dims.tmp <- seq(1, amt.dims.out)
dims.tmp[seq(dim, amt.dims.out)] <- c(amt.dims.out, seq(dim,amt.dims.out-1))
arr.out <- aperm(arr.out, dims.tmp)
}
} else {
stop("x is not array nor vector but is ", class(x))