From 4f5b50426257d075545a8bb58ecea4678182da62 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 7 Jan 2021 14:45:29 +0100 Subject: [PATCH 1/8] added rpc-based calibration method --- R/CST_Calibration.R | 51 ++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 3fa68d95..f4a5e491 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -85,7 +85,7 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. #' -#'@importFrom s2dv InsertDim +#'@importFrom s2dv InsertDim MeanDims Reorder #'@import abind #'@import multiApply #'@importFrom s2dverification Subset @@ -125,7 +125,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", stop("Parameter 'sdate_dim' should be a character string indicating the", "name of the dimension where start dates are stored in 'exp'.") } - if (length(sdate_dim) > 1) { + if (length(sdate_dim) > 1) { sdate_dim <- sdate_dim[1] warning("Parameter 'sdate_dim' has length greater than 1 and only", " the first element will be used.") @@ -233,24 +233,24 @@ Calibration <- function(exp, obs, cal.method = "mse_min", obs.tr <- obs[train.dexes , drop = FALSE] if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE) - } else if(cal.method == "evmos"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE) + } else if(cal.method == "evmos"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) #calculate value for regression parameters init.par <- c(.calc.evmos.par(quant.obs.fc.tr)) - #correct evaluation subset + #correct evaluation subset var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par) - } else if (cal.method == "mse_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + } else if (cal.method == "mse_min"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) #calculate value for regression parameters init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model) - #correct evaluation subset + #correct evaluation subset var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par) } else if (cal.method == "crps_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr) + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr) #calculate initial value for regression parameters init.par <- c(.calc.mse.min.par(quant.obs.fc.tr), 0.001) init.par[3] <- sqrt(init.par[3]) @@ -262,10 +262,31 @@ Calibration <- function(exp, obs, cal.method = "mse_min", method = "BFGS") mbm.par <- optim.tmp$par - #correct evaluation subset + #correct evaluation subset var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par) + } else if (cal.method == 'rpc-based') { + alpha <- 0.1 + apply_to <- 'sign' + na.rm <- FALSE ### == na.fill? + + ens_mean.ev <- s2dv::MeanDims(data = fc.ev, dims = names(amt.mbr), na.rm = na.rm) + ens_mean.tr <- s2dv::MeanDims(data = fc.tr, dims = names(amt.mbr), na.rm = na.rm) ## Ensemble mean + ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) + var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean + if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes] <- s2dv::Reorder(multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), + fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1 , order = names(dims.fc)) + dim(var.cor.fc) <- dims.fc + } else { ## no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.tr)) + } } else { - stop("unknown calibration method: ",cal.method) + stop("unknown calibration method: ",cal.method) } } return(var.cor.fc) -- GitLab From a6e3ba977c35e01e0606c64cd5367ed2420c5712 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 7 Jan 2021 18:13:36 +0100 Subject: [PATCH 2/8] added na.rm, apply_to and alpha parameters and their checks. Changed na.rm=T for na.rm=na.rm everywhere. Added documentation. --- R/CST_Calibration.R | 134 +++++++++++++++++++++++++++----------------- 1 file changed, 84 insertions(+), 50 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index f4a5e491..a017f8fa 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -6,10 +6,13 @@ #' #'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. #'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. #'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. #'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. @@ -38,8 +41,8 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, - na.fill = TRUE, memb_dim = 'member', - sdate_dim = 'sdate', ncores = 1) { + na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") @@ -68,18 +71,22 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #' #'@author VerĂ³nica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Four types of member-by-member bias correction can be performed. The \code{bias} method corrects the bias only, the \code{evmos} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). +#'@description Four types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). #'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. #'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x #'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 #'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 #' #'@param exp an array containing the seasonal forecast experiment data. #'@param obs an array containing the observed data. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. #'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. #'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. @@ -92,6 +99,13 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #' #'@seealso \code{\link{CST_Load}} #' +#'@details +#'Compatibility between na.fill and na.rm: +#'\item{na.fill == TRUE & na.rm == TRUE}. +#'\item{na.fill == TRUE & na.rm == FALSE}. +#'\item{na.fill == FALSE & na.rm == TRUE}. +#'\item{na.fill == FALSE & na.rm == FALSE}. +#' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) @@ -102,7 +116,8 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@export Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", - multi.model = FALSE, na.fill = TRUE, + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { dim.exp <- dim(exp) @@ -161,14 +176,36 @@ Calibration <- function(exp, obs, cal.method = "mse_min", fun = .data.set.sufficiently.large)$output1 if(!all(data.set.sufficiently.large.out)){ - if(na.fill){ - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with NA values") - } else { - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with uncorrected values") - } + if(na.fill){ + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with NA values") + } else { + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with uncorrected values") + } + } + + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE.") } + if (cal.method == 'rpc-based') { + if (is.null(apply_to)) { + apply_to <- 'sign' + warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set as 'sign', as in Eade et al. (2014).") + } else if (!apply_to %in% c('all','sign')) { + stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") + } + + if (apply_to == 'sign') { + if (is.null(alpha)) { + alpha <- 0.1 + warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set as 0.1, as in Eade et al. (2014).") + } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { + stop("'alpha' must be a number between 0 and 1.") + } + } + } + calibrated <- Apply(data = list(exp = exp, obs = obs), cal.method = cal.method, eval.method = eval.method, @@ -213,12 +250,12 @@ Calibration <- function(exp, obs, cal.method = "mse_min", names(dim(var.cor.fc)) <- dims.fc if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } + if(na.fill){ + return(var.cor.fc) + } else { + var.cor.fc[] <- exp[] + return(var.cor.fc) + } } eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate) @@ -233,7 +270,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", obs.tr <- obs[train.dexes , drop = FALSE] if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE) + var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) } else if(cal.method == "evmos"){ #calculate ensemble and observational characteristics quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) @@ -265,9 +302,6 @@ Calibration <- function(exp, obs, cal.method = "mse_min", #correct evaluation subset var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par) } else if (cal.method == 'rpc-based') { - alpha <- 0.1 - apply_to <- 'sign' - na.rm <- FALSE ### == na.fill? ens_mean.ev <- s2dv::MeanDims(data = fc.ev, dims = names(amt.mbr), na.rm = na.rm) ens_mean.tr <- s2dv::MeanDims(data = fc.tr, dims = names(amt.mbr), na.rm = na.rm) ## Ensemble mean @@ -279,8 +313,8 @@ Calibration <- function(exp, obs, cal.method = "mse_min", r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes] <- s2dv::Reorder(multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), - fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1 , order = names(dims.fc)) + var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(dims.fc)) dim(var.cor.fc) <- dims.fc } else { ## no significant -> replacing with observed climatology var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.tr)) @@ -295,10 +329,10 @@ Calibration <- function(exp, obs, cal.method = "mse_min", .calc.obs.fc.quant <- function(obs, fc){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = TRUE) - obs.sd <- sd(obs, na.rm = TRUE) + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) return( append( .calc.fc.quant(fc = fc), @@ -315,10 +349,10 @@ Calibration <- function(exp, obs, cal.method = "mse_min", .calc.obs.fc.quant.ext <- function(obs, fc){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = TRUE) - obs.sd <- sd(obs, na.rm = TRUE) + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) return( append( @@ -336,16 +370,16 @@ Calibration <- function(exp, obs, cal.method = "mse_min", .calc.fc.quant <- function(fc){ #function to calculate different quantities of a series of ensemble forecasts amt.mbr <- dim(fc)[1] - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) - fc.ens.av.av <- mean(fc.ens.av, na.rm = TRUE) - fc.ens.av.sd <- sd(fc.ens.av, na.rm = TRUE) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) + fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) + fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) - fc.ens.sd <- apply(fc, c(2), sd, na.rm = TRUE) - fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = TRUE)) + fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) + fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) fc.dev <- fc - fc.ens.av.per.ens - fc.dev.sd <- sd(fc.dev, na.rm = TRUE) - fc.av <- mean(fc, na.rm = TRUE) - fc.sd <- sd(fc, na.rm = TRUE) + fc.dev.sd <- sd(fc.dev, na.rm = na.rm) + fc.av <- mean(fc, na.rm = na.rm) + fc.sd <- sd(fc, na.rm = na.rm) return( list( fc.ens.av = fc.ens.av, @@ -367,7 +401,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", amt.mbr <- dim(fc)[1] repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) - spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = TRUE) + spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) return( @@ -386,14 +420,14 @@ Calibration <- function(exp, obs, cal.method = "mse_min", par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) } par.out[2] <- with(quant.obs.fc, cor.obs.fc * obs.sd / fc.ens.av.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } .calc.evmos.par <- function(quant.obs.fc){ par.out <- rep(NA, 2) par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } #Below are the core or elementary functions to calculate the functions necessary for the minimization of crps @@ -401,8 +435,8 @@ Calibration <- function(exp, obs, cal.method = "mse_min", return( with(quant.obs.fc, mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = TRUE) - - mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = TRUE) + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) - + mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm) ) ) } @@ -413,14 +447,14 @@ Calibration <- function(exp, obs, cal.method = "mse_min", ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) - deriv.par1 <- mean(sgn1, na.rm = TRUE) - deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = TRUE)) + deriv.par1 <- mean(sgn1, na.rm = na.rm) + deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm)) deriv.par3 <- with(quant.obs.fc, - mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = TRUE) - - mean(spr.abs * sgn3, na.rm = TRUE) / 2.) + mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) - + mean(spr.abs * sgn3, na.rm = na.rm) / 2.) deriv.par4 <- with(quant.obs.fc, - mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = TRUE) - - mean(sgn3, na.rm = TRUE) / 2.) + mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) - + mean(sgn3, na.rm = na.rm) / 2.) return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) } -- GitLab From ecd02ad1ed9edbeda5c97f53c86a5b3c425570ca Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 8 Jan 2021 10:30:53 +0100 Subject: [PATCH 3/8] added na.rm parameter in the definition of the functions --- R/CST_Calibration.R | 68 +++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index a017f8fa..c1211bb8 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -55,7 +55,8 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, - na.fill = na.fill, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) @@ -191,15 +192,14 @@ Calibration <- function(exp, obs, cal.method = "mse_min", if (cal.method == 'rpc-based') { if (is.null(apply_to)) { apply_to <- 'sign' - warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set as 'sign', as in Eade et al. (2014).") + warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).") } else if (!apply_to %in% c('all','sign')) { stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") } - if (apply_to == 'sign') { if (is.null(alpha)) { alpha <- 0.1 - warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set as 0.1, as in Eade et al. (2014).") + warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. (2014).") } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { stop("'alpha' must be a number between 0 and 1.") } @@ -210,7 +210,8 @@ Calibration <- function(exp, obs, cal.method = "mse_min", cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, - na.fill = na.fill, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), ncores = ncores, output_dims = target.dim.names.exp, fun = .cal)$output1 @@ -240,7 +241,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", return(dexes.lst) } -.cal <- function(exp, obs, cal.method, eval.method, multi.model, na.fill) { +.cal <- function(exp, obs, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { obs <- as.vector(obs) dims.fc <- dim(exp) @@ -273,34 +274,35 @@ Calibration <- function(exp, obs, cal.method = "mse_min", var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) } else if(cal.method == "evmos"){ #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate value for regression parameters - init.par <- c(.calc.evmos.par(quant.obs.fc.tr)) + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par) + var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "mse_min"){ #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate value for regression parameters - init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par) + var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "crps_min"){ #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr) + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate initial value for regression parameters - init.par <- c(.calc.mse.min.par(quant.obs.fc.tr), 0.001) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) init.par[3] <- sqrt(init.par[3]) #calculate regression parameters on training dataset optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, + quant.obs.fc = quant.obs.fc.tr, + na.rm = na.rm, method = "BFGS") mbm.par <- optim.tmp$par #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par) + var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) } else if (cal.method == 'rpc-based') { ens_mean.ev <- s2dv::MeanDims(data = fc.ev, dims = names(amt.mbr), na.rm = na.rm) @@ -326,7 +328,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", return(var.cor.fc) } -.calc.obs.fc.quant <- function(obs, fc){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) @@ -335,7 +337,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", obs.sd <- sd(obs, na.rm = na.rm) return( append( - .calc.fc.quant(fc = fc), + .calc.fc.quant(fc = fc, na.rm = na.rm), list( obs.per.ens = obs.per.ens, cor.obs.fc = cor.obs.fc, @@ -346,7 +348,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", ) } -.calc.obs.fc.quant.ext <- function(obs, fc){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) @@ -356,7 +358,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", return( append( - .calc.fc.quant.ext(fc = fc), + .calc.fc.quant.ext(fc = fc, na.rm = na.rm), list( obs.per.ens = obs.per.ens, cor.obs.fc = cor.obs.fc, @@ -368,7 +370,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", } -.calc.fc.quant <- function(fc){ #function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts amt.mbr <- dim(fc)[1] fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) @@ -396,7 +398,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", ) } -.calc.fc.quant.ext <- function(fc){ #extended function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts amt.mbr <- dim(fc)[1] repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) @@ -405,13 +407,13 @@ Calibration <- function(exp, obs, cal.method = "mse_min", spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) return( - append(.calc.fc.quant(fc), + append(.calc.fc.quant(fc, na.rm = na.rm), list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens)) ) } #Below are the core or elementary functions to calculate the regression parameters for the different methods -.calc.mse.min.par <- function(quant.obs.fc, multi.model = F){ +.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ par.out <- rep(NA, 3) if(multi.model){ @@ -424,14 +426,14 @@ Calibration <- function(exp, obs, cal.method = "mse_min", return(par.out) } -.calc.evmos.par <- function(quant.obs.fc){ +.calc.evmos.par <- function(quant.obs.fc, na.rm){ par.out <- rep(NA, 2) par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } #Below are the core or elementary functions to calculate the functions necessary for the minimization of crps -.calc.crps.opt <- function(par, quant.obs.fc){ +.calc.crps.opt <- function(par, quant.obs.fc, na.rm){ return( with(quant.obs.fc, mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + @@ -441,7 +443,7 @@ Calibration <- function(exp, obs, cal.method = "mse_min", ) } -.calc.crps.grad.opt <- function(par, quant.obs.fc){ +.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ sgn1 <- with(quant.obs.fc,sign(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) @@ -459,15 +461,15 @@ Calibration <- function(exp, obs, cal.method = "mse_min", } #Below are the core or elementary functions to correct the evaluation set based on the regression parameters -.correct.evmos.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant(fc = fc) +.correct.evmos.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc)) } -.correct.mse.min.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant(fc = fc) +.correct.mse.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3])) } -.correct.crps.min.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant.ext(fc = fc) +.correct.crps.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs))) } -- GitLab From 99abfb4b00d7278a36d2cc4caee1f51a70d703ea Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 8 Jan 2021 14:16:37 +0100 Subject: [PATCH 4/8] added the function to calibrate individual members --- R/CST_Calibration.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index c1211bb8..6ad1e49c 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -304,7 +304,6 @@ Calibration <- function(exp, obs, cal.method = "mse_min", #correct evaluation subset var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) } else if (cal.method == 'rpc-based') { - ens_mean.ev <- s2dv::MeanDims(data = fc.ev, dims = names(amt.mbr), na.rm = na.rm) ens_mean.tr <- s2dv::MeanDims(data = fc.tr, dims = names(amt.mbr), na.rm = na.rm) ## Ensemble mean ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread @@ -473,3 +472,9 @@ Calibration <- function(exp, obs, cal.method = "mse_min", quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs))) } + +# Function to calibrate the individual members with the RPC-based method +.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){ + member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal + return(member_cal) +} \ No newline at end of file -- GitLab From 066cebff659e69e55230d7d6d86b16a14a231c2f Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 8 Jan 2021 15:29:04 +0100 Subject: [PATCH 5/8] added the compatibility between na.fill and na.rm --- R/CST_Calibration.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 6ad1e49c..209d09eb 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -102,10 +102,10 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #' #'@details #'Compatibility between na.fill and na.rm: -#'\item{na.fill == TRUE & na.rm == TRUE}. -#'\item{na.fill == TRUE & na.rm == FALSE}. -#'\item{na.fill == FALSE & na.rm == TRUE}. -#'\item{na.fill == FALSE & na.rm == FALSE}. +#'\item{na.fill == TRUE & na.rm == TRUE}: If there are 3 or more NAs, NA will be returned. If there are less than 3 NAs, the corrected value will be returned. +#'\item{na.fill == TRUE & na.rm == FALSE}: If there are any NA, NA will be returned. If there is not any NA, the corrected value will be returned. +#'\item{na.fill == FALSE & na.rm == TRUE}: If there are 3 or more NAs, the uncorrected value will be returned. If there are less than 3 NAs, the corrected value will be returned. +#'\item{na.fill == FALSE & na.rm == FALSE}: If there are 3 or more NAs, the uncorrected value will be returned. If there are 1 or 2 NAs, NA will be returned. If there is not any NA, the corrected value will be returned. #' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -- GitLab From f147f0273d0741af038930bb80a3961954fc12df Mon Sep 17 00:00:00 2001 From: Carlos Delgado Torres Date: Fri, 8 Jan 2021 15:55:27 +0100 Subject: [PATCH 6/8] Update CST_Calibration.R --- R/CST_Calibration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 209d09eb..f49a5c40 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -75,9 +75,9 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@description Four types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). #'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. #'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 #'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 #'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 -#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 #' #'@param exp an array containing the seasonal forecast experiment data. #'@param obs an array containing the observed data. -- GitLab From 9b7cadead023fc955b57fc273af22a1f3e9f2a7b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 15 Jan 2021 17:49:05 +0100 Subject: [PATCH 7/8] finished documentation --- R/CST_Calibration.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index f49a5c40..58ef7207 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -85,7 +85,7 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. -#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. #'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. #'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. #'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. @@ -101,11 +101,7 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #'@seealso \code{\link{CST_Load}} #' #'@details -#'Compatibility between na.fill and na.rm: -#'\item{na.fill == TRUE & na.rm == TRUE}: If there are 3 or more NAs, NA will be returned. If there are less than 3 NAs, the corrected value will be returned. -#'\item{na.fill == TRUE & na.rm == FALSE}: If there are any NA, NA will be returned. If there is not any NA, the corrected value will be returned. -#'\item{na.fill == FALSE & na.rm == TRUE}: If there are 3 or more NAs, the uncorrected value will be returned. If there are less than 3 NAs, the corrected value will be returned. -#'\item{na.fill == FALSE & na.rm == FALSE}: If there are 3 or more NAs, the uncorrected value will be returned. If there are 1 or 2 NAs, NA will be returned. If there is not any NA, the corrected value will be returned. +#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. #' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -- GitLab From 3abc20d85ddc68891fe508f08a7d8fb859101bc7 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 3 Feb 2021 12:10:15 +0100 Subject: [PATCH 8/8] doc automatically generated --- man/CST_Calibration.Rd | 11 ++++++++++- man/Calibration.Rd | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index f15d0e2f..33b3c5ec 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -11,6 +11,9 @@ CST_Calibration( eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = "member", sdate_dim = "sdate", ncores = 1 @@ -21,7 +24,7 @@ CST_Calibration( \item{obs}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.} -\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.} +\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.} \item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.} @@ -29,6 +32,12 @@ CST_Calibration( \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}.} + +\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} + +\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} + \item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} \item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} diff --git a/man/Calibration.Rd b/man/Calibration.Rd index df91fd07..f61a3cd3 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -11,6 +11,9 @@ Calibration( eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = "member", sdate_dim = "sdate", ncores = 1 @@ -21,7 +24,7 @@ Calibration( \item{obs}{an array containing the observed data.} -\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.} +\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.} \item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.} @@ -29,6 +32,12 @@ Calibration( \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}.} + +\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} + +\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} + \item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} \item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} @@ -39,10 +48,13 @@ Calibration( an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. } \description{ -Four types of member-by-member bias correction can be performed. The \code{bias} method corrects the bias only, the \code{evmos} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). +Four types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. } +\details{ +Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. +} \examples{ mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) @@ -54,6 +66,8 @@ str(a) \references{ Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 + Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 -- GitLab