Newer
Older
#'Compute Brier score, its decomposition, and Brier skill score
#'
#'Compute the Brier score (BS) and the components of its standard decompostion
#'with the two within-bin components described in Stephenson et al., (2008). It
#'also returns the bias-corrected decomposition of the BS (Ferro and Fricker,
#'2012). BSS has the climatology as the reference forecast.
#'@param exp A vector or a numeric array with named dimensions. It should be
#' the predicted probabilities which are within the range [0, 1] if memb_dim
#' doesn't exist. If it has memb_dim, the value should be 0 or 1, and the
#' predicted probabilities will be computed by ensemble mean. The dimensions
#' must at least have 'time_dim'.
#' range [0, 1].
#'@param obs A numeric array with named dimensions of the binary observations
#' (0 or 1). The dimension must at least have 'time_dim' and other dimensions
#' of 'exp' except 'memb_dim', which is optional. The length of 'dat_dim' can
#' be different from 'exp', and the length of 'memb_dim' must be 1 if it has.
#'@param thresholds A numeric vector used to bin the forecasts. The default
#' value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are
#'@param time_dim A character string indicating the name of dimension along
#' which Brier score is computed. The default value is 'sdate'.
#'@param dat_dim A character string indicating the name of dataset dimension in
#' 'exp' and 'obs'. The length of this dimension can be different between
#' 'exp' and 'obs'. The default value is NULL.
#'@param memb_dim A character string of the name of the member dimension in
#' 'exp' (and 'obs', optional). The function will do the ensemble mean
#' over this dimension. If there is no member dimension, set NULL. The default
#' value is NULL.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'\item{$rel}{standard reliability}
#'\item{$res}{standard resolution}
#'\item{$unc}{standard uncertainty}
#'\item{$bs}{Brier score}
#'\item{$bs_check_res}{rel - res + unc}
#'\item{$bss_res}{res - rel / unc}
#'\item{$gres}{generalized resolution}
#'\item{$bs_check_gres}{rel - gres + unc}
#'\item{$bss_gres}{gres - rel / unc}
#'\item{$rel_bias_corrected}{bias - corrected rel}
#'\item{$gres_bias_corrected}{bias - corrected gres}
#'\item{$unc_bias_corrected}{bias - corrected unc}
#'\item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected}
#'\item{$nk}{number of forecast in each bin}
#'\item{$fkbar}{average probability of each bin}
#'\item{$okbar}{relative frequency that the observed event occurred}
#'The data type and dimensions of the items depend on if the input 'exp' and
#''obs' are:\cr
#'(a) Vectors\cr
#'(b) Arrays with 'dat_dim' specified\cr
#'(c) Arrays with no 'dat_dim' specified\cr
#'Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres',
#''bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected',
#''unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array
#'with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs'
#'expect 'time_dim' and 'memb_dim') (c) an array with dimensions of
#''exp' and 'obs' except 'time_dim' and 'memb_dim'\cr
#'Items 'nk', 'fkbar', and 'okbar' are (a) a vector of length of bin number
#'determined by 'threshold' (b) an array with dimensions c(nexp, nobs,
#'no. of bins, all the rest dimensions in 'exp' and 'obs' expect 'time_dim' and
#''memb_dim') (c) an array with dimensions c(no. of bin, all the rest dimensions
#'in 'exp' and 'obs' expect 'time_dim' and 'memb_dim')
#'
#'@references
#'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr
#'Stephenson et al. (2008). Two extra components in the Brier score decomposition.
#' Weather and Forecasting, 23: 752-757.\cr
#'Ferro and Fricker (2012). A bias-corrected decomposition of the BS.
#' Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.
#'
#'@examples
#'# Inputs are vectors
#'exp <- runif(10)
#'#===============uncomment the examples below when ProbBins is included==========
#'#example(Load)
#'#bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2)
#'#bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2)
#'#res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member')
BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sdate',
if (is.null(exp) | is.null(obs)) {
stop("Parameter 'exp' and 'obs' cannot be NULL.")
}
if (!is.numeric(exp) | !is.numeric(obs)) {
stop("Parameter 'exp' and 'obs' must be a numeric vector or a numeric array.")
}
if (is.null(dim(exp))) { #is vector
dim(exp) <- c(length(exp))
names(dim(exp)) <- time_dim
}
if (is.null(dim(obs))) { #is vector
dim(obs) <- c(length(obs))
names(dim(obs)) <- time_dim
}
if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
stop("Parameter 'exp' and 'obs' must have dimension names.")
}
if (any(!obs %in% c(0, 1))) {
stop("Parameter 'obs' must be binary events (0 or 1).")
}
## thresholds
if (!is.numeric(thresholds) | !is.vector(thresholds)) {
stop("Parameter 'thresholds' must be a numeric vector.")
}
if (any(thresholds <= 0 | thresholds >= 1)) {
stop("Parameter 'thresholds' must be between 0 and 1 as the bin-breaks.")
}
## time_dim
if (!is.character(time_dim) | length(time_dim) > 1) {
stop("Parameter 'time_dim' must be a character string.")
}
if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.")
}
## dat_dim
if (!is.null(dat_dim)) {
if (!is.character(dat_dim) | length(dat_dim) > 1) {
stop("Parameter 'dat_dim' must be a character string.")
}
if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
stop("Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension.")
}
}
## memb_dim
if (!is.null(memb_dim)) {
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
if (!memb_dim %in% names(dim(exp))) {
stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
if (memb_dim %in% names(dim(obs))) {
if (dim(obs)[memb_dim] != 1) {
stop("The length of parameter 'memb_dim' in 'obs' must be 1.")
}
}
}
## exp and obs (2)
if (is.null(memb_dim)) {
if (max(exp) > 1 | min(exp) < 0) {
stop("Parameter 'exp' must be within [0, 1] range.")
}
} else {
if (any(!exp %in% c(0, 1))) {
stop("Parameter 'exp' must be 0 or 1 if it has memb_dim.")
}
}
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
if (!is.null(memb_dim)) {
name_exp <- name_exp[-which(name_exp == memb_dim)]
if (memb_dim %in% name_obs) {
name_obs <- name_obs[-which(name_obs == memb_dim)]
}
}
if (!is.null(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim)]
name_obs <- name_obs[-which(name_obs == dat_dim)]
}
if (any(name_exp != name_obs)) {
stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ",
"of all the dimensions expect 'dat_dim' and 'memb_dim'."))
}
if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ",
"of all the dimensions expect 'dat_dim' and 'memb_dim'."))
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
###############################
# Calculate Brier score
## ensemble mean
if (!is.null(memb_dim)) {
exp <- MeanDims(exp, memb_dim)
if (memb_dim %in% names(dim(obs))) {
obs <- MeanDims(obs, memb_dim)
}
if (is.null(dat_dim)) {
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim),
c(time_dim)),
fun = .BrierScore,
thresholds = thresholds,
ncores = ncores)
} else {
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim, dat_dim),
c(time_dim, dat_dim)),
fun = .BrierScore,
thresholds = thresholds,
ncores = ncores)
}
.BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1)) {
# exp: [sdate] or [sdate, nexp]
# obs: [sdate] or [sdate, nobs]
if (length(dim(exp)) == 2) {
nexp <- as.numeric(dim(exp)[2])
nobs <- as.numeric(dim(obs)[2])
exp_ori <- exp
obs_ori <- obs
# Create empty arrays
arr_rel <- arr_res <- arr_unc <- arr_bs <- arr_bs_check_res <- arr_bss_res <-
arr_gres <- arr_bs_check_gres <- arr_bss_gres <- arr_rel_bias_corrected <-
arr_gres_bias_corrected <- arr_unc_bias_corrected <- arr_bss_bias_corrected <-
array(dim = c(nexp = nexp, nobs = nobs))
arr_nk <- arr_fkbar <- arr_okbar <-
array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) + 1))
for (n_exp in 1:nexp) {
for (n_obs in 1:nobs) {
if (exists('exp_ori')) {
exp <- exp_ori[, n_exp]
obs <- obs_ori[, n_obs]
}
n <- length(exp)
nbins <- length(thresholds) + 1 # Number of bins
bins <- vector('list', nbins) #as.list(paste("bin", 1:nbins, sep = ""))
if (i == 1) {
bins[[i]] <- list(which(exp >= 0 & exp < thresholds[i]))
} else if (i == nbins) {
bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp <= 1))
bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp < thresholds[i]))
fkbar <- okbar <- nk <- array(0, dim = nbins)
for (i in 1:nbins) {
nk[i] <- length(bins[[i]][[1]])
fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i]
okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i]
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#-----in old .BrierScore()---------
# fkbar[fkbar == Inf] <- 0
# okbar[is.nan(okbar)] <- 0
#----------------------------------
obar <- sum(obs) / length(obs)
relsum <- ressum <- term1 <- term2 <- 0
for (i in 1:nbins) {
if (nk[i] > 0) {
relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2
ressum <- ressum + nk[i] * (okbar[i] - obar)^2
for (j in 1:nk[i]) {
term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2
term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i])
}
}
}
rel <- relsum / n
res <- ressum / n
unc <- obar * (1 - obar)
bs <- sum((exp - obs)^2) / n
bs_check_res <- rel - res + unc
bss_res <- (res - rel) / unc
gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution
bs_check_gres <- rel - gres + unc # BS using GRES
bss_gres <- (gres - rel) / unc # BSS using GRES
# Estimating the bias-corrected components of the BS
term3 <- array(0, nbins)
for (i in 1:nbins) {
term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i])
}
term_a <- sum(term3, na.rm = T) / n
term_b <- (obar * (1 - obar)) / (n - 1)
rel_bias_corrected <- rel - term_a
gres_bias_corrected <- gres - term_a + term_b
if (rel_bias_corrected < 0 || gres_bias_corrected < 0) {
rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0)
gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0)
rel_bias_corrected <- rel_bias_corrected2
gres_bias_corrected <- gres_bias_corrected2
}
unc_bias_corrected <- unc + term_b
bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected
#if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) {
# cat("No error found \ n")
# cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n")
#}
# Add name for nk, fkbar, okbar
names(dim(nk)) <- 'bin'
names(dim(fkbar)) <- 'bin'
names(dim(okbar)) <- 'bin'
if (exists('exp_ori')) {
arr_rel[n_exp, n_obs] <- rel
arr_res[n_exp, n_obs] <- res
arr_unc[n_exp, n_obs] <- unc
arr_bs[n_exp, n_obs] <- bs
arr_bs_check_res[n_exp, n_obs] <- bs_check_res
arr_bss_res[n_exp, n_obs] <- bss_res
arr_gres[n_exp, n_obs] <- gres
arr_bs_check_gres[n_exp, n_obs] <- bs_check_gres
arr_bss_gres[n_exp, n_obs] <- bss_gres
arr_rel_bias_corrected[n_exp, n_obs] <- rel_bias_corrected
arr_gres_bias_corrected[n_exp, n_obs] <- gres_bias_corrected
arr_unc_bias_corrected[n_exp, n_obs] <- unc_bias_corrected
arr_bss_bias_corrected[n_exp, n_obs] <- bss_bias_corrected
arr_nk[n_exp, n_obs, ] <- nk
arr_fkbar[n_exp, n_obs, ] <- fkbar
arr_okbar[n_exp, n_obs, ] <- okbar
}
if (exists('exp_ori')) {
res_list <- list(rel = arr_rel, res = arr_res, unc = arr_unc, bs = arr_bs,
bs_check_res = arr_bs_check_res, bss_res = arr_bss_res,
gres = arr_gres, bs_check_gres = arr_bs_check_gres,
bss_gres = arr_bss_gres, rel_bias_corrected = arr_rel_bias_corrected,
gres_bias_corrected = arr_gres_bias_corrected,
unc_bias_corrected = arr_unc_bias_corrected,
bss_bias_corrected = arr_bss_bias_corrected, nk = arr_nk,
fkbar = arr_fkbar, okbar = arr_okbar) #bins = list(bins),
} else {
res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res,
bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres,
bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected,
gres_bias_corrected = gres_bias_corrected,
unc_bias_corrected = unc_bias_corrected,
bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar,
okbar = okbar) #bins = list(bins),
}