#'Compute Brier score, its decomposition, and Brier skill score #' #'Compute the Brier score (BS) and the components of its standard decompostion #'with the two within-bin components described in Stephenson et al., (2008). It #'also returns the bias-corrected decomposition of the BS (Ferro and Fricker, #'2012). BSS has the climatology as the reference forecast. #' #'@param exp A vector or a numeric array with named dimensions. It should be #' the predicted probabilities which are within the range [0, 1] if memb_dim #' doesn't exist. If it has memb_dim, the value should be 0 or 1, and the #' predicted probabilities will be computed by ensemble mean. The dimensions #' must at least have 'time_dim'. #' range [0, 1]. #'@param obs A numeric array with named dimensions of the binary observations #' (0 or 1). The dimension must be the same as 'exp' except memb_dim, which is #' optional. If it has 'memb_dim', then the length must be 1. The length of #' 'dat_dim' can be different from 'exp' if it has. #'@param thresholds A numeric vector used to bin the forecasts. The default #' value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are #' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}. #'@param time_dim A character string indicating the name of dimension along #' which Brier score is computed. The default value is 'sdate'. #'@param dat_dim A character string indicating the name of dataset dimension in #' 'exp' and 'obs'. The length of this dimension can be different between #' 'exp' and 'obs'. The default value is NULL. #'@param memb_dim A character string of the name of the member dimension in #' 'exp' (and 'obs', optional). The function will do the ensemble mean #' over this dimension. If there is no member dimension, set NULL. The default #' value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return #'A list that contains: #'\item{$rel}{standard reliability} #'\item{$res}{standard resolution} #'\item{$unc}{standard uncertainty} #'\item{$bs}{Brier score} #'\item{$bs_check_res}{rel - res + unc} #'\item{$bss_res}{res - rel / unc} #'\item{$gres}{generalized resolution} #'\item{$bs_check_gres}{rel - gres + unc} #'\item{$bss_gres}{gres - rel / unc} #'\item{$rel_bias_corrected}{bias - corrected rel} #'\item{$gres_bias_corrected}{bias - corrected gres} #'\item{$unc_bias_corrected}{bias - corrected unc} #'\item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected} #'\item{$nk}{number of forecast in each bin} #'\item{$fkbar}{average probability of each bin} #'\item{$okbar}{relative frequency that the observed event occurred} #'The data type and dimensions of the items depend on if the input 'exp' and #''obs' are:\cr #'(a) Vectors\cr #'(b) Arrays with 'dat_dim' specified\cr #'(c) Arrays with no 'dat_dim' specified\cr #'Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', #''bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', #''unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array #'with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs' #'expect 'time_dim' and 'memb_dim') (c) an array with dimensions of #''exp' and 'obs' except 'time_dim' and 'memb_dim'\cr #'Items 'nk', 'fkbar', and 'okbar' are (a) a vector of length of bin number #'determined by 'threshold' (b) an array with dimensions c(nexp, nobs, #'no. of bins, all the rest dimensions in 'exp' and 'obs' expect 'time_dim' and #''memb_dim') (c) an array with dimensions c(no. of bin, all the rest dimensions #'in 'exp' and 'obs' expect 'time_dim' and 'memb_dim') #' #'@references #'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr #'Stephenson et al. (2008). Two extra components in the Brier score decomposition. #' Weather and Forecasting, 23: 752-757.\cr #'Ferro and Fricker (2012). A bias-corrected decomposition of the BS. #' Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924. #' #'@examples #'# Inputs are vectors #'exp <- runif(10) #'obs <- round(exp) #'x <- BrierScore(exp, obs) #' #'# Inputs are arrays #'example(Load) #'clim <- Clim(sampleData$mod, sampleData$obs) #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) #'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3)) #'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3)) #'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') #' #'@import multiApply #'@export BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, ncores = NULL) { # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { stop("Parameter 'exp' and 'obs' cannot be NULL.") } if (!is.numeric(exp) | !is.numeric(obs)) { stop("Parameter 'exp' and 'obs' must be a numeric vector or a numeric array.") } if (is.null(dim(exp))) { #is vector dim(exp) <- c(length(exp)) names(dim(exp)) <- time_dim } if (is.null(dim(obs))) { #is vector dim(obs) <- c(length(obs)) names(dim(obs)) <- time_dim } if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (any(!obs %in% c(0, 1))) { stop("Parameter 'obs' must be binary events (0 or 1).") } ## thresholds if (!is.numeric(thresholds) | !is.vector(thresholds)) { stop("Parameter 'thresholds' must be a numeric vector.") } if (any(thresholds <= 0 | thresholds >= 1)) { stop("Parameter 'thresholds' must be between 0 and 1 as the bin-breaks.") } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") } if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.") } ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { stop("Parameter 'dat_dim' must be a character string.") } if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { stop("Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension.") } } ## memb_dim if (!is.null(memb_dim)) { if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } if (memb_dim %in% names(dim(obs))) { if (dim(obs)[memb_dim] != 1) { stop("The length of parameter 'memb_dim' in 'obs' must be 1.") } } } ## exp and obs (2) if (is.null(memb_dim)) { if (max(exp) > 1 | min(exp) < 0) { stop("Parameter 'exp' must be within [0, 1] range.") } } else { if (any(!exp %in% c(0, 1))) { stop("Parameter 'exp' must be 0 or 1 if it has memb_dim.") } } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] if (memb_dim %in% name_obs) { name_obs <- name_obs[-which(name_obs == memb_dim)] } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } if (any(name_exp != name_obs)) { stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate Brier score ## ensemble mean if (!is.null(memb_dim)) { exp <- MeanDims(exp, memb_dim) if (memb_dim %in% names(dim(obs))) { obs <- MeanDims(obs, memb_dim) } } if (is.null(dat_dim)) { res <- Apply(list(exp, obs), target_dims = list(c(time_dim), c(time_dim)), fun = .BrierScore, thresholds = thresholds, ncores = ncores) } else { res <- Apply(list(exp, obs), target_dims = list(c(time_dim, dat_dim), c(time_dim, dat_dim)), fun = .BrierScore, thresholds = thresholds, ncores = ncores) } return(res) } .BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1)) { # exp: [sdate] or [sdate, nexp] # obs: [sdate] or [sdate, nobs] if (length(dim(exp)) == 2) { nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) exp_ori <- exp obs_ori <- obs # Create empty arrays arr_rel <- arr_res <- arr_unc <- arr_bs <- arr_bs_check_res <- arr_bss_res <- arr_gres <- arr_bs_check_gres <- arr_bss_gres <- arr_rel_bias_corrected <- arr_gres_bias_corrected <- arr_unc_bias_corrected <- arr_bss_bias_corrected <- array(dim = c(nexp = nexp, nobs = nobs)) arr_nk <- arr_fkbar <- arr_okbar <- array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) + 1)) } else { nexp <- 1 nobs <- 1 } for (n_exp in 1:nexp) { for (n_obs in 1:nobs) { if (exists('exp_ori')) { exp <- exp_ori[, n_exp] obs <- obs_ori[, n_obs] } n <- length(exp) nbins <- length(thresholds) + 1 # Number of bins bins <- vector('list', nbins) #as.list(paste("bin", 1:nbins, sep = "")) for (i in 1:nbins) { if (i == 1) { bins[[i]] <- list(which(exp >= 0 & exp < thresholds[i])) } else if (i == nbins) { bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp <= 1)) } else { bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp < thresholds[i])) } } fkbar <- okbar <- nk <- array(0, dim = nbins) for (i in 1:nbins) { nk[i] <- length(bins[[i]][[1]]) fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i] okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] } #-----in old .BrierScore()--------- # fkbar[fkbar == Inf] <- 0 # okbar[is.nan(okbar)] <- 0 #---------------------------------- obar <- sum(obs) / length(obs) relsum <- ressum <- term1 <- term2 <- 0 for (i in 1:nbins) { if (nk[i] > 0) { relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 ressum <- ressum + nk[i] * (okbar[i] - obar)^2 for (j in 1:nk[i]) { term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2 term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) } } } rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) bs <- sum((exp - obs)^2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution bs_check_gres <- rel - gres + unc # BS using GRES bss_gres <- (gres - rel) / unc # BSS using GRES # Estimating the bias-corrected components of the BS term3 <- array(0, nbins) for (i in 1:nbins) { term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) } term_a <- sum(term3, na.rm = T) / n term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) rel_bias_corrected <- rel_bias_corrected2 gres_bias_corrected <- gres_bias_corrected2 } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { # cat("No error found \ n") # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} # Add name for nk, fkbar, okbar names(dim(nk)) <- 'bin' names(dim(fkbar)) <- 'bin' names(dim(okbar)) <- 'bin' if (exists('exp_ori')) { arr_rel[n_exp, n_obs] <- rel arr_res[n_exp, n_obs] <- res arr_unc[n_exp, n_obs] <- unc arr_bs[n_exp, n_obs] <- bs arr_bs_check_res[n_exp, n_obs] <- bs_check_res arr_bss_res[n_exp, n_obs] <- bss_res arr_gres[n_exp, n_obs] <- gres arr_bs_check_gres[n_exp, n_obs] <- bs_check_gres arr_bss_gres[n_exp, n_obs] <- bss_gres arr_rel_bias_corrected[n_exp, n_obs] <- rel_bias_corrected arr_gres_bias_corrected[n_exp, n_obs] <- gres_bias_corrected arr_unc_bias_corrected[n_exp, n_obs] <- unc_bias_corrected arr_bss_bias_corrected[n_exp, n_obs] <- bss_bias_corrected arr_nk[n_exp, n_obs, ] <- nk arr_fkbar[n_exp, n_obs, ] <- fkbar arr_okbar[n_exp, n_obs, ] <- okbar } } } if (exists('exp_ori')) { res_list <- list(rel = arr_rel, res = arr_res, unc = arr_unc, bs = arr_bs, bs_check_res = arr_bs_check_res, bss_res = arr_bss_res, gres = arr_gres, bs_check_gres = arr_bs_check_gres, bss_gres = arr_bss_gres, rel_bias_corrected = arr_rel_bias_corrected, gres_bias_corrected = arr_gres_bias_corrected, unc_bias_corrected = arr_unc_bias_corrected, bss_bias_corrected = arr_bss_bias_corrected, nk = arr_nk, fkbar = arr_fkbar, okbar = arr_okbar) #bins = list(bins), } else { res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar) #bins = list(bins), } return(invisible(res_list)) }