From 85c8d3bbef0eca53543def5edf492f7c1934b9a0 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 12 Apr 2021 10:14:18 +0200 Subject: [PATCH 01/13] Include BrierScore.R --- R/BrierScore.R | 243 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 R/BrierScore.R diff --git a/R/BrierScore.R b/R/BrierScore.R new file mode 100644 index 0000000..8a9b11a --- /dev/null +++ b/R/BrierScore.R @@ -0,0 +1,243 @@ +#'Compute Brier score and its decomposition and Brier skill score +#' +#'Compute the Brier score (BS) and the components of its standard decompostion +#'as well with the two within-bin components described in Stephenson et al., +#'(2008). It also returns the bias-corrected decomposition of the BS (Ferro and +#'Fricker, 2012). BSS has the climatology as the reference forecast. + +#'.BrierScore provides the same functionality, but taking a matrix of ensemble +#'members (exp) as input. +#' +#'@param obs Vector of binary observations (1 or 0). +#'@param pred Vector of probablistic predictions with values in the range [0,1]. +#'@param thresholds Values used to bin the forecasts. By default the bins are +#' {[0,0.1), [0.1, 0.2), ... [0.9, 1]}. +#'@param exp Matrix of predictions with values in the range [0,1] for the +#' .BrierScore function +#' +#'@return Both BrierScore and .Brier score provide the same outputs: +#'\itemize{ +#' \item{$rel}{standard reliability} +#' \item{$res}{standard resolution} +#' \item{$unc}{standard uncertainty} +#' \item{$bs}{Brier score} +#' \item{$bs_check_res}{rel-res+unc} +#' \item{$bss_res}{res-rel/unc} +#' \item{$gres}{generalized resolution} +#' \item{$bs_check_gres}{rel-gres+unc} +#' \item{$bss_gres}{gres-rel/unc} +#' \item{$rel_bias_corrected}{bias-corrected rel} +#' \item{$gres_bias_corrected}{bias-corrected gres} +#' \item{$unc_bias_corrected}{bias-corrected unc} +#' \item{$bss_bias_corrected}{gres_bias_corrected-rel_bias_corrected/unc_bias_corrected} +#' \item{$nk}{number of forecast in each bin} +#' \item{$fkbar}{average probability of each bin} +#' \item{$okbar}{relative frequency that the observed event occurred} +#' \item{$bins}{bins used} +#' \item{$pred}{values with which the forecasts are verified} +#' \item{$obs}{probability forecasts of the event} +#'} +#' +#'@references +#'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr +#'Stephenson et al. (2008). Two extra components in the Brier score decomposition. +#' Weather and Forecasting, 23: 752-757.\cr +#'Ferro and Fricker (2012). A bias-corrected decomposition of the BS. +#' Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924. +#' +#'@examples +#'# Minimalist examples with BrierScore +#'a <- runif(10) +#'b <- round(a) +#'x <- BrierScore(b, a) +#'x$bs - x$bs_check_res +#'x$bs - x$bs_check_gres +#'x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected +#' \dontrun{ +#'a <- runif(10) +#'b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members... +#'x2 <- BrierScore(a, b) +#' } +#' +#'# Example of BrierScore using UltimateBrier +#'# See ?UltimateBrier for more information +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'bs <- UltimateBrier(ano_exp, ano_obs, thr = c(1/3, 2/3)) +#' +#' \dontrun{ +#'# Example of .BrierScore with veriApply +#'require(easyVerification) +#'BrierScore2 <- s2dverification:::.BrierScore +#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'bs2 <- veriApply("BrierScore2", bins_ano_exp, Mean1Dim(bins_ano_ob,s 3), +#' tdim = 2, ensdim = 3) +#' } +#'@import multiApply +#'@export +BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { + if (max(pred) > 1 | min(pred) < 0) { + stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") + } else if (max(obs) != 1 & min(obs) != 0) { + .message("Binary events must be either 0 or 1. Are you certain this is a binary event? ") + } else { + nbins <- length(thresholds) - 1 # Number of bins + n <- length(pred) + bins <- as.list(paste("bin", 1:nbins,sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) + } + } + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + obar <- sum(obs) / length(obs) + relsum <- ressum <- term1 <- term2 <- 0 + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + for (j in 1:nk[i]) { + term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } + } + } + rel <- relsum / n + res <- ressum / n + unc <- obar * (1 - obar) + bs <- sum((pred - obs)^2) / n + bs_check_res <- rel - res + unc + bss_res <- (res - rel) / unc + gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution + bs_check_gres <- rel - gres + unc # BS using GRES + bss_gres <- (gres - rel) / unc # BSS using GRES + + # + # Estimating the bias-corrected components of the BS + # + term3 <- array(0, nbins) + for (i in 1:nbins) { + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + } + term_a <- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs)) + } +} + +#'@rdname BrierScore +#'@export +.BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { + if (max(exp) > 1 || min(exp) < 0) { + stop("Parameter 'exp' contains predictions outside [0,1] range. Are you certain this is a probability forecast?") + } else if (max(obs) != 1 && min(obs) != 0) { + .message("Binary events in 'obs' must be either 0 or 1. Are you certain this is a binary event?") + } else { + nbins <- length(thresholds) - 1 # Number of bins + n <- dim(exp)[1] # Number of observations + ens_mean <- rowMeans(exp, na.rm = TRUE) + n.ens <- seq(1, dim(exp)[2], 1) # Number of ensemble members + bins <- as.list(paste("bin", 1:nbins, sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean < thresholds[i + 1])) + } + } + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(ens_mean[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + fkbar[fkbar == Inf] <- 0 + okbar[is.nan(okbar)] <- 0 + obar <- sum(obs) / length(obs) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 + + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i]) ^ 2 + ressum <- ressum + nk[i] * (okbar[i] - obar) ^ 2 + for (j in 1:nk[i]) { + term1 <- term1 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) ^ 2 + term2 <- term2 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } + } + } + } + rel <- relsum / n + res <- ressum / n + unc <- obar * (1 - obar) + #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) + bs <- sum((rowMeans(exp, na.rm = T) - obs) ^ 2) / n + bs_check_res <- rel - res + unc + bss_res <- (res - rel) / unc + gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution + bs_check_gres <- rel - gres + unc # BS using GRES + bss_gres <- (gres - rel) / unc # BSS using GRES + + # + # Estimating the bias-corrected components of the BS + # + term3 <- array(0, nbins) + for (i in 1:nbins) { + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + } + term_a <- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + invisible(list(rel = rel, res = res, unc = unc, bs = bs, + bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, + bs_check_gres = bs_check_gres, bss_gres = bss_gres, + rel_bias_corrected = rel_bias_corrected, + gres_bias_corrected = gres_bias_corrected, + unc_bias_corrected = unc_bias_corrected, + bss_bias_corrected = bss_bias_corrected)) +} -- GitLab From 5eff14bc8f78ecd36d8b0270025165c6a2c9e431 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 12 Apr 2021 20:13:00 +0200 Subject: [PATCH 02/13] Modify BrierScore() and create unit test --- NAMESPACE | 1 + R/BrierScore.R | 354 ++++++++++++++++--------------- man/AMV.Rd | 29 ++- man/BrierScore.Rd | 89 ++++++++ man/GMST.Rd | 51 +++-- man/GSAT.Rd | 30 +-- man/SPOD.Rd | 30 +-- man/TPI.Rd | 30 +-- tests/testthat/test-BrierScore.R | 169 +++++++++++++++ 9 files changed, 521 insertions(+), 262 deletions(-) create mode 100644 man/BrierScore.Rd create mode 100644 tests/testthat/test-BrierScore.R diff --git a/NAMESPACE b/NAMESPACE index 87937e2..0e54899 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(ACC) export(AMV) export(AnimateMap) export(Ano) +export(BrierScore) export(CDORemap) export(Clim) export(Cluster) diff --git a/R/BrierScore.R b/R/BrierScore.R index 8a9b11a..bf456a5 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -4,39 +4,45 @@ #'as well with the two within-bin components described in Stephenson et al., #'(2008). It also returns the bias-corrected decomposition of the BS (Ferro and #'Fricker, 2012). BSS has the climatology as the reference forecast. - -#'.BrierScore provides the same functionality, but taking a matrix of ensemble -#'members (exp) as input. #' -#'@param obs Vector of binary observations (1 or 0). -#'@param pred Vector of probablistic predictions with values in the range [0,1]. -#'@param thresholds Values used to bin the forecasts. By default the bins are -#' {[0,0.1), [0.1, 0.2), ... [0.9, 1]}. -#'@param exp Matrix of predictions with values in the range [0,1] for the -#' .BrierScore function +#'@param exp A vector or a numeric array with named dimensions of the probablistic +#' prediction data. The dimension must at least have 'time_dim'. It may have +#' 'memb_dim' for performing ensemble mean. The values should be within the +#' range [0, 1]. +#'@param obs A numeric array with named dimensions of the binary observations +#' (0 or 1). The dimension must at least have 'time_dim' and other dimensions +#' of 'exp' except 'memb_dim'. +#'@param thresholds A numeric vector used to bin the forecasts. The default +#' value is \code{seq(0, 1, 0,1)}, which means that the bins are +#' \code{[0,0.1), [0.1, 0.2), ... [0.9, 1]}. +#'@param time_dim A character string indicating the name of dimension along +#' which Brier score is computed. The default value is 'sdate'. +#'@param memb_dim A character string of the name of the member dimension. It +#' must be one dimension of 'exp'. The function will do the ensemble mean +#' over this dimension. If there is no member dimension, set NULL. The default +#' value is NULL. #' -#'@return Both BrierScore and .Brier score provide the same outputs: -#'\itemize{ -#' \item{$rel}{standard reliability} -#' \item{$res}{standard resolution} -#' \item{$unc}{standard uncertainty} -#' \item{$bs}{Brier score} -#' \item{$bs_check_res}{rel-res+unc} -#' \item{$bss_res}{res-rel/unc} -#' \item{$gres}{generalized resolution} -#' \item{$bs_check_gres}{rel-gres+unc} -#' \item{$bss_gres}{gres-rel/unc} -#' \item{$rel_bias_corrected}{bias-corrected rel} -#' \item{$gres_bias_corrected}{bias-corrected gres} -#' \item{$unc_bias_corrected}{bias-corrected unc} -#' \item{$bss_bias_corrected}{gres_bias_corrected-rel_bias_corrected/unc_bias_corrected} -#' \item{$nk}{number of forecast in each bin} -#' \item{$fkbar}{average probability of each bin} -#' \item{$okbar}{relative frequency that the observed event occurred} -#' \item{$bins}{bins used} -#' \item{$pred}{values with which the forecasts are verified} -#' \item{$obs}{probability forecasts of the event} -#'} +#'@return A list that contains: +#'The numeric arrays with all 'exp' and 'obs' dimensions expect 'time_dim' and +#''memb_dim': +#'\item{$rel}{standard reliability} +#'\item{$res}{standard resolution} +#'\item{$unc}{standard uncertainty} +#'\item{$bs}{Brier score} +#'\item{$bs_check_res}{rel - res + unc} +#'\item{$bss_res}{res - rel / unc} +#'\item{$gres}{generalized resolution} +#'\item{$bs_check_gres}{rel - gres + unc} +#'\item{$bss_gres}{gres - rel / unc} +#'\item{$rel_bias_corrected}{bias - corrected rel} +#'\item{$gres_bias_corrected}{bias - corrected gres} +#'\item{$unc_bias_corrected}{bias - corrected unc} +#'\item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected} +#'The numeric arrays with the same dimensions as above and one additional +#'dimension 'bin': +#'\item{$nk}{number of forecast in each bin} +#'\item{$fkbar}{average probability of each bin} +#'\item{$okbar}{relative frequency that the observed event occurred} #' #'@references #'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr @@ -46,171 +52,162 @@ #' Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924. #' #'@examples -#'# Minimalist examples with BrierScore -#'a <- runif(10) -#'b <- round(a) -#'x <- BrierScore(b, a) -#'x$bs - x$bs_check_res -#'x$bs - x$bs_check_gres -#'x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected -#' \dontrun{ -#'a <- runif(10) -#'b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members... -#'x2 <- BrierScore(a, b) -#' } +#'# Inputs are vectors +#'exp <- runif(10) +#'obs <- round(a) +#'x <- BrierScore(exp, obs) +#'res <- x$bs - x$bs_check_res +#'res <- x$bs - x$bs_check_gres +#'res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected #' -#'# Example of BrierScore using UltimateBrier -#'# See ?UltimateBrier for more information +#'# Inputs are arrays #'example(Load) -#'clim <- Clim(sampleData$mod, sampleData$obs) -#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) -#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'bs <- UltimateBrier(ano_exp, ano_obs, thr = c(1/3, 2/3)) -#' -#' \dontrun{ -#'# Example of .BrierScore with veriApply -#'require(easyVerification) -#'BrierScore2 <- s2dverification:::.BrierScore #'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) #'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'bs2 <- veriApply("BrierScore2", bins_ano_exp, Mean1Dim(bins_ano_ob,s 3), -#' tdim = 2, ensdim = 3) -#' } +#'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') +#' #'@import multiApply #'@export -BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { - if (max(pred) > 1 | min(pred) < 0) { - stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") - } else if (max(obs) != 1 & min(obs) != 0) { - .message("Binary events must be either 0 or 1. Are you certain this is a binary event? ") - } else { - nbins <- length(thresholds) - 1 # Number of bins - n <- length(pred) - bins <- as.list(paste("bin", 1:nbins,sep = "")) - for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) - } else { - bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) - } - } - - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] - } - - obar <- sum(obs) / length(obs) - relsum <- ressum <- term1 <- term2 <- 0 - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i]) { - term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) - } - } +BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate', + memb_dim = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a vector or a numeric array.") + } + if (is.null(dim(exp))) { #is vector + dim(exp) <- c(length(exp)) + names(dim(exp)) <- time_dim + } + if (is.null(dim(obs))) { #is vector + dim(obs) <- c(length(obs)) + names(dim(obs)) <- time_dim + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (max(exp) > 1 | min(exp) < 0) { + stop("Parameter 'exp' must be within [0, 1] range.") + } + if (any(!obs %in% c(0, 1))) { + stop("Parameter 'obs' must be binary events (0 or 1).") + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (any(name_exp != name_obs)) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'exp' may have 'memb_dim'.")) + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'exp' may have 'memb_dim'.")) + } + ## thresholds + if (!is.numeric(thresholds) | !is.vector(thresholds)) { + stop("Parameter 'thresholds' must be a numeric vector.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") } - rel <- relsum / n - res <- ressum / n - unc <- obar * (1 - obar) - bs <- sum((pred - obs)^2) / n - bs_check_res <- rel - res + unc - bss_res <- (res - rel) / unc - gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution - bs_check_gres <- rel - gres + unc # BS using GRES - bss_gres <- (gres - rel) / unc # BSS using GRES - - # - # Estimating the bias-corrected components of the BS - # - term3 <- array(0, nbins) - for (i in 1:nbins) { - term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - term_a <- sum(term3, na.rm = T) / n - term_b <- (obar * (1 - obar)) / (n - 1) - rel_bias_corrected <- rel - term_a - gres_bias_corrected <- gres - term_a + term_b - if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { - rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) - gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) - rel_bias_corrected <- rel_bias_corrected2 - gres_bias_corrected <- gres_bias_corrected2 + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } - unc_bias_corrected <- unc + term_b - bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected - - #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { - # cat("No error found \ n") - # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") - #} - - invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs)) + } + + ############################### + # Calculate Brier score + + ## ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim) } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim), + c(time_dim)), + fun = .BrierScore, + thresholds = thresholds, + ncores = ncores) + + return(res) } -#'@rdname BrierScore -#'@export .BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { - if (max(exp) > 1 || min(exp) < 0) { - stop("Parameter 'exp' contains predictions outside [0,1] range. Are you certain this is a probability forecast?") - } else if (max(obs) != 1 && min(obs) != 0) { - .message("Binary events in 'obs' must be either 0 or 1. Are you certain this is a binary event?") - } else { - nbins <- length(thresholds) - 1 # Number of bins - n <- dim(exp)[1] # Number of observations - ens_mean <- rowMeans(exp, na.rm = TRUE) - n.ens <- seq(1, dim(exp)[2], 1) # Number of ensemble members - bins <- as.list(paste("bin", 1:nbins, sep = "")) - for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean <= thresholds[i + 1])) - } else { - bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean < thresholds[i + 1])) - } - } - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(ens_mean[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + # exp: [sdate] + # obs: [sdate] + + n <- length(exp) + nbins <- length(thresholds) - 1 # Number of bins + bins <- as.list(paste("bin", 1:nbins, sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(exp >= thresholds[i] & exp <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(exp >= thresholds[i] & exp < thresholds[i + 1])) } + } - fkbar[fkbar == Inf] <- 0 - okbar[is.nan(okbar)] <- 0 - obar <- sum(obs) / length(obs) - relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + +#-----in old .BrierScore()--------- +# fkbar[fkbar == Inf] <- 0 +# okbar[is.nan(okbar)] <- 0 +#---------------------------------- - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i]) ^ 2 - ressum <- ressum + nk[i] * (okbar[i] - obar) ^ 2 - for (j in 1:nk[i]) { - term1 <- term1 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) ^ 2 - term2 <- term2 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) - } + obar <- sum(obs) / length(obs) + relsum <- ressum <- term1 <- term2 <- 0 + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + for (j in 1:nk[i]) { + term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) } } } rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) - #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - bs <- sum((rowMeans(exp, na.rm = T) - obs) ^ 2) / n + bs <- sum((exp - obs)^2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution bs_check_gres <- rel - gres + unc # BS using GRES bss_gres <- (gres - rel) / unc # BSS using GRES + - # # Estimating the bias-corrected components of the BS - # term3 <- array(0, nbins) for (i in 1:nbins) { term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) @@ -227,17 +224,24 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected - + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { # cat("No error found \ n") # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} - - invisible(list(rel = rel, res = res, unc = unc, bs = bs, - bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, - bs_check_gres = bs_check_gres, bss_gres = bss_gres, - rel_bias_corrected = rel_bias_corrected, - gres_bias_corrected = gres_bias_corrected, - unc_bias_corrected = unc_bias_corrected, - bss_bias_corrected = bss_bias_corrected)) + + # Add name for nk, fkbar, okbar + names(dim(nk)) <- 'bin' + names(dim(fkbar)) <- 'bin' + names(dim(okbar)) <- 'bin' + + res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, + bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, + bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, + gres_bias_corrected = gres_bias_corrected, + unc_bias_corrected = unc_bias_corrected, + bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, + okbar = okbar) #bins = list(bins), + + return(invisible(res_list)) } diff --git a/man/AMV.Rd b/man/AMV.Rd index 0f81652..3cc4113 100644 --- a/man/AMV.Rd +++ b/man/AMV.Rd @@ -18,16 +18,15 @@ AMV( indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member", + na.rm = TRUE, ncores = NULL ) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -70,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -80,18 +79,17 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of the AMV index with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the AMV index with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The Atlantic Multidecadal Variability (AMV), also known as Atlantic @@ -100,7 +98,8 @@ surface temperatures (SST) over the North Atlantic Ocean on multi-decadal time scales. The AMV index is computed as the difference of weighted-averaged SST anomalies over the North Atlantic region (0ºN-60ºN, 280ºE-360ºE) and the weighted-averaged SST anomalies over 60ºS-60ºN, 0ºE-360ºE (Trenberth & -Dennis, 2005; Doblas-Reyes et al., 2013). +Dennis, 2005; Doblas-Reyes et al., 2013). If different members and/or datasets are provided, +the climatology (used to calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd new file mode 100644 index 0000000..9217718 --- /dev/null +++ b/man/BrierScore.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/BrierScore.R +\name{BrierScore} +\alias{BrierScore} +\title{Compute Brier score and its decomposition and Brier skill score} +\usage{ +BrierScore( + exp, + obs, + thresholds = seq(0, 1, 0.1), + time_dim = "sdate", + memb_dim = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A vector or a numeric array with named dimensions of the probablistic +prediction data. The dimension must at least have 'time_dim'. It may have +'memb_dim' for performing ensemble mean. The values should be within the +range [0, 1].} + +\item{obs}{A numeric array with named dimensions of the binary observations +(0 or 1). The dimension must at least have 'time_dim' and other dimensions +of 'exp' except 'memb_dim'.} + +\item{thresholds}{A numeric vector used to bin the forecasts. The default +value is \code{seq(0, 1, 0,1)}, which means that the bins are + \code{[0,0.1), [0.1, 0.2), ... [0.9, 1]}.} + +\item{time_dim}{A character string indicating the name of dimension along +which Brier score is computed. The default value is 'sdate'.} + +\item{memb_dim}{A character string of the name of the member dimension. It +must be one dimension of 'exp'. The function will do the ensemble mean +over this dimension. If there is no member dimension, set NULL. The default +value is NULL.} +} +\value{ +A list that contains: +The numeric arrays with all 'exp' and 'obs' dimensions expect 'time_dim' and +'memb_dim': +\item{$rel}{standard reliability} +\item{$res}{standard resolution} +\item{$unc}{standard uncertainty} +\item{$bs}{Brier score} +\item{$bs_check_res}{rel - res + unc} +\item{$bss_res}{res - rel / unc} +\item{$gres}{generalized resolution} +\item{$bs_check_gres}{rel - gres + unc} +\item{$bss_gres}{gres - rel / unc} +\item{$rel_bias_corrected}{bias - corrected rel} +\item{$gres_bias_corrected}{bias - corrected gres} +\item{$unc_bias_corrected}{bias - corrected unc} +\item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected} +The numeric arrays with the same dimensions as above and one additional +dimension 'bin': +\item{$nk}{number of forecast in each bin} +\item{$fkbar}{average probability of each bin} +\item{$okbar}{relative frequency that the observed event occurred} +} +\description{ +Compute the Brier score (BS) and the components of its standard decompostion +as well with the two within-bin components described in Stephenson et al., +(2008). It also returns the bias-corrected decomposition of the BS (Ferro and +Fricker, 2012). BSS has the climatology as the reference forecast. +} +\examples{ +# Inputs are vectors +exp <- runif(10) +obs <- round(a) +x <- BrierScore(exp, obs) +res <- x$bs - x$bs_check_res +res <- x$bs - x$bs_check_gres +res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected + +# Inputs are arrays +example(Load) +bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') + +} +\references{ +Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr +Stephenson et al. (2008). Two extra components in the Brier score decomposition. + Weather and Forecasting, 23: 752-757.\cr +Ferro and Fricker (2012). A bias-corrected decomposition of the BS. + Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924. +} diff --git a/man/GMST.Rd b/man/GMST.Rd index 8021943..f23e60d 100644 --- a/man/GMST.Rd +++ b/man/GMST.Rd @@ -21,28 +21,25 @@ GMST( indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member", + na.rm = TRUE, ncores = NULL ) } \arguments{ -\item{data_tas}{A numerical array indicating the surface air temperature data -to be used for the index computation with the dimensions: 1) latitude, -longitude, start date, forecast month, and member (in case of decadal -predictions), 2) latitude, longitude, year, month and member (in case of -historical simulations), or 3) latitude, longitude, year and month (in case -of observations or reanalyses). This data has to be provided, at least, -over the whole region needed to compute the index. The dimensions must be -identical to those of data_tos.} - -\item{data_tos}{A numerical array indicating the sea surface temperature data -to be used for the index computation with the dimensions: 1) latitude, -longitude, start date, forecast month, and member (in case of decadal -predictions), 2) latitude, longitude, year, month and member (in case of -historical simulations), or 3) latitude, longitude, year and month (in case -of observations or reanalyses). This data has to be provided, at least, -over the whole region needed to compute the index. The dimensions must be -identical to those of data_tas.} +\item{data_tas}{A numerical array with the surface air temperature data +to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be +provided, at least, over the whole region needed to compute the index. +The dimensions must be identical to thos of data_tos. +#'@param data_tos A numerical array with the sea surface temperature data +to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be +provided, at least, over the whole region needed to compute the index. +The dimensions must be identical to thos of data_tas.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -90,7 +87,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -100,23 +97,23 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of the GMST anomalies with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the GMST anomalies with the same dimensions as data_tas except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The Global Mean Surface Temperature (GMST) anomalies are computed as the weighted-averaged surface air temperature anomalies over land and sea surface -temperature anomalies over the ocean. +temperature anomalies over the ocean. If different members and/or datasets are provided, +the climatology (used to calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses diff --git a/man/GSAT.Rd b/man/GSAT.Rd index d7fe3cf..9f03ff2 100644 --- a/man/GSAT.Rd +++ b/man/GSAT.Rd @@ -18,16 +18,15 @@ GSAT( indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member", + na.rm = TRUE, ncores = NULL ) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -70,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -80,22 +79,23 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of the GSAT anomalies with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the GSAT anomalies with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The Global Surface Air Temperature (GSAT) anomalies are computed as the -weighted-averaged surface air temperature anomalies over the global region. +weighted-averaged surface air temperature anomalies over the global region. +If different members and/or datasets are provided, the climatology (used to +calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses diff --git a/man/SPOD.Rd b/man/SPOD.Rd index 0491739..4c4ed29 100644 --- a/man/SPOD.Rd +++ b/man/SPOD.Rd @@ -18,16 +18,15 @@ SPOD( indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member", + na.rm = TRUE, ncores = NULL ) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -70,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -80,25 +79,26 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of the SPOD index with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the SPOD index with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The South Pacific Ocean Dipole (SPOD) index is related to the El Nino-Southern Oscillation (ENSO) and the Inderdecadal Pacific Oscillation (IPO). The SPOD index is computed as the difference of weighted-averaged SST anomalies over 20ºS-48ºS, 165ºE-190ºE (NW pole) and the weighted-averaged SST -anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020). +anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020). +If different members and/or datasets are provided, the climatology (used to +calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses diff --git a/man/TPI.Rd b/man/TPI.Rd index 3bdc17c..5e8f716 100644 --- a/man/TPI.Rd +++ b/man/TPI.Rd @@ -18,16 +18,15 @@ TPI( indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member", + na.rm = TRUE, ncores = NULL ) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -70,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -80,24 +79,25 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of the TPI index with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the TPI index with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The Tripole Index (TPI) for the Interdecadal Pacific Oscillation (IPO) is computed as the difference of weighted-averaged SST anomalies over 10ºS-10ºN, 170ºE-270ºE minus the mean of the weighted-averaged SST anomalies over -25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +If different members and/or datasets are provided, the climatology (used to +calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses diff --git a/tests/testthat/test-BrierScore.R b/tests/testthat/test-BrierScore.R new file mode 100644 index 0000000..d4080b4 --- /dev/null +++ b/tests/testthat/test-BrierScore.R @@ -0,0 +1,169 @@ +context("s2dv::BrierScore tests") + +############################################## +# dat1 +set.seed(1) +exp1 <- array(runif(30), dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(round(runif(10)), dim = c(dataset = 1, sdate = 5, ftime = 2)) + +# dat2 +set.seed(1) +exp2 <- runif(10) +set.seed(2) +obs2 <- round(runif(10)) + + +############################################## +test_that("1. Input checks", { + # exp and obs + expect_error( + BrierScore(exp1, c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + BrierScore(c('b'), obs1), + "Parameter 'exp' and 'obs' must be a vector or a numeric array." + ) + expect_error( + BrierScore(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + BrierScore(exp = 1:10, obs = obs1), + "Parameter 'exp' must be within \\[0, 1\\] range." + ) + expect_error( + BrierScore(exp1, runif(10)), + "Parameter 'obs' must be binary events \\(0 or 1\\)." + ) + expect_error( + BrierScore(exp1, obs1), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'exp' may have 'memb_dim'.") + ) + # thresholds + expect_error( + BrierScore(exp2, obs2, thresholds = TRUE), + "Parameter 'thresholds' must be a numeric vector." + ) + # time_dim + expect_error( + BrierScore(exp2, obs2, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + BrierScore(exp1, obs1, memb_dim = 'member', time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' and 'obs' dimension." + ) + expect_error( + BrierScore(exp2, obs2, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + BrierScore(exp2, obs2, memb_dim = 'ensemble'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + BrierScore(exp2, obs2, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +length(BrierScore(exp1, obs1, memb_dim = 'member')), +16 +) +expect_equal( +names(BrierScore(exp1, obs1, memb_dim = 'member')), +c('rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', + 'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', + 'unc_bias_corrected', 'bss_bias_corrected', 'nk', 'fkbar', 'okbar') +) +expect_equal( +dim(BrierScore(exp1, obs1, memb_dim = 'member')$rel), +c(dataset = 1, ftime = 2) +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$rel[1, ], +c(0.1013649, 0.2549810), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$res[1, ], +c(0.24, 0.24), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$bs[1, ], +c(0.1016759, 0.2549810), +tolerance = 0.0001 +) +expect_equal( +dim(BrierScore(exp1, obs1, memb_dim = 'member')$okbar), +c(bin = 10, dataset = 1, ftime = 2) +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$okbar[, 1, 1], +c(NaN, 0, NaN, NaN, 0, NaN, 1, 1, NaN, NaN) +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$fkbar[, 1, 1], +c(NaN, 0.1481059, NaN, NaN, 0.4034953, NaN, 0.6415412, 0.7448624, NaN, NaN), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1, memb_dim = 'member')$nk[, 1, 1], +c(0, 1, 0, 0, 1, 0, 2, 1, 0, 0) +) + +}) + + +############################################## +test_that("3. Output checks: dat2", { +expect_equal( +length(BrierScore(exp2, obs2)), +16 +) +expect_equal( +dim(BrierScore(exp2, obs2))$bss, +NULL +) +expect_equal( +length(BrierScore(exp2, obs2)$bss_res), +1 +) +expect_equal( +dim(BrierScore(exp2, obs2))$nk, +NULL +) +expect_equal( +length(BrierScore(exp2, obs2)$nk), +10 +) +expect_equal( +BrierScore(exp2, obs2)$bs, +0.4403154, +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp2, obs2)$gres_bias_corrected, +0.06313199, +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp2, obs2)$bss_bias_corrected, +-0.6511828, +tolerance = 0.0001 +) +expect_equal( +as.vector(BrierScore(exp2, obs2)$nk), +c(1, 0, 2, 1, 0, 1, 2, 0, 1, 2) +) + +}) -- GitLab From 020a7c64cadfa183fef07e52787b2cbf57874b6c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 20 Apr 2021 20:23:03 +0200 Subject: [PATCH 03/13] Refine output dimensions --- R/BrierScore.R | 292 +++++++++++++++++++++---------- man/BrierScore.Rd | 45 +++-- tests/testthat/test-BrierScore.R | 26 +++ 3 files changed, 254 insertions(+), 109 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index bf456a5..3951b97 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -1,9 +1,9 @@ -#'Compute Brier score and its decomposition and Brier skill score +#'Compute Brier score, its decomposition, and Brier skill score #' #'Compute the Brier score (BS) and the components of its standard decompostion -#'as well with the two within-bin components described in Stephenson et al., -#'(2008). It also returns the bias-corrected decomposition of the BS (Ferro and -#'Fricker, 2012). BSS has the climatology as the reference forecast. +#'with the two within-bin components described in Stephenson et al., (2008). It +#'also returns the bias-corrected decomposition of the BS (Ferro and Fricker, +#'2012). BSS has the climatology as the reference forecast. #' #'@param exp A vector or a numeric array with named dimensions of the probablistic #' prediction data. The dimension must at least have 'time_dim'. It may have @@ -11,20 +11,25 @@ #' range [0, 1]. #'@param obs A numeric array with named dimensions of the binary observations #' (0 or 1). The dimension must at least have 'time_dim' and other dimensions -#' of 'exp' except 'memb_dim'. +#' of 'exp' except 'memb_dim'. The length of 'dat_dim' can be different from +#' 'exp'. #'@param thresholds A numeric vector used to bin the forecasts. The default -#' value is \code{seq(0, 1, 0,1)}, which means that the bins are -#' \code{[0,0.1), [0.1, 0.2), ... [0.9, 1]}. +#' value is \code{seq(0, 1, 0.1)}, which means that the bins are +#' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}. #'@param time_dim A character string indicating the name of dimension along #' which Brier score is computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension in +#' 'exp' and 'obs'. The length of this dimension can be different between +#' 'exp' and 'obs'. The default value is NULL. #'@param memb_dim A character string of the name of the member dimension. It #' must be one dimension of 'exp'. The function will do the ensemble mean #' over this dimension. If there is no member dimension, set NULL. The default #' value is NULL. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' -#'@return A list that contains: -#'The numeric arrays with all 'exp' and 'obs' dimensions expect 'time_dim' and -#''memb_dim': +#'@return +#'A list that contains: #'\item{$rel}{standard reliability} #'\item{$res}{standard resolution} #'\item{$unc}{standard uncertainty} @@ -38,12 +43,26 @@ #'\item{$gres_bias_corrected}{bias - corrected gres} #'\item{$unc_bias_corrected}{bias - corrected unc} #'\item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected} -#'The numeric arrays with the same dimensions as above and one additional -#'dimension 'bin': #'\item{$nk}{number of forecast in each bin} #'\item{$fkbar}{average probability of each bin} #'\item{$okbar}{relative frequency that the observed event occurred} -#' +#'The data type and dimensions of the items depend on if the input 'exp' and +#''obs' are:\cr +#'(a) Vectors\cr +#'(b) Arrays with 'dat_dim' specified\cr +#'(c) Arrays with no 'dat_dim' specified\cr +#'Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', +#''bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', +#''unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array +#'with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs' +#'expect 'time_dim' and 'memb_dim') (c) an array with dimensions of +#''exp' and 'obs' except 'time_dim' and 'memb_dim'\cr +#'Items 'nk', 'fkbar', and 'okbar' are (a) a vector of length of bin number +#'determined by 'threshold' (b) an array with dimensions c(nexp, nobs, +#'no. of bins, all the rest dimensions in 'exp' and 'obs' expect 'time_dim' and +#''memb_dim') (c) an array with dimensions c(no. of bin, all the rest dimensions +#'in 'exp' and 'obs' expect 'time_dim' and 'memb_dim') +#' #'@references #'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr #'Stephenson et al. (2008). Two extra components in the Brier score decomposition. @@ -54,7 +73,7 @@ #'@examples #'# Inputs are vectors #'exp <- runif(10) -#'obs <- round(a) +#'obs <- round(exp) #'x <- BrierScore(exp, obs) #'res <- x$bs - x$bs_check_res #'res <- x$bs - x$bs_check_gres @@ -69,7 +88,7 @@ #'@import multiApply #'@export BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate', - memb_dim = NULL, ncores = NULL) { + dat_dim = NULL, memb_dim = NULL, ncores = NULL) { # Check inputs ## exp and obs @@ -102,6 +121,10 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } if (any(name_exp != name_obs)) { stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions expect 'exp' may have 'memb_dim'.")) @@ -121,6 +144,15 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!time_dim %in% names(dim(exp))) { stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.") } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp))) { + stop("Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension.") + } + } ## memb_dim if (!is.null(memb_dim)) { if (!is.character(memb_dim) | length(memb_dim) > 1) { @@ -146,102 +178,168 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' exp <- MeanDims(exp, memb_dim) } - res <- Apply(list(exp, obs), - target_dims = list(c(time_dim), - c(time_dim)), - fun = .BrierScore, - thresholds = thresholds, - ncores = ncores) + if (is.null(dat_dim)) { + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim), + c(time_dim)), + fun = .BrierScore, + thresholds = thresholds, + ncores = ncores) + } else { + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .BrierScore, + thresholds = thresholds, + ncores = ncores) + } return(res) } .BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { - # exp: [sdate] - # obs: [sdate] + # exp: [sdate] or [sdate, nexp] + # obs: [sdate] or [sdate, nobs] + if (length(dim(exp)) == 2) { + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + exp_ori <- exp + obs_ori <- obs + # Create empty arrays + arr_rel <- arr_res <- arr_unc <- arr_bs <- arr_bs_check_res <- arr_bss_res <- + arr_gres <- arr_bs_check_gres <- arr_bss_gres <- arr_rel_bias_corrected <- + arr_gres_bias_corrected <- arr_unc_bias_corrected <- arr_bss_bias_corrected <- + array(dim = c(nexp = nexp, nobs = nobs)) + arr_nk <- arr_fkbar <- arr_okbar <- + array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) - 1)) - n <- length(exp) - nbins <- length(thresholds) - 1 # Number of bins - bins <- as.list(paste("bin", 1:nbins, sep = "")) - for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(exp >= thresholds[i] & exp <= thresholds[i + 1])) - } else { - bins[[i]] <- list(which(exp >= thresholds[i] & exp < thresholds[i + 1])) - } + } else { + nexp <- 1 + nobs <- 1 } - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] - } + for (n_exp in 1:nexp) { + for (n_obs in 1:nobs) { + if (exists('exp_ori')) { + exp <- exp_ori[, n_exp] + obs <- obs_ori[, n_obs] + } + n <- length(exp) + nbins <- length(thresholds) - 1 # Number of bins + bins <- as.list(paste("bin", 1:nbins, sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(exp >= thresholds[i] & exp <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(exp >= thresholds[i] & exp < thresholds[i + 1])) + } + } -#-----in old .BrierScore()--------- -# fkbar[fkbar == Inf] <- 0 -# okbar[is.nan(okbar)] <- 0 -#---------------------------------- - - obar <- sum(obs) / length(obs) - relsum <- ressum <- term1 <- term2 <- 0 - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i]) { - term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] } + + #-----in old .BrierScore()--------- + # fkbar[fkbar == Inf] <- 0 + # okbar[is.nan(okbar)] <- 0 + #---------------------------------- + + obar <- sum(obs) / length(obs) + relsum <- ressum <- term1 <- term2 <- 0 + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + for (j in 1:nk[i]) { + term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } + } + } + rel <- relsum / n + res <- ressum / n + unc <- obar * (1 - obar) + bs <- sum((exp - obs)^2) / n + bs_check_res <- rel - res + unc + bss_res <- (res - rel) / unc + gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution + bs_check_gres <- rel - gres + unc # BS using GRES + bss_gres <- (gres - rel) / unc # BSS using GRES + + + # Estimating the bias-corrected components of the BS + term3 <- array(0, nbins) + for (i in 1:nbins) { + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + } + term_a <- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + # Add name for nk, fkbar, okbar + names(dim(nk)) <- 'bin' + names(dim(fkbar)) <- 'bin' + names(dim(okbar)) <- 'bin' + + if (exists('exp_ori')) { + arr_rel[n_exp, n_obs] <- rel + arr_res[n_exp, n_obs] <- res + arr_unc[n_exp, n_obs] <- unc + arr_bs[n_exp, n_obs] <- bs + arr_bs_check_res[n_exp, n_obs] <- bs_check_res + arr_bss_res[n_exp, n_obs] <- bss_res + arr_gres[n_exp, n_obs] <- gres + arr_bs_check_gres[n_exp, n_obs] <- bs_check_gres + arr_bss_gres[n_exp, n_obs] <- bss_gres + arr_rel_bias_corrected[n_exp, n_obs] <- rel_bias_corrected + arr_gres_bias_corrected[n_exp, n_obs] <- gres_bias_corrected + arr_unc_bias_corrected[n_exp, n_obs] <- unc_bias_corrected + arr_bss_bias_corrected[n_exp, n_obs] <- bss_bias_corrected + arr_nk[n_exp, n_obs, ] <- nk + arr_fkbar[n_exp, n_obs, ] <- fkbar + arr_okbar[n_exp, n_obs, ] <- okbar + } + } } - rel <- relsum / n - res <- ressum / n - unc <- obar * (1 - obar) - bs <- sum((exp - obs)^2) / n - bs_check_res <- rel - res + unc - bss_res <- (res - rel) / unc - gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution - bs_check_gres <- rel - gres + unc # BS using GRES - bss_gres <- (gres - rel) / unc # BSS using GRES - - - # Estimating the bias-corrected components of the BS - term3 <- array(0, nbins) - for (i in 1:nbins) { - term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) - } - term_a <- sum(term3, na.rm = T) / n - term_b <- (obar * (1 - obar)) / (n - 1) - rel_bias_corrected <- rel - term_a - gres_bias_corrected <- gres - term_a + term_b - if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { - rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) - gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) - rel_bias_corrected <- rel_bias_corrected2 - gres_bias_corrected <- gres_bias_corrected2 - } - unc_bias_corrected <- unc + term_b - bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected - - #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { - # cat("No error found \ n") - # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") - #} - - # Add name for nk, fkbar, okbar - names(dim(nk)) <- 'bin' - names(dim(fkbar)) <- 'bin' - names(dim(okbar)) <- 'bin' - res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, - bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, - bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, - gres_bias_corrected = gres_bias_corrected, - unc_bias_corrected = unc_bias_corrected, - bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, - okbar = okbar) #bins = list(bins), + if (exists('exp_ori')) { + res_list <- list(rel = arr_rel, res = arr_res, unc = arr_unc, bs = arr_bs, + bs_check_res = arr_bs_check_res, bss_res = arr_bss_res, + gres = arr_gres, bs_check_gres = arr_bs_check_gres, + bss_gres = arr_bss_gres, rel_bias_corrected = arr_rel_bias_corrected, + gres_bias_corrected = arr_gres_bias_corrected, + unc_bias_corrected = arr_unc_bias_corrected, + bss_bias_corrected = arr_bss_bias_corrected, nk = arr_nk, + fkbar = arr_fkbar, okbar = arr_okbar) #bins = list(bins), + } else { + + res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, + bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, + bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, + gres_bias_corrected = gres_bias_corrected, + unc_bias_corrected = unc_bias_corrected, + bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, + okbar = okbar) #bins = list(bins), + } return(invisible(res_list)) } diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 9217718..2486972 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -2,13 +2,14 @@ % Please edit documentation in R/BrierScore.R \name{BrierScore} \alias{BrierScore} -\title{Compute Brier score and its decomposition and Brier skill score} +\title{Compute Brier score, its decomposition, and Brier skill score} \usage{ BrierScore( exp, obs, thresholds = seq(0, 1, 0.1), time_dim = "sdate", + dat_dim = NULL, memb_dim = NULL, ncores = NULL ) @@ -21,24 +22,30 @@ range [0, 1].} \item{obs}{A numeric array with named dimensions of the binary observations (0 or 1). The dimension must at least have 'time_dim' and other dimensions -of 'exp' except 'memb_dim'.} +of 'exp' except 'memb_dim'. The length of 'dat_dim' can be different from +'exp'.} \item{thresholds}{A numeric vector used to bin the forecasts. The default -value is \code{seq(0, 1, 0,1)}, which means that the bins are - \code{[0,0.1), [0.1, 0.2), ... [0.9, 1]}.} +value is \code{seq(0, 1, 0.1)}, which means that the bins are + \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}.} \item{time_dim}{A character string indicating the name of dimension along which Brier score is computed. The default value is 'sdate'.} +\item{dat_dim}{A character string indicating the name of dataset dimension in +'exp' and 'obs'. The length of this dimension can be different between +'exp' and 'obs'. The default value is NULL.} + \item{memb_dim}{A character string of the name of the member dimension. It must be one dimension of 'exp'. The function will do the ensemble mean over this dimension. If there is no member dimension, set NULL. The default value is NULL.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ A list that contains: -The numeric arrays with all 'exp' and 'obs' dimensions expect 'time_dim' and -'memb_dim': \item{$rel}{standard reliability} \item{$res}{standard resolution} \item{$unc}{standard uncertainty} @@ -52,22 +59,36 @@ The numeric arrays with all 'exp' and 'obs' dimensions expect 'time_dim' and \item{$gres_bias_corrected}{bias - corrected gres} \item{$unc_bias_corrected}{bias - corrected unc} \item{$bss_bias_corrected}{gres_bias_corrected - rel_bias_corrected / unc_bias_corrected} -The numeric arrays with the same dimensions as above and one additional -dimension 'bin': \item{$nk}{number of forecast in each bin} \item{$fkbar}{average probability of each bin} \item{$okbar}{relative frequency that the observed event occurred} +The data type and dimensions of the items depend on if the input 'exp' and +'obs' are:\cr +(a) Vectors\cr +(b) Arrays with 'dat_dim' specified\cr +(c) Arrays with no 'dat_dim' specified\cr +Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', +'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', +'unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array +with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs' +expect 'time_dim' and 'memb_dim') (c) an array with dimensions of +'exp' and 'obs' except 'time_dim' and 'memb_dim'\cr +Items 'nk', 'fkbar', and 'okbar' are (a) a vector of length of bin number +determined by 'threshold' (b) an array with dimensions c(nexp, nobs, +no. of bins, all the rest dimensions in 'exp' and 'obs' expect 'time_dim' and +'memb_dim') (c) an array with dimensions c(no. of bin, all the rest dimensions +in 'exp' and 'obs' expect 'time_dim' and 'memb_dim') } \description{ Compute the Brier score (BS) and the components of its standard decompostion -as well with the two within-bin components described in Stephenson et al., -(2008). It also returns the bias-corrected decomposition of the BS (Ferro and -Fricker, 2012). BSS has the climatology as the reference forecast. +with the two within-bin components described in Stephenson et al., (2008). It +also returns the bias-corrected decomposition of the BS (Ferro and Fricker, +2012). BSS has the climatology as the reference forecast. } \examples{ # Inputs are vectors exp <- runif(10) -obs <- round(a) +obs <- round(exp) x <- BrierScore(exp, obs) res <- x$bs - x$bs_check_res res <- x$bs - x$bs_check_gres diff --git a/tests/testthat/test-BrierScore.R b/tests/testthat/test-BrierScore.R index d4080b4..5acfa55 100644 --- a/tests/testthat/test-BrierScore.R +++ b/tests/testthat/test-BrierScore.R @@ -56,6 +56,16 @@ test_that("1. Input checks", { BrierScore(exp1, obs1, memb_dim = 'member', time_dim = 'time'), "Parameter 'time_dim' is not found in 'exp' and 'obs' dimension." ) + # dat_dim + expect_error( + BrierScore(exp1, obs1, dat_dim = 2), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + BrierScore(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension." + ) + # memb_dim expect_error( BrierScore(exp2, obs2, memb_dim = 2), "Parameter 'memb_dim' must be a character string." @@ -121,6 +131,22 @@ BrierScore(exp1, obs1, memb_dim = 'member')$nk[, 1, 1], c(0, 1, 0, 0, 1, 0, 2, 1, 0, 0) ) +expect_equal( +dim(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$rel), +c(nexp = 1, nobs = 1, ftime = 2) +) +expect_equal( +dim(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$nk), +c(nexp = 1, nobs = 1, bin = 10, ftime = 2) +) +expect_equal( +as.vector(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$nk), +as.vector(BrierScore(exp1, obs1, memb_dim = 'member')$nk) +) +expect_equal( +as.vector(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$bs), +as.vector(BrierScore(exp1, obs1, memb_dim = 'member')$bs) +) }) -- GitLab From eacf840345d1b22e9c581a00c2445e3782e5b546 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 20 Apr 2021 20:24:43 +0200 Subject: [PATCH 04/13] Comment examples temporary --- R/BrierScore.R | 9 +++++---- man/BrierScore.Rd | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 3951b97..d79f62d 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -79,11 +79,12 @@ #'res <- x$bs - x$bs_check_gres #'res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected #' +#'#===============uncomment the examples below when ProbBins is included========== #'# Inputs are arrays -#'example(Load) -#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') +#'#example(Load) +#'#bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'#bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'#res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') #' #'@import multiApply #'@export diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 2486972..b54d11b 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -94,11 +94,12 @@ res <- x$bs - x$bs_check_res res <- x$bs - x$bs_check_gres res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected +#===============uncomment the examples below when ProbBins is included========== # Inputs are arrays -example(Load) -bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') +#example(Load) +#bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') } \references{ -- GitLab From 93d69aca3d5f3c47e4a33445531799b2c10bd972 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 21 Apr 2021 12:08:12 +0200 Subject: [PATCH 05/13] Include UltimateBrier --- R/UltimateBrier.R | 324 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 R/UltimateBrier.R diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R new file mode 100644 index 0000000..f969e2c --- /dev/null +++ b/R/UltimateBrier.R @@ -0,0 +1,324 @@ +#'Compute Brier scores +#' +#'Interface to compute probabilistic scores (Brier Score, Brier Skill Score) +#'from data obtained from s2dverification. +#' +#'@param exp Array of forecast anomalies, as provided by \code{Ano()}. +#' Dimensions c(n. of experimental datasets, n. of members, n. of start dates, +#' n. of forecast time steps, n. of latitudes, n. of longitudes). Dimensions +#' in other orders are also supported. See parameters \code{posdatasets}, +#' \code{posmemb} and \code{posdates}. +#'@param obs Array of observational reference anomalies, as provided by +#' \code{Ano()}. Dimensions c(n. of observational reference datasets, +#' n. of members, n. of start dates, n. of forecast time steps, +#' n. of latitudes, n. of longitudes). Dimensions in other orders are also +#' supported. See parameters \code{posdatasets}, \code{posmemb} and +#' \code{posdates}. The memb_dim of 'obs' must be 1. +#'@param posdatasets Expected position of dimension corresponding to the +#' different evaluated datasets in input data (exp and obs). +#' By default 1. +#'@param posmemb Expected position of dimension corresponding to members in +#' input data (exp and obs). By default 2. +#'@param posdates Expected position of dimension corresponding to starting +#' dates in input data (exp and obs). By default 3. +#'@param quantile Flag to stipulate whether a quantile (TRUE) or a threshold +#' (FALSE) is used to estimate the forecast and observed probabilities. +#' Takes TRUE by default. +#'@param thr Values to be used as quantiles if 'quantile' is TRUE or as +#' thresholds if 'quantile' is FALSE. Takes by default \code{c(0.05, 0.95)} +#' if 'quantile' is TRUE. +#'@param type Type of score desired. Can take the following values: +#'\itemize{ +#' \item{'BS': Simple Brier Score.} +#' \item{'FairEnsembleBS': Corrected Brier Score computed across ensemble +#' members.} +#' \item{'FairStartDatesBS': Corrected Brier Score computed across starting +#' dates.} +#' \item{'BSS': Simple Brier Skill Score.} +#' \item{'FairEnsembleBSS': Corrected Brier Skill Score computed across +#' ensemble members.} +#' \item{'FairStartDatesBSS': Corrected Brier Skill Score computed across +#' starting dates.} +#'} +#'@param decomposition Flag to determine whether the decomposition of the +#' Brier Score into its components should be provided (TRUE) or not (FALSE). +#' Takes TRUE by default. The decomposition will be computed only if 'type' +#' is 'BS' or 'FairStartDatesBS'. +#'@return +#'If 'type' is 'FairEnsembleBS', 'BSS', 'FairEnsemblesBSS' or +#''FairStartDatesBSS' or 'decomposition' is FALSE and 'type' is 'BS' or +#''FairStartDatesBS', the Brier Score or Brier Skill Score will be returned +#'respectively. +#'If 'decomposition' is TRUE and 'type' is 'BS' or 'FairStartDatesBS' the +#'returned value is a named list with the following entries: +#' \itemize{ +#' \item{'BS': Brier Score.} +#' \item{'REL': Reliability component.} +#' \item{'UNC': Uncertainty component.} +#' \item{'RES': Resolution component.} +#' } +#'The dimensions of each of these arrays will be c(n. of experimental datasets, +#'n. of observational reference datasets, n. of bins, the rest of input +#'dimensions except for the ones pointed by 'posmemb' and 'posdates'). +#'@examples +#'# See ?Load for an explanation on the first part of this example. +#' \dontrun{ +#'data_path <- system.file('sample_data', package = 's2dverification') +#'expA <- list(name = 'experiment', path = file.path(data_path, +#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', +#' '$VAR_NAME$_$START_DATE$.nc')) +#'obsX <- list(name = 'observation', path = file.path(data_path, +#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', +#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) +#' +#'# Now we are ready to use Load(). +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- Load('tos', list(expA), list(obsX), startDates, +#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', +#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) +#' } +#' \dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#' } +#'sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2) +#'sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'exp <- Ano(sampleData$mod, clim$clim_exp) +#'obs <- Ano(sampleData$obs, clim$clim_obs) +#'bs <- UltimateBrier(exp, obs) +#'bss <- UltimateBrier(exp, obs, type = 'BSS') +#' +#'@import SpecsVerification plyr multiApply +#'@export +UltimateBrier <- function(exp, obs, posdatasets = 1, posmemb = 2, + posdates = 3, quantile = TRUE, + thr = c(5/100, 95/100), type = 'BS', + decomposition = TRUE) { + # Checking exp + if (!is.numeric(exp) || !is.array(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (length(dim(exp)) < 3) { + stop("'exp' must have at least the dimensions c(n. experimental data sets, n. members, n. start dates/forecast time steps/time steps).") + } + + # Checking obs + if (!is.numeric(obs) || !is.array(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (length(dim(obs)) < 3) { + stop("'obs' must have at least the dimensions c(n. observational data sets, n. obs. members, n. start dates/forecast time steps/time steps).") + } + + # Checking consistency in exp and obs + if (!(length(dim(exp)) == length(dim(obs)))) { + stop("'obs' and 'exp' must have the same number of dimensions.") + } + if (!identical(dim(exp)[c(1:length(dim(exp)))[-c(posdatasets, posmemb)]], + dim(obs)[c(1:length(dim(obs)))[-c(posdatasets, posmemb)]])) { + stop("'obs' and 'exp' must have dimensions of equal size except for the datasets and members dimensions.") + } + if (dim(obs)[posmemb] != 1) { + stop("Only observations with one member are supported. dim(obs)[posmemb] should be 1") + } + if (any(is.na(exp)) || any(is.na(obs))) { + stop("There can't be NA values in 'exp' and 'obs'") + } + + # Checking posdatasets + if (!is.numeric(posdatasets)) { + stop("Parameter 'posdatasets' must be an integer.") + } else { + posdatasets <- round(posdatasets) + if (posdatasets > length(dim(exp))) { + stop("Parameter 'posdatasets' exceeds the number of dimensions of the provided anomalies.") + } + } + + # Checking posmemb + if (!is.numeric(posmemb)) { + stop("Parameter 'posmemb' must be an integer.") + } else { + posmemb <- round(posmemb) + if (posmemb > length(dim(exp))) { + stop("Parameter 'posmemb' exceeds the number of dimensions of the provided anomalies.") + } + } + + # Checking posdates + if (!is.numeric(posdates)) { + stop("Parameter 'posdates' must be an integer.") + } else { + posdates <- round(posdates) + if (posdates > length(dim(exp))) { + stop("Parameter 'posdates' exceeds the number of dimensions of the provided anomalies.") + } + } + + if (posdatasets == posmemb || posdatasets == posdates || posmemb == posdates) { + stop("Parameters 'posdatasets', 'posmemb' and 'posdates' must all point to different dimensions.") + } + + # Checking quantile + if (!is.logical(quantile)) { + stop("Parameter 'quantile' must be either TRUE or FALSE.") + } + + # Checking thr + if (!is.numeric(thr)) { + stop("Parameter 'thr' must be a numerical vector.") + } + if (quantile) { + if (!all(thr <= 1 & thr >= 0)) { + stop("All quantiles specified in parameter 'thr' must fall in the range [0, 1].") + } + } + + # Checking type + if (!(type %in% c('BS', 'FairEnsembleBS', 'FairStartDatesBS', 'BSS', 'FairEnsembleBSS', 'FairStartDatesBSS'))) { + stop("'type' not supported") + } + + # Checking decomposition + if (!is.logical(decomposition)) { + stop("Parameter 'decomposition' must be either TRUE or FALSE.") + } + + + ## The functions used to compute FEBSS and FEBS have as input anomalies from Ano() + ## dims: c(datasets, members, startdates, leadtimes, latitudes, longitudes) + if (type %in% c('FairEnsembleBSS', 'FairEnsembleBS')) { + input_exp <- exp + input_obs <- obs + input_posdatasets <- posdatasets + input_posmemb <- posmemb + input_posdates <- posdates + ## The functions used to compute the other scores receive data from ProbBins() + ## dims: c(bins, startdates, members, datasets, leadtimes, latitudes, longitudes) + } else { + # Calculate probabilities of data with ProbBins and average over members + exp <- MeanDims( + ProbBins(exp, 1:dim(exp)[posdates], thr, quantile, posdates, posmemb), + memb_dim) + obs <- MeanDims( + ProbBins(obs, 1:dim(obs)[posdates], thr, quantile, posdates, posmemb), + memb_dim) + +#NOTE: The original code below insert memb_dim = 1 back + exp_probs <- array(Mean1Dim(ProbBins(exp, 1:dim(exp)[posdates], thr, quantile, posdates, posmemb), 3), + dim = c(length(thr) + 1, dim(exp)[posdates], 1, dim(exp)[-c(posmemb, posdates)])) + obs_probs <- array(Mean1Dim(ProbBins(obs, 1:dim(obs)[posdates], thr, quantile, posdates, posmemb), 3), + dim = c(length(thr) + 1, dim(obs)[posdates], 1, dim(obs)[-c(posmemb, posdates)])) + input_exp <- exp_probs + input_obs <- obs_probs + input_posdatasets <- 4 + input_posmemb <- 3 + input_posdates <- 2 + } + + ## Here we define the function 'f' for each type of score (read further for more info). + if (type == 'FairEnsembleBSS') { + size_ens_ref <- prod(dim(obs)[c(posmemb, posdates)]) + f <- function(x) { + ens_ref <- matrix(do.call("[", c(list(x = x), indices_obs)), size_ens_ref, size_ens_ref, byrow = TRUE) + sapply(c(thr, 1), function(tau) { + FairBrierSs(t(do.call("[", c(list(x = x), indices_exp))) > tau, + ens_ref > tau, + do.call("[", c(list(x = x), indices_obs)) > tau)['skillscore'] + }) + } + } else { + if (type == 'FairEnsembleBS') { + f <- function(x) sapply(c(thr, 1), function(tau) mean(FairBrier(t(do.call("[", c(list(x = x), indices_exp))) > tau, do.call("[", c(list(x = x), indices_obs)) > tau), na.rm = TRUE)) + } else { + if (type == 'BS') { + f <- function(x) as.vector(BrierScoreDecomposition(do.call("[", c(list(x = x), indices_exp)), do.call("[", c(list(x = x), indices_obs)))[1, ]) + } else if (type == 'FairStartDatesBS') { + f <- function(x) unlist(BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))[c('rel', 'res', 'unc')], use.names = FALSE) + } else if (type == 'BSS') { + f <- function(x) BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))$bss_res + } else if (type == 'FairStartDatesBSS') { + f <- function(x) BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))$bss_gres + } + } + } + + ## We will calculate score for each exp, obs, bin, leadtime, latitude and longitude + ## So we create array to store results + ## If we calculate a BS we will store its rel, res and unc + if (type %in% c('BS', 'FairStartDatesBS')) { + result <- array(dim = c(dim(exp)[posdatasets], dim(obs)[posdatasets], 3, length(thr) + 1, dim(exp)[-c(posdatasets, posmemb, posdates)])) + } else { + result <- array(dim = c(dim(exp)[posdatasets], dim(obs)[posdatasets], length(thr) + 1, dim(exp)[-c(posdatasets, posmemb, posdates)])) + } + ## In order to be able to use apply, we put data of each exp and obs in a single array, + ## all merged over the members dimension. + indices_exp <- as.list(rep(TRUE, length(dim(input_exp)))) + indices_exp[[input_posmemb]] <- c(1:dim(input_exp)[input_posmemb]) + indices_exp <- indices_exp[c(input_posdatasets, input_posmemb, input_posdates)] + indices_obs <- as.list(rep(TRUE, length(dim(input_obs)))) + indices_obs[[input_posmemb]] <- c(1:dim(input_obs)[input_posmemb]) + dim(input_exp)[input_posmemb] + indices_obs <- indices_obs[c(input_posdatasets, input_posmemb, input_posdates)] + out_indices <- as.list(rep(TRUE, length(dim(result)))) + for (n_obs in 1:dim(obs)[posdatasets]) { + for (n_exp in 1:dim(exp)[posdatasets]) { + data <- abind(take(input_exp, input_posdatasets, n_exp), + take(input_obs, input_posdatasets, n_obs), + along = input_posmemb) + out_indices[c(1, 2)] <- c(n_exp, n_obs) + ## We apply function 'f' to data of each couple of exp and obs, merged in a single array. + ## This data will have dimensions + ## (1, nmembexp + nmembobs, nsdates, nltimes, nlat, nlon) + ## or + ## (nbins, nsdates, nmembexp + nmembobs, 1, nltimes, nlat, nlon) + ## depending on the input type. + ## The function 'f' is applied along all dimensions but (datasets, members and sdates) + ## so the produced output by apply is at least of dimensions + ## (nltimes, nlat, nlon) + ## or + ## (nbins, nltimes, nlat, nlon) + ## depending on the input type (FairEnsembleBS and FairEnsembleBSS + ## will have at lest the first set of dimensions, + ## other scores will have at least the second). + ## So 'f' must have as input an array of dims (1, nmembexp + nmembobs, nsdates). + ## 'indices_exp' and 'indices_obs' will pick for us the input data corresponding to + ## experiments or observations respectively. + ## In order to match with dimensions of 'result', the apply() must have as + ## output an array of dims (nbins, nltimes, nlat, nlon) + ## or (3, nbins, nltimes, nlat, nlon) if calculating BS or FairStartDatesBS. + ## All in all, after looking at apply()'s 'at least' output + ## dimensions and at apply()'s required output dimensions: + ## 'f' must have as output a vector of length nbins for FEBS and FEBSS + ## 'f' must have as output a vector of length 3 (corresponding to rel, res and unc) for BS and FSDBS + ## 'f' must have as output a single numeric element for other scores + result <- do.call("[<-", c(list(x = result), out_indices, list(value = apply(data, c(1:length(dim(data)))[-c(input_posdatasets, input_posmemb, input_posdates)], f)))) + } + } + + if (type %in% c('BSS', 'FairStartDatesBSS', 'FairEnsembleBSS')) { + result + } else { + if (decomposition && type != 'FairEnsembleBS') { + rel <- take(result, 3, 1) + dim(rel) <- dim(rel)[-3] + res <- take(result, 3, 2) + dim(res) <- dim(res)[-3] + unc <- take(result, 3, 3) + dim(unc) <- dim(unc)[-3] + bs <- rel - res + unc + list(BS = bs, REL = rel, UNC = unc, RES = res) + } else { + result <- take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3) + dim(result) <- dim(result)[-3] + result + } + } +} + -- GitLab From 06e8881ab006ea6b3f8d796c3786ce0a4090e89d Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 28 Apr 2021 16:04:28 +0200 Subject: [PATCH 06/13] Transform UltimateBrier.R --- NAMESPACE | 2 + R/UltimateBrier.R | 502 ++++++++++++++-------------- man/UltimateBrier.Rd | 113 +++++++ tests/testthat/test-UltimateBrier.R | 236 +++++++++++++ 4 files changed, 597 insertions(+), 256 deletions(-) create mode 100644 man/UltimateBrier.Rd create mode 100644 tests/testthat/test-UltimateBrier.R diff --git a/NAMESPACE b/NAMESPACE index 0e54899..03c3f84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,10 +51,12 @@ export(Smoothing) export(TPI) export(ToyModel) export(Trend) +export(UltimateBrier) export(clim.colors) export(clim.palette) import(GEOmap) import(NbClust) +import(SpecsVerification) import(bigmemory) import(geomapdata) import(graphics) diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R index f969e2c..af88148 100644 --- a/R/UltimateBrier.R +++ b/R/UltimateBrier.R @@ -1,94 +1,81 @@ #'Compute Brier scores #' #'Interface to compute probabilistic scores (Brier Score, Brier Skill Score) -#'from data obtained from s2dverification. +#'from the forecast and observational data anomalies. It provides six types +#'to choose. #' -#'@param exp Array of forecast anomalies, as provided by \code{Ano()}. -#' Dimensions c(n. of experimental datasets, n. of members, n. of start dates, -#' n. of forecast time steps, n. of latitudes, n. of longitudes). Dimensions -#' in other orders are also supported. See parameters \code{posdatasets}, -#' \code{posmemb} and \code{posdates}. -#'@param obs Array of observational reference anomalies, as provided by -#' \code{Ano()}. Dimensions c(n. of observational reference datasets, -#' n. of members, n. of start dates, n. of forecast time steps, -#' n. of latitudes, n. of longitudes). Dimensions in other orders are also -#' supported. See parameters \code{posdatasets}, \code{posmemb} and -#' \code{posdates}. The memb_dim of 'obs' must be 1. -#'@param posdatasets Expected position of dimension corresponding to the -#' different evaluated datasets in input data (exp and obs). -#' By default 1. -#'@param posmemb Expected position of dimension corresponding to members in -#' input data (exp and obs). By default 2. -#'@param posdates Expected position of dimension corresponding to starting -#' dates in input data (exp and obs). By default 3. -#'@param quantile Flag to stipulate whether a quantile (TRUE) or a threshold -#' (FALSE) is used to estimate the forecast and observed probabilities. -#' Takes TRUE by default. -#'@param thr Values to be used as quantiles if 'quantile' is TRUE or as -#' thresholds if 'quantile' is FALSE. Takes by default \code{c(0.05, 0.95)} -#' if 'quantile' is TRUE. -#'@param type Type of score desired. Can take the following values: +#'@param exp A numeric array of forecast anomalies with named dimensions that +#' at least include 'dat_dim', 'memb_dim', and 'time_dim'. It can be provided +#' by \code{Ano()}. +#'@param obs A numeric array of observational reference anomalies with named +#' dimensions that at least include 'dat_dim' and 'time_dim'. If it has +#' 'memb_dim', the length must be 1. The dimensions should be consistent with +#' 'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}. +#'@param dat_dim A character string indicating the name of the dataset +#' dimension in 'exp' and 'obs'. The default value is 'dataset'. +#'@param memb_dim A character string indicating the name of the member +#' dimension in 'exp' (and 'obs') for ensemble mean calculation. The default +#' value is 'member'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the probabilistic scores. The default value is 'sdate'. +#'@param quantile A logical value to decide whether a quantile (TRUE) or a +#' threshold (FALSE) is used to estimate the forecast and observed +#' probabilities. The default value is TRUE. +#'@param thr A numeric vector to be used in probability calculation (for 'BS', +#' 'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event +#' judgement (for 'FairEnsembleBS' and 'FairEnsembleBSS'). It is as +#' quantiles if 'quantile' is TRUE or as thresholds if 'quantile' is FALSE. +#' The default value is \code{c(0.05, 0.95)} for 'quantile = TRUE'. +#'@param type A character string of the desired score type. It can be the +#' following values: #'\itemize{ -#' \item{'BS': Simple Brier Score.} +#' \item{'BS': Simple Brier Score. Use SpecsVerification::BrierDecomp inside.} #' \item{'FairEnsembleBS': Corrected Brier Score computed across ensemble -#' members.} +#' members. Use SpecsVerification::FairBrier inside.} #' \item{'FairStartDatesBS': Corrected Brier Score computed across starting -#' dates.} -#' \item{'BSS': Simple Brier Skill Score.} +#' dates. Use s2dv:::.BrierScore inside.} +#' \item{'BSS': Simple Brier Skill Score. Use s2dv:::.BrierScore inside.} #' \item{'FairEnsembleBSS': Corrected Brier Skill Score computed across -#' ensemble members.} +#' ensemble members. Use SpecsVerification::FairBrierSs inside.} #' \item{'FairStartDatesBSS': Corrected Brier Skill Score computed across -#' starting dates.} +#' starting dates. Use s2dv:::.BrierScore inside.} #'} -#'@param decomposition Flag to determine whether the decomposition of the -#' Brier Score into its components should be provided (TRUE) or not (FALSE). -#' Takes TRUE by default. The decomposition will be computed only if 'type' -#' is 'BS' or 'FairStartDatesBS'. +#' The default value is 'BS'. +#'@param decomposition A logical value to determine whether the decomposition +#' of the Brier Score should be provided (TRUE) or not (FALSE). It is only +#' used when 'type' is 'BS' or 'FairStartDatesBS'. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' #'@return -#'If 'type' is 'FairEnsembleBS', 'BSS', 'FairEnsemblesBSS' or -#''FairStartDatesBSS' or 'decomposition' is FALSE and 'type' is 'BS' or -#''FairStartDatesBS', the Brier Score or Brier Skill Score will be returned -#'respectively. -#'If 'decomposition' is TRUE and 'type' is 'BS' or 'FairStartDatesBS' the -#'returned value is a named list with the following entries: +#'If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the +#'output is a list of 4 arrays (see details below.) In other cases, the output +#'is an array of Brier scores or Brier score skills. All the arrays have the +#'same dimensions: +#'c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and +#''memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' +#'and 'obs' respectively.\cr +#'The list of 4 inlcudes: #' \itemize{ -#' \item{'BS': Brier Score.} -#' \item{'REL': Reliability component.} -#' \item{'UNC': Uncertainty component.} -#' \item{'RES': Resolution component.} +#' \item{$bs: Brier Score} +#' \item{$rel: Reliability component} +#' \item{$res: Resolution component} +#' \item{$unc: Uncertainty component} #' } -#'The dimensions of each of these arrays will be c(n. of experimental datasets, -#'n. of observational reference datasets, n. of bins, the rest of input -#'dimensions except for the ones pointed by 'posmemb' and 'posdates'). -#'@examples -#'# See ?Load for an explanation on the first part of this example. -#' \dontrun{ -#'data_path <- system.file('sample_data', package = 's2dverification') -#'expA <- list(name = 'experiment', path = file.path(data_path, -#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', -#' '$VAR_NAME$_$START_DATE$.nc')) -#'obsX <- list(name = 'observation', path = file.path(data_path, -#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', -#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) #' -#'# Now we are ready to use Load(). -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- Load('tos', list(expA), list(obsX), startDates, -#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', -#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) -#' } +#'@examples #' \dontshow{ #'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) #' } -#'sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2) -#'sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2) +#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2) +#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) #'clim <- Clim(sampleData$mod, sampleData$obs) #'exp <- Ano(sampleData$mod, clim$clim_exp) #'obs <- Ano(sampleData$obs, clim$clim_obs) @@ -97,228 +84,231 @@ #' #'@import SpecsVerification plyr multiApply #'@export -UltimateBrier <- function(exp, obs, posdatasets = 1, posmemb = 2, - posdates = 3, quantile = TRUE, - thr = c(5/100, 95/100), type = 'BS', - decomposition = TRUE) { - # Checking exp - if (!is.numeric(exp) || !is.array(exp)) { - stop("Parameter 'exp' must be a numeric array.") +UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', + quantile = TRUE, thr = c(5/100, 95/100), type = 'BS', + decomposition = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") } - if (length(dim(exp)) < 3) { - stop("'exp' must have at least the dimensions c(n. experimental data sets, n. members, n. start dates/forecast time steps/time steps).") + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a vector or a numeric array.") } - - # Checking obs - if (!is.numeric(obs) || !is.array(obs)) { - stop("Parameter 'obs' must be a numeric array.") + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") } - if (length(dim(obs)) < 3) { - stop("'obs' must have at least the dimensions c(n. observational data sets, n. obs. members, n. start dates/forecast time steps/time steps).") + ## dat_dim + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") } - - # Checking consistency in exp and obs - if (!(length(dim(exp)) == length(dim(obs)))) { - stop("'obs' and 'exp' must have the same number of dimensions.") + if (!dat_dim %in% names(dim(exp))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") } - if (!identical(dim(exp)[c(1:length(dim(exp)))[-c(posdatasets, posmemb)]], - dim(obs)[c(1:length(dim(obs)))[-c(posdatasets, posmemb)]])) { - stop("'obs' and 'exp' must have dimensions of equal size except for the datasets and members dimensions.") + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") } - if (dim(obs)[posmemb] != 1) { - stop("Only observations with one member are supported. dim(obs)[posmemb] should be 1") + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (any(is.na(exp)) || any(is.na(obs))) { - stop("There can't be NA values in 'exp' and 'obs'") + if (!memb_dim %in% names(dim(obs))) { + # Insert memb_dim into obs for the ease of later calculation + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } else if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") } - - # Checking posdatasets - if (!is.numeric(posdatasets)) { - stop("Parameter 'posdatasets' must be an integer.") - } else { - posdatasets <- round(posdatasets) - if (posdatasets > length(dim(exp))) { - stop("Parameter 'posdatasets' exceeds the number of dimensions of the provided anomalies.") - } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") } - - # Checking posmemb - if (!is.numeric(posmemb)) { - stop("Parameter 'posmemb' must be an integer.") - } else { - posmemb <- round(posmemb) - if (posmemb > length(dim(exp))) { - stop("Parameter 'posmemb' exceeds the number of dimensions of the provided anomalies.") - } + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } - - # Checking posdates - if (!is.numeric(posdates)) { - stop("Parameter 'posdates' must be an integer.") - } else { - posdates <- round(posdates) - if (posdates > length(dim(exp))) { - stop("Parameter 'posdates' exceeds the number of dimensions of the provided anomalies.") - } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + if (any(name_exp != name_obs)) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) } - - if (posdatasets == posmemb || posdatasets == posdates || posmemb == posdates) { - stop("Parameters 'posdatasets', 'posmemb' and 'posdates' must all point to different dimensions.") + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) } - - # Checking quantile - if (!is.logical(quantile)) { - stop("Parameter 'quantile' must be either TRUE or FALSE.") + ## quantile + if (!is.logical(quantile) | length(quantile) > 1) { + stop("Parameter 'quantile' must be one logical value.") } - - # Checking thr - if (!is.numeric(thr)) { - stop("Parameter 'thr' must be a numerical vector.") + ## thr + if (!is.numeric(thr) | !is.vector(thr)) { + stop("Parameter 'thr' must be a numeric vector.") } if (quantile) { if (!all(thr <= 1 & thr >= 0)) { - stop("All quantiles specified in parameter 'thr' must fall in the range [0, 1].") + stop("Parameter 'thr' must be within [0, 1] when quantile is TRUE.") } } - - # Checking type - if (!(type %in% c('BS', 'FairEnsembleBS', 'FairStartDatesBS', 'BSS', 'FairEnsembleBSS', 'FairStartDatesBSS'))) { - stop("'type' not supported") + ## type + if (!(type %in% c("BS", "BSS", "FairEnsembleBS", "FairEnsembleBSS", "FairStartDatesBS", "FairStartDatesBSS"))) { + stop("Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'.") } - - # Checking decomposition - if (!is.logical(decomposition)) { - stop("Parameter 'decomposition' must be either TRUE or FALSE.") + ## decomposition + if (!is.logical(decomposition) | length(decomposition) > 1) { + stop("Parameter 'decomposition' must be one logical value.") } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ############################### + # Calculate UltimateBrier - ## The functions used to compute FEBSS and FEBS have as input anomalies from Ano() - ## dims: c(datasets, members, startdates, leadtimes, latitudes, longitudes) if (type %in% c('FairEnsembleBSS', 'FairEnsembleBS')) { - input_exp <- exp - input_obs <- obs - input_posdatasets <- posdatasets - input_posmemb <- posmemb - input_posdates <- posdates - ## The functions used to compute the other scores receive data from ProbBins() - ## dims: c(bins, startdates, members, datasets, leadtimes, latitudes, longitudes) + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .UltimateBrier, + thr = thr, type = type, + decomposition = decomposition, + ncores = ncores)$output1 + } else { - # Calculate probabilities of data with ProbBins and average over members + # Calculate probablities by ProbBins() and ensemble mean first. + # The first dim will become 'bin' and memb_dim is gone. exp <- MeanDims( - ProbBins(exp, 1:dim(exp)[posdates], thr, quantile, posdates, posmemb), + ProbBins(exp, thr = thr, time_dim = time_dim, memb_dim = memb_dim, + quantile = quantile, ncores = ncores), memb_dim) obs <- MeanDims( - ProbBins(obs, 1:dim(obs)[posdates], thr, quantile, posdates, posmemb), + ProbBins(obs, thr = thr, time_dim = time_dim, memb_dim = memb_dim, + quantile = quantile, ncores = ncores), memb_dim) -#NOTE: The original code below insert memb_dim = 1 back - exp_probs <- array(Mean1Dim(ProbBins(exp, 1:dim(exp)[posdates], thr, quantile, posdates, posmemb), 3), - dim = c(length(thr) + 1, dim(exp)[posdates], 1, dim(exp)[-c(posmemb, posdates)])) - obs_probs <- array(Mean1Dim(ProbBins(obs, 1:dim(obs)[posdates], thr, quantile, posdates, posmemb), 3), - dim = c(length(thr) + 1, dim(obs)[posdates], 1, dim(obs)[-c(posmemb, posdates)])) - input_exp <- exp_probs - input_obs <- obs_probs - input_posdatasets <- 4 - input_posmemb <- 3 - input_posdates <- 2 + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .UltimateBrier, + thr = thr, type = type, + decomposition = decomposition, + ncores = ncores) + + if (type %in% c('BSS', 'FairStartDatesBSS')) { + res <- res$output1 + } else if (!decomposition) { + res <- res$bs + } } - ## Here we define the function 'f' for each type of score (read further for more info). + return(res) +} + +.UltimateBrier <- function(exp, obs, thr = c(5/100, 95/100), type = 'BS', + decomposition = TRUE) { + # If exp and obs are probablistics + # exp: [sdate, nexp] + # obs: [sdate, nobs] + # If exp and obs are anomalies + # exp: [sdate, nexp, memb] + # obs: [sdate, nobs, memb] + + #NOTE: 'thr' is used in 'FairEnsembleBSS' and 'FairEnsembleBS'. But if quantile = F and + # thr is real value, does it work? if (type == 'FairEnsembleBSS') { - size_ens_ref <- prod(dim(obs)[c(posmemb, posdates)]) - f <- function(x) { - ens_ref <- matrix(do.call("[", c(list(x = x), indices_obs)), size_ens_ref, size_ens_ref, byrow = TRUE) - sapply(c(thr, 1), function(tau) { - FairBrierSs(t(do.call("[", c(list(x = x), indices_exp))) > tau, - ens_ref > tau, - do.call("[", c(list(x = x), indices_obs)) > tau)['skillscore'] - }) - } - } else { - if (type == 'FairEnsembleBS') { - f <- function(x) sapply(c(thr, 1), function(tau) mean(FairBrier(t(do.call("[", c(list(x = x), indices_exp))) > tau, do.call("[", c(list(x = x), indices_obs)) > tau), na.rm = TRUE)) - } else { - if (type == 'BS') { - f <- function(x) as.vector(BrierScoreDecomposition(do.call("[", c(list(x = x), indices_exp)), do.call("[", c(list(x = x), indices_obs)))[1, ]) - } else if (type == 'FairStartDatesBS') { - f <- function(x) unlist(BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))[c('rel', 'res', 'unc')], use.names = FALSE) - } else if (type == 'BSS') { - f <- function(x) BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))$bss_res - } else if (type == 'FairStartDatesBSS') { - f <- function(x) BrierScore(do.call("[", c(list(x = x), indices_obs)), do.call("[", c(list(x = x), indices_exp)))$bss_gres + size_ens_ref <- prod(dim(obs)[c(1, 3)]) + res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + bin = length(thr) + 1)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + ens_ref <- matrix(obs[, n_obs, 1], size_ens_ref, size_ens_ref, byrow = TRUE) + for (n_thr in 1:length(c(thr, 1))) { + #NOTE: FairBreirSs is deprecated now. Should change to SkillScore (according to + # SpecsVerification's documentation) + res[n_exp, n_obs, n_thr] <- SpecsVerification::FairBrierSs(exp[, n_exp, ] > c(thr, 1)[n_thr], + ens_ref > c(thr, 1)[n_thr], + obs[, n_obs, 1] > c(thr, 1)[n_thr])['skillscore'] + } } } - } - ## We will calculate score for each exp, obs, bin, leadtime, latitude and longitude - ## So we create array to store results - ## If we calculate a BS we will store its rel, res and unc - if (type %in% c('BS', 'FairStartDatesBS')) { - result <- array(dim = c(dim(exp)[posdatasets], dim(obs)[posdatasets], 3, length(thr) + 1, dim(exp)[-c(posdatasets, posmemb, posdates)])) - } else { - result <- array(dim = c(dim(exp)[posdatasets], dim(obs)[posdatasets], length(thr) + 1, dim(exp)[-c(posdatasets, posmemb, posdates)])) - } - ## In order to be able to use apply, we put data of each exp and obs in a single array, - ## all merged over the members dimension. - indices_exp <- as.list(rep(TRUE, length(dim(input_exp)))) - indices_exp[[input_posmemb]] <- c(1:dim(input_exp)[input_posmemb]) - indices_exp <- indices_exp[c(input_posdatasets, input_posmemb, input_posdates)] - indices_obs <- as.list(rep(TRUE, length(dim(input_obs)))) - indices_obs[[input_posmemb]] <- c(1:dim(input_obs)[input_posmemb]) + dim(input_exp)[input_posmemb] - indices_obs <- indices_obs[c(input_posdatasets, input_posmemb, input_posdates)] - out_indices <- as.list(rep(TRUE, length(dim(result)))) - for (n_obs in 1:dim(obs)[posdatasets]) { - for (n_exp in 1:dim(exp)[posdatasets]) { - data <- abind(take(input_exp, input_posdatasets, n_exp), - take(input_obs, input_posdatasets, n_obs), - along = input_posmemb) - out_indices[c(1, 2)] <- c(n_exp, n_obs) - ## We apply function 'f' to data of each couple of exp and obs, merged in a single array. - ## This data will have dimensions - ## (1, nmembexp + nmembobs, nsdates, nltimes, nlat, nlon) - ## or - ## (nbins, nsdates, nmembexp + nmembobs, 1, nltimes, nlat, nlon) - ## depending on the input type. - ## The function 'f' is applied along all dimensions but (datasets, members and sdates) - ## so the produced output by apply is at least of dimensions - ## (nltimes, nlat, nlon) - ## or - ## (nbins, nltimes, nlat, nlon) - ## depending on the input type (FairEnsembleBS and FairEnsembleBSS - ## will have at lest the first set of dimensions, - ## other scores will have at least the second). - ## So 'f' must have as input an array of dims (1, nmembexp + nmembobs, nsdates). - ## 'indices_exp' and 'indices_obs' will pick for us the input data corresponding to - ## experiments or observations respectively. - ## In order to match with dimensions of 'result', the apply() must have as - ## output an array of dims (nbins, nltimes, nlat, nlon) - ## or (3, nbins, nltimes, nlat, nlon) if calculating BS or FairStartDatesBS. - ## All in all, after looking at apply()'s 'at least' output - ## dimensions and at apply()'s required output dimensions: - ## 'f' must have as output a vector of length nbins for FEBS and FEBSS - ## 'f' must have as output a vector of length 3 (corresponding to rel, res and unc) for BS and FSDBS - ## 'f' must have as output a single numeric element for other scores - result <- do.call("[<-", c(list(x = result), out_indices, list(value = apply(data, c(1:length(dim(data)))[-c(input_posdatasets, input_posmemb, input_posdates)], f)))) + } else if (type == 'FairEnsembleBS') { + #NOTE: The calculation in s2dverification::UltimateBrier is wrong. In the final stage, + # the function calculates like "take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3)", + # but the 3rd dim of result is 'bins' instead of decomposition. 'FairEnsembleBS' does + # not have decomposition. + # The calculation is fixed here. + res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + bin = length(thr) + 1)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + for (n_thr in 1:length(c(thr, 1))) { + fb <- SpecsVerification::FairBrier(ens = exp[, n_exp, ] > c(thr, 1)[n_thr], + obs = obs[, n_obs, 1] > c(thr, 1)[n_thr]) + res[n_exp, n_obs, n_thr] <- mean(fb, na.rm = T) + } + } } - } +# tmp <- res[, , 1] - res[, , 2] + res[, , 3] +# res <- array(tmp, dim = c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2]))) - if (type %in% c('BSS', 'FairStartDatesBSS', 'FairEnsembleBSS')) { - result - } else { - if (decomposition && type != 'FairEnsembleBS') { - rel <- take(result, 3, 1) - dim(rel) <- dim(rel)[-3] - res <- take(result, 3, 2) - dim(res) <- dim(res)[-3] - unc <- take(result, 3, 3) - dim(unc) <- dim(unc)[-3] + } else if (type == 'BS') { + comp <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + comp = 3)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + #NOTE: Parameter 'bins' is default. + comp[n_exp, n_obs, ] <- SpecsVerification::BrierDecomp(p = exp[, n_exp], + y = obs[, n_obs])[1, ] + } + } + if (decomposition) { + rel <- comp[, , 1] + dim(rel) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- comp[, , 2] + dim(res) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + unc <- comp[, , 3] + dim(unc) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) bs <- rel - res + unc - list(BS = bs, REL = rel, UNC = unc, RES = res) + dim(bs) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- list(bs = bs, rel = rel, res = res, unc = unc) } else { - result <- take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3) - dim(result) <- dim(result)[-3] - result + bs <- comp[, , 1] - comp[, , 2] + comp[, , 3] + dim(bs) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- list(bs = bs) } + + } else if (type == 'FairStartDatesBS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs) + if (decomposition) { + res <- list(bs = res$bs, rel = res$rel, res = res$res, unc = res$unc) + } else { + res <- list(bs = res$bs) + } + + } else if (type == 'BSS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs)$bss_res + + } else if (type == 'FairStartDatesBSS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs)$bss_gres } + + return(res) + } diff --git a/man/UltimateBrier.Rd b/man/UltimateBrier.Rd new file mode 100644 index 0000000..2fd2063 --- /dev/null +++ b/man/UltimateBrier.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/UltimateBrier.R +\name{UltimateBrier} +\alias{UltimateBrier} +\title{Compute Brier scores} +\usage{ +UltimateBrier( + exp, + obs, + dat_dim = "dataset", + memb_dim = "member", + time_dim = "sdate", + quantile = TRUE, + thr = c(5/100, 95/100), + type = "BS", + decomposition = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A numeric array of forecast anomalies with named dimensions that +at least include 'dat_dim', 'memb_dim', and 'time_dim'. It can be provided +by \code{Ano()}.} + +\item{obs}{A numeric array of observational reference anomalies with named +dimensions that at least include 'dat_dim' and 'time_dim'. If it has +'memb_dim', the length must be 1. The dimensions should be consistent with +'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}.} + +\item{dat_dim}{A character string indicating the name of the dataset +dimension in 'exp' and 'obs'. The default value is 'dataset'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension in 'exp' (and 'obs') for ensemble mean calculation. The default +value is 'member'.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the probabilistic scores. The default value is 'sdate'.} + +\item{quantile}{A logical value to decide whether a quantile (TRUE) or a +threshold (FALSE) is used to estimate the forecast and observed +probabilities. The default value is TRUE.} + +\item{thr}{A numeric vector to be used in probability calculation (for 'BS', +'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event +judgement (for 'FairEnsembleBS' and 'FairEnsembleBSS'). It is as +quantiles if 'quantile' is TRUE or as thresholds if 'quantile' is FALSE. +The default value is \code{c(0.05, 0.95)} for 'quantile = TRUE'.} + +\item{type}{A character string of the desired score type. It can be the + following values: +\itemize{ + \item{'BS': Simple Brier Score. Use SpecsVerification::BrierDecomp inside.} + \item{'FairEnsembleBS': Corrected Brier Score computed across ensemble + members. Use SpecsVerification::FairBrier inside.} + \item{'FairStartDatesBS': Corrected Brier Score computed across starting + dates. Use s2dv:::.BrierScore inside.} + \item{'BSS': Simple Brier Skill Score. Use s2dv:::.BrierScore inside.} + \item{'FairEnsembleBSS': Corrected Brier Skill Score computed across + ensemble members. Use SpecsVerification::FairBrierSs inside.} + \item{'FairStartDatesBSS': Corrected Brier Skill Score computed across + starting dates. Use s2dv:::.BrierScore inside.} +} + The default value is 'BS'.} + +\item{decomposition}{A logical value to determine whether the decomposition +of the Brier Score should be provided (TRUE) or not (FALSE). It is only +used when 'type' is 'BS' or 'FairStartDatesBS'. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the +output is a list of 4 arrays (see details below.) In other cases, the output +is an array of Brier scores or Brier score skills. All the arrays have the +same dimensions: +c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and +'memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' +and 'obs' respectively.\cr +The list of 4 inlcudes: + \itemize{ + \item{$bs: Brier Score} + \item{$rel: Reliability component} + \item{$res: Resolution component} + \item{$unc: Uncertainty component} + } +} +\description{ +Interface to compute probabilistic scores (Brier Score, Brier Skill Score) +from the forecast and observational data anomalies. It provides six types +to choose. +} +\examples{ + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } +sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2) +sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) +clim <- Clim(sampleData$mod, sampleData$obs) +exp <- Ano(sampleData$mod, clim$clim_exp) +obs <- Ano(sampleData$obs, clim$clim_obs) +bs <- UltimateBrier(exp, obs) +bss <- UltimateBrier(exp, obs, type = 'BSS') + +} diff --git a/tests/testthat/test-UltimateBrier.R b/tests/testthat/test-UltimateBrier.R new file mode 100644 index 0000000..e8114f6 --- /dev/null +++ b/tests/testthat/test-UltimateBrier.R @@ -0,0 +1,236 @@ +context("s2dv::UltimateBrier tests") + +############################################## +# dat1 +set.seed(1) +exp1 <- array(rnorm(30), dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(round(rnorm(10)), dim = c(dataset = 1, sdate = 5, ftime = 2)) + + +############################################## +test_that("1. Input checks", { + # exp and obs + expect_error( + UltimateBrier(exp1, c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + UltimateBrier(c('b'), obs1), + "Parameter 'exp' and 'obs' must be a vector or a numeric array." + ) + expect_error( + UltimateBrier(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + # dat_dim + expect_error( + UltimateBrier(exp1, obs1, dat_dim = 2), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 'ensemble'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, member = 2, sdate = 5, ftime = 2))), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + # time_dim + expect_error( + UltimateBrier(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 'member', time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 6, ftime = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") + ) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 5, time = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") + ) + # quantile + expect_error( + UltimateBrier(exp1, obs1, quantile = c(0.05, 0.95)), + "Parameter 'quantile' must be one logical value." + ) + # thr + expect_error( + UltimateBrier(exp1, obs1, thr = TRUE), + "Parameter 'thr' must be a numeric vector." + ) + expect_error( + UltimateBrier(exp1, obs1, quantile = TRUE, thr = 1:3), + "Parameter 'thr' must be within \\[0, 1\\] when quantile is TRUE." + ) + # type + expect_error( + UltimateBrier(exp1, obs1, type = 'UltimateBrier'), + "Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'." + ) + # decomposition + expect_error( + UltimateBrier(exp1, obs1, decomposition = 1), + "Parameter 'decomposition' must be one logical value." + ) + # ncores + expect_error( + UltimateBrier(exp1, obs1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + + +}) + +############################################## +test_that("2. Output checks: dat1", { + +# 'BS' +expect_equal( +is.list(UltimateBrier(exp1, obs1)), +TRUE +) +expect_equal( +names(UltimateBrier(exp1, obs1)), +c('bs', 'rel', 'res', 'unc') +) +expect_equal( +is.list(UltimateBrier(exp1, obs1, decomposition = FALSE)), +FALSE +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE)), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE, thr = c(0.25, 0.5, 0.75))), +c(nexp = 1, nobs = 1, bin = 4, ftime = 2) +) +expect_equal( +UltimateBrier(exp1, obs1)$bs, +UltimateBrier(exp1, obs1, decomposition = FALSE) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$bs), +c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$rel), +c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$res), +c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$unc), +c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), +tolerance = 0.0001 +) + +# 'BSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'BSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'BSS')), +c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), +tolerance = 0.0001 +) + +# 'FairStartDatesBS' +expect_equal( +is.list(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), +TRUE +) +expect_equal( +names(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), +c('bs', 'rel', 'res', 'unc') +) +expect_equal( +is.list(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), +FALSE +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs, +UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS') +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs), +c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$rel), +c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$res), +c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$unc), +c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), +tolerance = 0.0001 +) + +# 'FairStartDatesBSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), +c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), +tolerance = 0.0001 +) +# 'FairEnsembleBS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS')), +c(0.1333333, 0.2000000, 0.2000000, 0.1333333, 0.4000000, 0.2000000), +tolerance = 0.0001 +) +# 'FairEnsembleBSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), +c(-0.1111111, -0.6666667, -0.6666667, 0.2592593, -1.2222222, -0.6666667), +tolerance = 0.0001 +) + +}) + + -- GitLab From ab55d9ebcd5677bc4309a0533f4320011de64c22 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 28 Apr 2021 16:06:01 +0200 Subject: [PATCH 07/13] Typo fix --- R/UltimateBrier.R | 2 +- man/UltimateBrier.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R index af88148..c9fee49 100644 --- a/R/UltimateBrier.R +++ b/R/UltimateBrier.R @@ -50,7 +50,7 @@ #'@return #'If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the #'output is a list of 4 arrays (see details below.) In other cases, the output -#'is an array of Brier scores or Brier score skills. All the arrays have the +#'is an array of Brier scores or Brier skill scores. All the arrays have the #'same dimensions: #'c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and #''memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' diff --git a/man/UltimateBrier.Rd b/man/UltimateBrier.Rd index 2fd2063..714dc74 100644 --- a/man/UltimateBrier.Rd +++ b/man/UltimateBrier.Rd @@ -73,7 +73,7 @@ computation. The default value is NULL.} \value{ If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the output is a list of 4 arrays (see details below.) In other cases, the output -is an array of Brier scores or Brier score skills. All the arrays have the +is an array of Brier scores or Brier skill scores. All the arrays have the same dimensions: c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and 'memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' -- GitLab From 1798e7bb1cacf8f8822e0cfd5d3339f03a7592e1 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 28 Apr 2021 16:42:15 +0200 Subject: [PATCH 08/13] Allow 'obs' to have 'member' dim in BrierScore. --- R/BrierScore.R | 60 +++++++++++++++++++------------- R/UltimateBrier.R | 4 +-- man/BrierScore.Rd | 8 ++--- tests/testthat/test-BrierScore.R | 7 +++- 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index d79f62d..b49e0b6 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -11,8 +11,8 @@ #' range [0, 1]. #'@param obs A numeric array with named dimensions of the binary observations #' (0 or 1). The dimension must at least have 'time_dim' and other dimensions -#' of 'exp' except 'memb_dim'. The length of 'dat_dim' can be different from -#' 'exp'. +#' of 'exp' except 'memb_dim' (optional). The length of 'dat_dim' can be +#' different from 'exp', and the length of 'memb_dim' must be 1 if it has. #'@param thresholds A numeric vector used to bin the forecasts. The default #' value is \code{seq(0, 1, 0.1)}, which means that the bins are #' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}. @@ -21,8 +21,8 @@ #'@param dat_dim A character string indicating the name of dataset dimension in #' 'exp' and 'obs'. The length of this dimension can be different between #' 'exp' and 'obs'. The default value is NULL. -#'@param memb_dim A character string of the name of the member dimension. It -#' must be one dimension of 'exp'. The function will do the ensemble mean +#'@param memb_dim A character string of the name of the member dimension in +#' 'exp' (and 'obs', optional). The function will do the ensemble mean #' over this dimension. If there is no member dimension, set NULL. The default #' value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel @@ -92,7 +92,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' dat_dim = NULL, memb_dim = NULL, ncores = NULL) { # Check inputs - ## exp and obs + ## exp and obs (1) if (is.null(exp) | is.null(obs)) { stop("Parameter 'exp' and 'obs' cannot be NULL.") } @@ -117,23 +117,6 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (any(!obs %in% c(0, 1))) { stop("Parameter 'obs' must be binary events (0 or 1).") } - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (any(name_exp != name_obs)) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions expect 'exp' may have 'memb_dim'.")) - } - if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions expect 'exp' may have 'memb_dim'.")) - } ## thresholds if (!is.numeric(thresholds) | !is.vector(thresholds)) { stop("Parameter 'thresholds' must be a numeric vector.") @@ -142,7 +125,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") } - if (!time_dim %in% names(dim(exp))) { + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.") } ## dat_dim @@ -150,7 +133,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!is.character(dat_dim) | length(dat_dim) > 1) { stop("Parameter 'dat_dim' must be a character string.") } - if (!dat_dim %in% names(dim(exp))) { + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { stop("Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension.") } } @@ -162,6 +145,32 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (any(name_exp != name_obs)) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.")) } ## ncores if (!is.null(ncores)) { @@ -177,6 +186,9 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' ## ensemble mean if (!is.null(memb_dim)) { exp <- MeanDims(exp, memb_dim) + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim) + } } if (is.null(dat_dim)) { diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R index c9fee49..60e8b80 100644 --- a/R/UltimateBrier.R +++ b/R/UltimateBrier.R @@ -104,7 +104,7 @@ UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', ti if (!is.character(dat_dim) | length(dat_dim) > 1) { stop("Parameter 'dat_dim' must be a character string.") } - if (!dat_dim %in% names(dim(exp))) { + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") } ## memb_dim @@ -124,7 +124,7 @@ UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', ti if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") } - if (!time_dim %in% names(dim(exp))) { + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } ## exp and obs (2) diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index b54d11b..7466ac0 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -22,8 +22,8 @@ range [0, 1].} \item{obs}{A numeric array with named dimensions of the binary observations (0 or 1). The dimension must at least have 'time_dim' and other dimensions -of 'exp' except 'memb_dim'. The length of 'dat_dim' can be different from -'exp'.} +of 'exp' except 'memb_dim' (optional). The length of 'dat_dim' can be +different from 'exp', and the length of 'memb_dim' must be 1 if it has.} \item{thresholds}{A numeric vector used to bin the forecasts. The default value is \code{seq(0, 1, 0.1)}, which means that the bins are @@ -36,8 +36,8 @@ which Brier score is computed. The default value is 'sdate'.} 'exp' and 'obs'. The length of this dimension can be different between 'exp' and 'obs'. The default value is NULL.} -\item{memb_dim}{A character string of the name of the member dimension. It -must be one dimension of 'exp'. The function will do the ensemble mean +\item{memb_dim}{A character string of the name of the member dimension in +'exp' (and 'obs', optional). The function will do the ensemble mean over this dimension. If there is no member dimension, set NULL. The default value is NULL.} diff --git a/tests/testthat/test-BrierScore.R b/tests/testthat/test-BrierScore.R index 5acfa55..f1ff1e4 100644 --- a/tests/testthat/test-BrierScore.R +++ b/tests/testthat/test-BrierScore.R @@ -40,7 +40,7 @@ test_that("1. Input checks", { expect_error( BrierScore(exp1, obs1), paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions expect 'exp' may have 'memb_dim'.") + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") ) # thresholds expect_error( @@ -75,6 +75,11 @@ test_that("1. Input checks", { "Parameter 'memb_dim' is not found in 'exp' dimension." ) expect_error( + BrierScore(exp1, array(1, dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)), memb_dim = 'member'), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + # ncores + expect_error( BrierScore(exp2, obs2, ncores = 0), "Parameter 'ncores' must be a positive integer." ) -- GitLab From 444451d074faca69f5971e6d90d03e11f9cdba82 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 28 Apr 2021 16:59:12 +0200 Subject: [PATCH 09/13] Add SpecsVerification in dependency --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f8f5567..8fffd60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Imports: plyr, ncdf4, NbClust, - multiApply (>= 2.1.1) + multiApply (>= 2.1.1), + SpecsVerification (>= 0.5.0) Suggests: easyVerification, testthat -- GitLab From 6a3c46b32b607fff3385d0ebe45aaf17c7f26897 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Jun 2021 13:10:52 +0200 Subject: [PATCH 10/13] Adjust param 'thresholds' of BrierScore(). Check if 'exp' is 0/1 if memb_dim exists. --- R/BrierScore.R | 52 +++++++----- R/UltimateBrier.R | 12 ++- tests/testthat/test-BrierScore.R | 123 ++++++++++++++++++++++------ tests/testthat/test-UltimateBrier.R | 6 +- 4 files changed, 140 insertions(+), 53 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index b49e0b6..8fc45c2 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -5,16 +5,18 @@ #'also returns the bias-corrected decomposition of the BS (Ferro and Fricker, #'2012). BSS has the climatology as the reference forecast. #' -#'@param exp A vector or a numeric array with named dimensions of the probablistic -#' prediction data. The dimension must at least have 'time_dim'. It may have -#' 'memb_dim' for performing ensemble mean. The values should be within the +#'@param exp A vector or a numeric array with named dimensions. It should be +#' the predicted probabilities which are within the range [0, 1] if memb_dim +#' doesn't exist. If it has memb_dim, the value should be 0 or 1, and the +#' predicted probabilities will be computed by ensemble mean. The dimensions +#' must at least have 'time_dim'. #' range [0, 1]. #'@param obs A numeric array with named dimensions of the binary observations #' (0 or 1). The dimension must at least have 'time_dim' and other dimensions -#' of 'exp' except 'memb_dim' (optional). The length of 'dat_dim' can be -#' different from 'exp', and the length of 'memb_dim' must be 1 if it has. +#' of 'exp' except 'memb_dim', which is optional. The length of 'dat_dim' can +#' be different from 'exp', and the length of 'memb_dim' must be 1 if it has. #'@param thresholds A numeric vector used to bin the forecasts. The default -#' value is \code{seq(0, 1, 0.1)}, which means that the bins are +#' value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are #' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}. #'@param time_dim A character string indicating the name of dimension along #' which Brier score is computed. The default value is 'sdate'. @@ -75,9 +77,6 @@ #'exp <- runif(10) #'obs <- round(exp) #'x <- BrierScore(exp, obs) -#'res <- x$bs - x$bs_check_res -#'res <- x$bs - x$bs_check_gres -#'res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected #' #'#===============uncomment the examples below when ProbBins is included========== #'# Inputs are arrays @@ -88,7 +87,7 @@ #' #'@import multiApply #'@export -BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate', +BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, ncores = NULL) { # Check inputs @@ -97,7 +96,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' stop("Parameter 'exp' and 'obs' cannot be NULL.") } if (!is.numeric(exp) | !is.numeric(obs)) { - stop("Parameter 'exp' and 'obs' must be a vector or a numeric array.") + stop("Parameter 'exp' and 'obs' must be a numeric vector or a numeric array.") } if (is.null(dim(exp))) { #is vector dim(exp) <- c(length(exp)) @@ -111,9 +110,6 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if (max(exp) > 1 | min(exp) < 0) { - stop("Parameter 'exp' must be within [0, 1] range.") - } if (any(!obs %in% c(0, 1))) { stop("Parameter 'obs' must be binary events (0 or 1).") } @@ -121,6 +117,9 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' if (!is.numeric(thresholds) | !is.vector(thresholds)) { stop("Parameter 'thresholds' must be a numeric vector.") } + if (any(thresholds <= 0 | thresholds >= 1)) { + stop("Parameter 'thresholds' must be between 0 and 1 as the bin-breaks.") + } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -152,6 +151,15 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' } } ## exp and obs (2) + if (is.null(memb_dim)) { + if (max(exp) > 1 | min(exp) < 0) { + stop("Parameter 'exp' must be within [0, 1] range.") + } + } else { + if (any(!exp %in% c(0, 1))) { + stop("Parameter 'exp' must be 0 or 1 if it has memb_dim.") + } + } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { @@ -210,7 +218,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' return(res) } -.BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { +.BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1)) { # exp: [sdate] or [sdate, nexp] # obs: [sdate] or [sdate, nobs] @@ -225,7 +233,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' arr_gres_bias_corrected <- arr_unc_bias_corrected <- arr_bss_bias_corrected <- array(dim = c(nexp = nexp, nobs = nobs)) arr_nk <- arr_fkbar <- arr_okbar <- - array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) - 1)) + array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) + 1)) } else { nexp <- 1 @@ -239,13 +247,15 @@ BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1), time_dim = 'sdate' obs <- obs_ori[, n_obs] } n <- length(exp) - nbins <- length(thresholds) - 1 # Number of bins - bins <- as.list(paste("bin", 1:nbins, sep = "")) + nbins <- length(thresholds) + 1 # Number of bins + bins <- vector('list', nbins) #as.list(paste("bin", 1:nbins, sep = "")) for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(exp >= thresholds[i] & exp <= thresholds[i + 1])) + if (i == 1) { + bins[[i]] <- list(which(exp >= 0 & exp < thresholds[i])) + } else if (i == nbins) { + bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp <= 1)) } else { - bins[[i]] <- list(which(exp >= thresholds[i] & exp < thresholds[i + 1])) + bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp < thresholds[i])) } } diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R index 60e8b80..aeaddcd 100644 --- a/R/UltimateBrier.R +++ b/R/UltimateBrier.R @@ -20,7 +20,8 @@ #' compute the probabilistic scores. The default value is 'sdate'. #'@param quantile A logical value to decide whether a quantile (TRUE) or a #' threshold (FALSE) is used to estimate the forecast and observed -#' probabilities. The default value is TRUE. +#' probabilities. If 'type' is 'FairEnsembleBS' or 'FairEnsembleBSS', it must +#' be TRUE. The default value is TRUE. #'@param thr A numeric vector to be used in probability calculation (for 'BS', #' 'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event #' judgement (for 'FairEnsembleBS' and 'FairEnsembleBSS'). It is as @@ -55,7 +56,7 @@ #'c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and #''memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' #'and 'obs' respectively.\cr -#'The list of 4 inlcudes: +#'The list of 4 includes: #' \itemize{ #' \item{$bs: Brier Score} #' \item{$rel: Reliability component} @@ -151,10 +152,13 @@ UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', ti stop("Parameter 'thr' must be a numeric vector.") } if (quantile) { - if (!all(thr <= 1 & thr >= 0)) { - stop("Parameter 'thr' must be within [0, 1] when quantile is TRUE.") + if (!all(thr < 1 & thr > 0)) { + stop("Parameter 'thr' must be between 0 and 1 when quantile is TRUE.") } } + if (!quantile & (type %in% c('FairEnsembleBSS', 'FairEnsembleBS'))) { + stop("Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'.") + } ## type if (!(type %in% c("BS", "BSS", "FairEnsembleBS", "FairEnsembleBSS", "FairStartDatesBS", "FairStartDatesBSS"))) { stop("Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'.") diff --git a/tests/testthat/test-BrierScore.R b/tests/testthat/test-BrierScore.R index f1ff1e4..5668a08 100644 --- a/tests/testthat/test-BrierScore.R +++ b/tests/testthat/test-BrierScore.R @@ -3,7 +3,7 @@ context("s2dv::BrierScore tests") ############################################## # dat1 set.seed(1) -exp1 <- array(runif(30), dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)) +exp1 <- array(runif(10), dim = c(dataset = 1, sdate = 5, ftime = 2)) set.seed(2) obs1 <- array(round(runif(10)), dim = c(dataset = 1, sdate = 5, ftime = 2)) @@ -13,6 +13,13 @@ exp2 <- runif(10) set.seed(2) obs2 <- round(runif(10)) +# dat3 +set.seed(1) +exp3 <- array(sample(c(0, 1), 60, replace = T), + dim = c(dataset = 2, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs3 <- array(sample(c(0, 1), 10, replace = T), + dim = c(dataset = 1, sdate = 5, ftime = 2)) ############################################## test_that("1. Input checks", { @@ -23,7 +30,7 @@ test_that("1. Input checks", { ) expect_error( BrierScore(c('b'), obs1), - "Parameter 'exp' and 'obs' must be a vector or a numeric array." + "Parameter 'exp' and 'obs' must be a numeric vector or a numeric array." ) expect_error( BrierScore(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), @@ -34,11 +41,15 @@ test_that("1. Input checks", { "Parameter 'exp' must be within \\[0, 1\\] range." ) expect_error( + BrierScore(exp = array(exp1, dim = dim(exp3)), obs = obs3, memb_dim = 'member'), + "Parameter 'exp' must be 0 or 1 if it has memb_dim." + ) + expect_error( BrierScore(exp1, runif(10)), "Parameter 'obs' must be binary events \\(0 or 1\\)." ) expect_error( - BrierScore(exp1, obs1), + BrierScore(exp3, obs3), paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions expect 'dat_dim' and 'memb_dim'.") ) @@ -47,6 +58,10 @@ test_that("1. Input checks", { BrierScore(exp2, obs2, thresholds = TRUE), "Parameter 'thresholds' must be a numeric vector." ) + expect_error( + BrierScore(exp2, obs2, thresholds = seq(0, 1, length.out = 4)), + "Parameter 'thresholds' must be between 0 and 1 as the bin-breaks." + ) # time_dim expect_error( BrierScore(exp2, obs2, time_dim = 1), @@ -75,7 +90,7 @@ test_that("1. Input checks", { "Parameter 'memb_dim' is not found in 'exp' dimension." ) expect_error( - BrierScore(exp1, array(1, dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)), memb_dim = 'member'), + BrierScore(exp3, array(1, dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)), memb_dim = 'member'), "The length of parameter 'memb_dim' in 'obs' must be 1." ) # ncores @@ -86,74 +101,75 @@ test_that("1. Input checks", { }) + ############################################## test_that("2. Output checks: dat1", { expect_equal( -length(BrierScore(exp1, obs1, memb_dim = 'member')), +length(BrierScore(exp1, obs1)), 16 ) expect_equal( -names(BrierScore(exp1, obs1, memb_dim = 'member')), +names(BrierScore(exp1, obs1)), c('rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', 'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', 'unc_bias_corrected', 'bss_bias_corrected', 'nk', 'fkbar', 'okbar') ) expect_equal( -dim(BrierScore(exp1, obs1, memb_dim = 'member')$rel), +dim(BrierScore(exp1, obs1)$rel), c(dataset = 1, ftime = 2) ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$rel[1, ], -c(0.1013649, 0.2549810), +BrierScore(exp1, obs1)$rel[1, ], +c(0.3086934, 0.3650011), tolerance = 0.0001 ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$res[1, ], -c(0.24, 0.24), +BrierScore(exp1, obs1)$res[1, ], +c(0.14, 0.14), tolerance = 0.0001 ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$bs[1, ], -c(0.1016759, 0.2549810), +BrierScore(exp1, obs1)$bs[1, ], +c(0.4218661, 0.4587647), tolerance = 0.0001 ) expect_equal( -dim(BrierScore(exp1, obs1, memb_dim = 'member')$okbar), +dim(BrierScore(exp1, obs1)$okbar), c(bin = 10, dataset = 1, ftime = 2) ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$okbar[, 1, 1], -c(NaN, 0, NaN, NaN, 0, NaN, 1, 1, NaN, NaN) +BrierScore(exp1, obs1)$okbar[, 1, 1], +c(NaN, NaN, 0.5,1.0, NaN, 1.0, NaN, NaN, NaN, 0.0) ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$fkbar[, 1, 1], -c(NaN, 0.1481059, NaN, NaN, 0.4034953, NaN, 0.6415412, 0.7448624, NaN, NaN), +BrierScore(exp1, obs1)$fkbar[, 1, 1], +c(NaN, NaN, 0.2335953, 0.3721239, NaN, 0.5728534, NaN, NaN, NaN, 0.9082078), tolerance = 0.0001 ) expect_equal( -BrierScore(exp1, obs1, memb_dim = 'member')$nk[, 1, 1], -c(0, 1, 0, 0, 1, 0, 2, 1, 0, 0) +BrierScore(exp1, obs1)$nk[, 1, 1], +c(0, 0, 2, 1, 0, 1, 0, 0, 0, 1) ) expect_equal( -dim(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$rel), +dim(BrierScore(exp1, obs1, dat_dim = 'dataset')$rel), c(nexp = 1, nobs = 1, ftime = 2) ) expect_equal( -dim(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$nk), +dim(BrierScore(exp1, obs1, dat_dim = 'dataset')$nk), c(nexp = 1, nobs = 1, bin = 10, ftime = 2) ) expect_equal( -as.vector(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$nk), -as.vector(BrierScore(exp1, obs1, memb_dim = 'member')$nk) +as.vector(BrierScore(exp1, obs1, dat_dim = 'dataset')$nk), +as.vector(BrierScore(exp1, obs1)$nk) ) expect_equal( -as.vector(BrierScore(exp1, obs1, memb_dim = 'member', dat_dim = 'dataset')$bs), -as.vector(BrierScore(exp1, obs1, memb_dim = 'member')$bs) +as.vector(BrierScore(exp1, obs1, dat_dim = 'dataset')$bs), +as.vector(BrierScore(exp1, obs1)$bs) ) -}) +}) ############################################## test_that("3. Output checks: dat2", { @@ -198,3 +214,56 @@ c(1, 0, 2, 1, 0, 1, 2, 0, 1, 2) ) }) +############################################## +test_that("4. Output checks: dat3", { + +expect_equal( +length(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), +16 +) +expect_equal( +names(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), +c('rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', + 'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', + 'unc_bias_corrected', 'bss_bias_corrected', 'nk', 'fkbar', 'okbar') +) +expect_equal( +dim(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$rel), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$rel[, 1, 2], +c(0.3555556, 0.2222222), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$res[1, 1, ], +c(0.0000000, 0.1066667), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$bs[2, 1, ], +c(0.3555556, 0.4222222), +tolerance = 0.0001 +) +expect_equal( +dim(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$okbar), +c(nexp = 2, nobs = 1, bin = 10, ftime = 2) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$okbar[, 1, 1, 1], +c(NaN, 1) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', thresholds = 1:2/3)$fkbar[2, 1, , 1], +c(0.0000000, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', thresholds = 1:2/3)$nk[1, 1, , 1], +c(0, 5, 0) +) + +}) + + diff --git a/tests/testthat/test-UltimateBrier.R b/tests/testthat/test-UltimateBrier.R index e8114f6..654aad0 100644 --- a/tests/testthat/test-UltimateBrier.R +++ b/tests/testthat/test-UltimateBrier.R @@ -70,6 +70,10 @@ test_that("1. Input checks", { UltimateBrier(exp1, obs1, quantile = c(0.05, 0.95)), "Parameter 'quantile' must be one logical value." ) + expect_error( + UltimateBrier(exp1, obs1, quantile = FALSE, thr = 1:3, type = 'FairEnsembleBS'), + "Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'." + ) # thr expect_error( UltimateBrier(exp1, obs1, thr = TRUE), @@ -77,7 +81,7 @@ test_that("1. Input checks", { ) expect_error( UltimateBrier(exp1, obs1, quantile = TRUE, thr = 1:3), - "Parameter 'thr' must be within \\[0, 1\\] when quantile is TRUE." + "Parameter 'thr' must be between 0 and 1 when quantile is TRUE." ) # type expect_error( -- GitLab From 8c817d84757471a5427ae4030bed4a9b1da6504f Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Jun 2021 13:25:31 +0200 Subject: [PATCH 11/13] Improve documentation --- R/BrierScore.R | 6 +++--- man/BrierScore.Rd | 21 ++++++++++----------- man/UltimateBrier.Rd | 5 +++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 8fc45c2..02b1480 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -12,9 +12,9 @@ #' must at least have 'time_dim'. #' range [0, 1]. #'@param obs A numeric array with named dimensions of the binary observations -#' (0 or 1). The dimension must at least have 'time_dim' and other dimensions -#' of 'exp' except 'memb_dim', which is optional. The length of 'dat_dim' can -#' be different from 'exp', and the length of 'memb_dim' must be 1 if it has. +#' (0 or 1). The dimension must be the same as 'exp' except memb_dim, which is +#' optional. If it has 'memb_dim', then the length must be 1. The length of +#' 'dat_dim' can be different from 'exp' if it has. #'@param thresholds A numeric vector used to bin the forecasts. The default #' value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are #' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}. diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 7466ac0..2b8aefe 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -7,7 +7,7 @@ BrierScore( exp, obs, - thresholds = seq(0, 1, 0.1), + thresholds = seq(0.1, 0.9, 0.1), time_dim = "sdate", dat_dim = NULL, memb_dim = NULL, @@ -15,18 +15,20 @@ BrierScore( ) } \arguments{ -\item{exp}{A vector or a numeric array with named dimensions of the probablistic -prediction data. The dimension must at least have 'time_dim'. It may have -'memb_dim' for performing ensemble mean. The values should be within the +\item{exp}{A vector or a numeric array with named dimensions. It should be +the predicted probabilities which are within the range [0, 1] if memb_dim +doesn't exist. If it has memb_dim, the value should be 0 or 1, and the +predicted probabilities will be computed by ensemble mean. The dimensions +must at least have 'time_dim'. range [0, 1].} \item{obs}{A numeric array with named dimensions of the binary observations -(0 or 1). The dimension must at least have 'time_dim' and other dimensions -of 'exp' except 'memb_dim' (optional). The length of 'dat_dim' can be -different from 'exp', and the length of 'memb_dim' must be 1 if it has.} +(0 or 1). The dimension must be the same as 'exp' except memb_dim, which is +optional. If it has 'memb_dim', then the length must be 1. The length of +'dat_dim' can be different from 'exp' if it has.} \item{thresholds}{A numeric vector used to bin the forecasts. The default -value is \code{seq(0, 1, 0.1)}, which means that the bins are +value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}.} \item{time_dim}{A character string indicating the name of dimension along @@ -90,9 +92,6 @@ also returns the bias-corrected decomposition of the BS (Ferro and Fricker, exp <- runif(10) obs <- round(exp) x <- BrierScore(exp, obs) -res <- x$bs - x$bs_check_res -res <- x$bs - x$bs_check_gres -res <- x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected #===============uncomment the examples below when ProbBins is included========== # Inputs are arrays diff --git a/man/UltimateBrier.Rd b/man/UltimateBrier.Rd index 714dc74..2cad133 100644 --- a/man/UltimateBrier.Rd +++ b/man/UltimateBrier.Rd @@ -39,7 +39,8 @@ compute the probabilistic scores. The default value is 'sdate'.} \item{quantile}{A logical value to decide whether a quantile (TRUE) or a threshold (FALSE) is used to estimate the forecast and observed -probabilities. The default value is TRUE.} +probabilities. If 'type' is 'FairEnsembleBS' or 'FairEnsembleBSS', it must +be TRUE. The default value is TRUE.} \item{thr}{A numeric vector to be used in probability calculation (for 'BS', 'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event @@ -78,7 +79,7 @@ same dimensions: c(nexp, nobs, no. of bins, the rest dimensions of 'exp' except 'time_dim' and 'memb_dim'). 'nexp' and 'nobs' is the length of dataset dimension in 'exp' and 'obs' respectively.\cr -The list of 4 inlcudes: +The list of 4 includes: \itemize{ \item{$bs: Brier Score} \item{$rel: Reliability component} -- GitLab From eaf10714ebb18ae81edcb1b1a83f47e43516ce12 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 7 Jun 2021 12:14:47 +0200 Subject: [PATCH 12/13] Fix example --- R/BrierScore.R | 9 ++++----- man/BrierScore.Rd | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 02b1480..5978751 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -78,12 +78,11 @@ #'obs <- round(exp) #'x <- BrierScore(exp, obs) #' -#'#===============uncomment the examples below when ProbBins is included========== #'# Inputs are arrays -#'#example(Load) -#'#bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'#bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'#res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') +#'example(Load) +#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') #' #'@import multiApply #'@export diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 2b8aefe..5ace589 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -93,12 +93,11 @@ exp <- runif(10) obs <- round(exp) x <- BrierScore(exp, obs) -#===============uncomment the examples below when ProbBins is included========== # Inputs are arrays -#example(Load) -#bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') +example(Load) +bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') } \references{ -- GitLab From 41a66d622e95c44f11b90aad6172072624554baf Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 7 Jun 2021 12:41:20 +0200 Subject: [PATCH 13/13] Revise example --- R/BrierScore.R | 7 +++++-- man/BrierScore.Rd | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 5978751..1363f61 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -80,8 +80,11 @@ #' #'# Inputs are arrays #'example(Load) -#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3)) +#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3)) #'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') #' #'@import multiApply diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 5ace589..9271a2a 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -95,8 +95,11 @@ x <- BrierScore(exp, obs) # Inputs are arrays example(Load) -bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) -bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3)) +bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3)) res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member') } -- GitLab