diff --git a/R/GetProbs.R b/R/GetProbs.R index 9960c53ff3604d6f1fff182290d647fea449df5b..25251ce9e9d1c8d2caea187341b5a73cc0a0613c 100644 --- a/R/GetProbs.R +++ b/R/GetProbs.R @@ -9,7 +9,8 @@ #'each category. For observations (or forecast without member dimension), 1 #'means that the event happened, while 0 indicates that the event did not #'happen. Weighted probabilities can be computed if the weights are provided for -#'each ensemble member and time step. +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. #' #'@param data A named numerical array of the forecasts or observations with, at #' least, time dimension. @@ -22,9 +23,21 @@ #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. #'@param indices_for_quantiles A vector of the indices to be taken along #' 'time_dim' for computing the absolute thresholds between the probabilistic -#' categories. If NULL, the whole period is used. The default value is NULL. +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. #'@param weights A named numerical array of the weights for 'data' with #' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value #' is NULL. The ensemble should have at least 70 members or span at least 10 @@ -37,20 +50,30 @@ #' computation. The default value is NULL. #' #'@return -#'A numerical array of probabilities with dimensions c(bin, the rest dimensions -#'of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic -#'categories, i.e., \code{length(prob_thresholds) + 1}. +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. #' #'@examples #'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) #'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', #' indices_for_quantiles = 4:17) #' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_quantiles = NULL, - prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE, ncores = NULL) { +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs ## data @@ -79,23 +102,67 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ "dimension exists, set it as NULL.") } } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') } - ## indices_for_quantiles - if (is.null(indices_for_quantiles)) { - indices_for_quantiles <- 1:dim(data)[time_dim] - } else { - if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { - stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") - } else if (length(indices_for_quantiles) > dim(data)[time_dim] | - max(indices_for_quantiles) > dim(data)[time_dim] | - any(indices_for_quantiles < 1)) { - stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. ", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + } + ## weights if (!is.null(weights)) { if (!is.array(weights) | !is.numeric(weights)) @@ -145,62 +212,83 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ } ############################### - - res <- Apply(data = list(data = data), - target_dims = c(time_dim, memb_dim), #, dat_dim), - output_dims = c("bin", time_dim), - fun = .GetProbs, -# dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_quantiles = indices_for_quantiles, - weights = weights, cross.val = cross.val, ncores = ncores)$output1 + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } return(res) } .GetProbs <- function(data, indices_for_quantiles, - prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE) { + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { # .GetProbs() is used in RPS, RPSS, ROCSS # data ## if data is exp: [sdate, memb] ## if data is obs: [sdate, (memb)] # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] # Add dim [memb = 1] to data if it doesn't have memb_dim if (length(dim(data)) == 1) { dim(data) <- c(dim(data), 1) if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) } - # Absolute thresholds - if (cross.val) { - quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) - for (i_time in 1:dim(data)[1]) { + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { if (is.null(weights)) { - quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) } else { # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], - weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) sorted_data <- sorted_arrays$data cumulative_weights <- sorted_arrays$cumulative_weights - quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) } - } else { - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], - weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) } - quantiles <- array(rep(quantiles, dim(data)[1]), dim = c(bin = length(quantiles), dim(data)[1])) } # quantiles: [bin-1, sdate] diff --git a/R/MSE.R b/R/MSE.R index 97e4e828e88f85fea639fb63520b54da7366db6b..6b67083a837f3d646a089955dc2591908471ad95 100644 --- a/R/MSE.R +++ b/R/MSE.R @@ -69,7 +69,7 @@ #' #'exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) #'obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) -#'res2 <- MSE(exp3, obs3, memb_dim = 'member') +#'res2 <- MSE(exp2, obs2, memb_dim = 'member') #' #'@import multiApply #'@importFrom ClimProjDiags Subset diff --git a/man/GetProbs.Rd b/man/GetProbs.Rd index fd84d2f878ecb208baafdf9176e67dfc145070d5..06ad046a1764d59c76f61028281a864e116f2aad 100644 --- a/man/GetProbs.Rd +++ b/man/GetProbs.Rd @@ -10,6 +10,8 @@ GetProbs( memb_dim = "member", indices_for_quantiles = NULL, prob_thresholds = c(1/3, 2/3), + abs_thresholds = NULL, + bin_dim_abs = "bin", weights = NULL, cross.val = FALSE, ncores = NULL @@ -29,12 +31,26 @@ member). The default value is 'member'.} \item{indices_for_quantiles}{A vector of the indices to be taken along 'time_dim' for computing the absolute thresholds between the probabilistic -categories. If NULL, the whole period is used. The default value is NULL.} +categories. If NULL (default), the whole period is used. It is only used +when 'prob_thresholds' is provided.} \item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to 1) between the categories. The default value is c(1/3, 2/3), which corresponds to tercile equiprobable categories.} +\item{abs_thresholds}{A numeric array or vector of the absolute thresholds in +the same units as \code{data}. If an array is provided, it should have at +least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +thresholds for different locations, i.e. lon and lat dimensions), they +should match the dimensions of \code{data}, except the member dimension +which should not be included. The default value is NULL and, in this case, +'prob_thresholds' is used for calculating the probabilities.} + +\item{bin_dim_abs}{A character string of the dimension name of +'abs_thresholds' array in which category limits are stored. It will also be +the probabilistic category dimension name in the output. The default value +is 'bin'.} + \item{weights}{A named numerical array of the weights for 'data' with dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value is NULL. The ensemble should have at least 70 members or span at least 10 @@ -49,9 +65,9 @@ is FALSE.} computation. The default value is NULL.} } \value{ -A numerical array of probabilities with dimensions c(bin, the rest dimensions -of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic -categories, i.e., \code{length(prob_thresholds) + 1}. +A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. } \description{ Compute probabilistic forecasts from an ensemble based on the relative @@ -63,11 +79,20 @@ the probabilities are calculated as the percentage of members that fall into each category. For observations (or forecast without member dimension), 1 means that the event happened, while 0 indicates that the event did not happen. Weighted probabilities can be computed if the weights are provided for -each ensemble member and time step. +each ensemble member and time step. The absolute thresholds can also be +provided directly for probabilities calculation. } \examples{ data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', indices_for_quantiles = 4:17) +# abs_thresholds is provided +abs_thr1 <- c(-0.2, 0.3) +abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', + prob_thresholds = NULL, abs_thresholds = abs_thr1) +res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', + prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') + } diff --git a/man/MSE.Rd b/man/MSE.Rd index 291d08c2891d54fb76c7ba7b5d083c34ad21c620..9464be6518a0065bbf4d262cb8025195268e01cb 100644 --- a/man/MSE.Rd +++ b/man/MSE.Rd @@ -97,6 +97,6 @@ res1 <- MSE(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) -res2 <- MSE(exp3, obs3, memb_dim = 'member') +res2 <- MSE(exp2, obs2, memb_dim = 'member') } diff --git a/tests/testthat/test-GetProbs.R b/tests/testthat/test-GetProbs.R index f1958dc22c21bf35c227df8a5159040147bdf8db..47413c1111507ba263d6e3ce1625f378938196d9 100644 --- a/tests/testthat/test-GetProbs.R +++ b/tests/testthat/test-GetProbs.R @@ -6,11 +6,22 @@ data1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, time = 2)) set.seed(2) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +abs_thr1_1 <- c(-0.2, 0.4) +abs_thr1_2 <- array(abs_thr1_1, dim = c(bin = 2)) +set.seed(4) +abs_thr1_3 <- array(abs_thr1_1 + rnorm(20)*0.1, dim = c(bin = 2, sdate = 10)) +abs_thr1_4 <- array(abs_thr1_3, dim = c(dim(abs_thr1_3), time = 2)) + # dat2 set.seed(1) data2 <- array(rnorm(20), dim = c(sdate = 10, time = 2)) set.seed(2) weights2 <- array(abs(rnorm(10)), dim = c(sdate = 10)) +abs_thr2_1 <- c(-0.2, 0.4) +abs_thr2_2 <- array(abs_thr2_1, dim = c(bin = 2)) +set.seed(4) +abs_thr2_3 <- array(abs_thr2_1 + rnorm(20)*0.3, dim = c(bin = 2, sdate = 10)) +abs_thr2_4 <- array(abs_thr2_3, dim = c(dim(abs_thr2_3), time = 2)) ############################################## @@ -47,6 +58,23 @@ test_that("1. Input checks", { GetProbs(data1, prob_thresholds = 1), "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." ) + # abs_thresholds + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = abs_thr1_2, bin_dim_abs = 'cat'), + "Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(dim(abs_thr1_3), member = 3))), + "Parameter abs_thresholds' cannot have member dimension." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(dim(abs_thr1_3), extra = 3))), + "Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(bin = 2, sdate = 5, time = 2))), + "Parameter 'abs_thresholds' dimensions must have the same length as 'data'." + ) # indices_for_clim expect_error( GetProbs(data1, indices_for_quantiles = array(1:6, dim = c(2, 3))), @@ -94,6 +122,10 @@ dim(GetProbs(data1)), c(bin = 3, sdate = 10, time = 2) ) expect_equal( +dim(GetProbs(data1, bin_dim_abs = "cat")), +c(cat = 3, sdate = 10, time = 2) +) +expect_equal( c(GetProbs(data1)[, 10, 2]), c(0.3333333, 0.3333333, 0.3333333), tolerance = 0.0001 @@ -173,6 +205,30 @@ c(0.3335612, 0.5277459, 0.1386929), tolerance = 0.0001 ) +# abs_threshold +expect_equal( +dim(GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL)), +c(bin = 3, sdate = 10, time = 2) +) +expect_equal( +c(GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 3, 2]), +c(0.3333333, 0.3333333, 0.3333333), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data1, abs_thresholds = abs_thr1_2, prob_thresholds = NULL, indices_for_quantiles = NULL) +) +expect_equal( +c(GetProbs(data1, abs_thresholds = abs_thr1_3, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 3, 2]), +c(0.6666667, 0, 0.3333333), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data1, abs_thresholds = abs_thr1_3, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data1, abs_thresholds = abs_thr1_4, prob_thresholds = NULL, indices_for_quantiles = NULL) +) + }) @@ -254,4 +310,28 @@ c(GetProbs(data2, memb_dim = NULL, cross.val = T, weights = weights2)[, 10, 2]), c(0, 1, 0) ) +# abs_threshold +expect_equal( +dim(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL)), +c(bin = 3, sdate = 10, time = 2) +) +expect_equal( +c(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 5, 1]), +c(0, 1, 0), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_2, prob_thresholds = NULL, indices_for_quantiles = NULL) +) +expect_equal( +c(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_3, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 5, 1]), +c(1, 0, 0), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_3, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_4, prob_thresholds = NULL, indices_for_quantiles = NULL) +) + })