diff --git a/NAMESPACE b/NAMESPACE index 3aa8f2901eef4b768a0125e9aa4bfc3a0b6d1c93..3b097ba5778c78dca252519bd7b86269ed230046 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ export(PlotLayout) export(PlotMatrix) export(PlotSection) export(PlotStereoMap) +export(ProbBins) export(ProjectField) export(REOF) export(RMS) diff --git a/R/ProbBins.R b/R/ProbBins.R new file mode 100644 index 0000000000000000000000000000000000000000..327ceb34fafb48b6425373536e2b1bf7d70041dc --- /dev/null +++ b/R/ProbBins.R @@ -0,0 +1,213 @@ +#'Compute probabilistic information of a forecast relative to a threshold or a quantile +#' +#'Compute probabilistic bins of a set of forecast years ('fcyr') relative to +#'the forecast climatology over the whole period of anomalies, optionally excluding +#'the selected forecast years ('fcyr') or the forecast year for which the +#'probabilistic bins are being computed (see 'compPeriod'). +#' +#'@param data An numeric array of anomalies with the dimensions 'time_dim' and +#' 'memb_dim' at least. It can be generated by \code{Ano()}. +#'@param thr A numeric vector used as the quantiles (if 'quantile' is TRUE) or +#' thresholds (if 'quantile' is FALSE) to bin the anomalies. If it is quantile, +#' it must be within [0, 1]. +#'@param fcyr A numeric vector of the indices of the forecast years (i.e., +#' time_dim) to compute the probabilistic bins for, or 'all' to compute the +#' bins for all the years. E.g., c(1:5), c(1, 4), 4, or 'all'. The default +#' value is 'all'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the probabilistic bins. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension or the dimension to be merged with 'time_dim' for probabilistic +#' calculation. The default value is 'member'. +#'@param quantile A logical value indicating if the thresholds ('thr') are +#' quantiles (TRUE) or the absolute thresholds of the bins (FALSE). The +#' default value is TRUE. +#'@param compPeriod A character string referring to three computation options:\cr +#' "Full period": The probabilities are computed based on 'data';\cr +#' "Without fcyr": The probabilities are computed based on 'data' with all +#' 'fcyr' removed;\cr +#' "Cross-validation": The probabilities are computed based on leave-one-out +#' cross-validation.\cr +#' The default value is "Full period". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numeric array of probabilistic information with dimensions:\cr +#' c(bin = length of 'thr' + 1, time_dim = length of 'fcyr', memb_dim, the +#' rest of dimensions of 'data')\cr +#' The values along the 'bin' dimension take values 0 or 1 depending on which +#' of the 'thr' + 1 cathegories the forecast or observation at the corresponding +#' grid point, time step, member and start date belongs to. +#' +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'clim <- Clim(sampleMap$mod, sampleMap$obs) +#'ano_exp <- Ano(sampleMap$mod, clim$clim_exp) +#'PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), quantile = TRUE) +#' +#'@import multiApply +#'@importFrom abind abind +#'@export +ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'member', + quantile = TRUE, compPeriod = "Full period", + ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } +# if (is.null(dim(data))) { #is vector +# dim(data) <- c(length(data)) +# names(dim(data)) <- time_dim +# } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## thr + if (is.null(thr)) { + stop("Parameter 'thr' cannot be NULL.") + } + if (!is.numeric(thr) | !is.vector(thr)) { + stop("Parameter 'thr' must be a numeric vector.") + } else if (quantile) { + if (!all(thr <= 1 & thr >= 0)) { + stop("Parameter 'thr' must be within the range [0, 1] if 'quantile' is TRUE.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + ## fcyr + if (fcyr != 'all') { + if (!is.numeric(fcyr) | !is.vector(fcyr)) { + stop("Parameter 'fcyr' must be a numeric vector or 'all'.") + } else if (any(fcyr %% 1 != 0) | min(fcyr) < 1 | max(fcyr) > dim(data)[time_dim]) { + stop(paste0("Parameter 'fcyr' must be the indices of 'time_dim' within ", + "the range [1, ", dim(data)[time_dim], "].")) + } + } else { + fcyr <- 1:dim(data)[time_dim] + } + ## quantile + if (!is.logical(quantile) | length(quantile) > 1) { + stop("Parameter 'quantile' must be one logical value.") + } + ## compPeriod + if (length(compPeriod) != 1 | any(!compPeriod %in% c('Full period', 'Without fcyr', 'Cross-validation'))) { + stop("Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', or 'Cross-validation'.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate ProbBins + + res <- Apply(list(data), + target_dims = list(c(time_dim, memb_dim)), + output_dims = list(c('bin', time_dim, memb_dim)), + fun = .ProbBins, + thr = thr, fcyr = fcyr, quantile = quantile, + compPeriod = compPeriod, + ncores = ncores)$output1 + + return(res) +} + +.ProbBins <- function(data, thr = thr, fcyr = 'all', quantile, compPeriod = "Full period") { + + # data: [sdate, member] + + if (compPeriod != 'Cross-validation') { + # forecast + fore <- data[fcyr, ] + sample_fore <- as.vector(fore) # vector: [fcyr + member] + # hindcast + if (compPeriod == "Full period") { + hind <- data + sample <- as.vector(hind) # vector: [sdate + member] + } else if (compPeriod == "Without fcyr") { + hind <- data[-fcyr, ] + sample <- as.vector(hind) # vector: [sdate - fcyr + member] + } + + # quantiles + if (quantile) { + qum <- quantile(sample, probs = thr, na.rm = TRUE, names = FALSE, + type = 8) # vector: [length(thr)] + } else { + qum <- thr + } + + # PBF: Probabilistic bins of a forecast + # This array contains 0s and 1s that indicate the category where the forecast is. + PBF <- array(counts(c(qum, sample_fore), nbthr = length(thr)), + dim = c(length(thr) + 1, length(fcyr), dim(data)[2])) +# names(dim(PBF)) <- c('bin', 'sdate', 'member') + + return(invisible(PBF)) + + + } else { # Cross-Validation + + result <- NULL + for (iyr in fcyr) { + if (is.null(result)) { + result <- .ProbBins(data, fcyr = iyr, thr = thr, quantile = quantile, + compPeriod = "Without fcyr") # [bin, sdate, member] + } else { + result <- abind::abind(result, .ProbBins(data, fcyr = iyr, thr = thr, + quantile = quantile, + compPeriod = "Without fcyr"), + along = 2) # along sdate + } + } + + return(result) + + } + +} + +# This function assign the values to a category which is limited by the thresholds +# It provides binary information +counts <- function (dat, nbthr) { + thr <- dat[1:nbthr] + data <- dat[nbthr + 1:(length(dat) - nbthr)] + prob <- array(NA, dim = c(nbthr + 1, length(dat) - nbthr)) + prob[1, ] <- 1*(data <= thr[1]) + if (nbthr != 1) { + for (ithr in 2:(nbthr)) { + prob[ithr, ] <- 1 * ((data > thr[ithr - 1]) & (data <= thr[ithr])) + } + } + prob[nbthr + 1, ] <- 1 * (data > thr[nbthr]) + return(prob) +} + diff --git a/man/ProbBins.Rd b/man/ProbBins.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cfd7affed05b4ac98400ff9f6c51973ca798a995 --- /dev/null +++ b/man/ProbBins.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ProbBins.R +\name{ProbBins} +\alias{ProbBins} +\title{Compute probabilistic information of a forecast relative to a threshold or a quantile} +\usage{ +ProbBins( + data, + thr, + fcyr = "all", + time_dim = "sdate", + memb_dim = "member", + quantile = TRUE, + compPeriod = "Full period", + ncores = NULL +) +} +\arguments{ +\item{data}{An numeric array of anomalies with the dimensions 'time_dim' and +'memb_dim' at least. It can be generated by \code{Ano()}.} + +\item{thr}{A numeric vector used as the quantiles (if 'quantile' is TRUE) or +thresholds (if 'quantile' is FALSE) to bin the anomalies. If it is quantile, +it must be within [0, 1].} + +\item{fcyr}{A numeric vector of the indices of the forecast years (i.e., +time_dim) to compute the probabilistic bins for, or 'all' to compute the +bins for all the years. E.g., c(1:5), c(1, 4), 4, or 'all'. The default +value is 'all'.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the probabilistic bins. The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension or the dimension to be merged with 'time_dim' for probabilistic +calculation. The default value is 'member'.} + +\item{quantile}{A logical value indicating if the thresholds ('thr') are +quantiles (TRUE) or the absolute thresholds of the bins (FALSE). The +default value is TRUE.} + +\item{compPeriod}{A character string referring to three computation options:\cr +"Full period": The probabilities are computed based on 'data';\cr +"Without fcyr": The probabilities are computed based on 'data' with all +'fcyr' removed;\cr +"Cross-validation": The probabilities are computed based on leave-one-out +cross-validation.\cr +The default value is "Full period".} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric array of probabilistic information with dimensions:\cr + c(bin = length of 'thr' + 1, time_dim = length of 'fcyr', memb_dim, the + rest of dimensions of 'data')\cr + The values along the 'bin' dimension take values 0 or 1 depending on which + of the 'thr' + 1 cathegories the forecast or observation at the corresponding + grid point, time step, member and start date belongs to. +} +\description{ +Compute probabilistic bins of a set of forecast years ('fcyr') relative to +the forecast climatology over the whole period of anomalies, optionally excluding +the selected forecast years ('fcyr') or the forecast year for which the +probabilistic bins are being computed (see 'compPeriod'). +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +clim <- Clim(sampleMap$mod, sampleMap$obs) +ano_exp <- Ano(sampleMap$mod, clim$clim_exp) +PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), quantile = TRUE) + +} diff --git a/tests/testthat/test-ProbBins.R b/tests/testthat/test-ProbBins.R new file mode 100644 index 0000000000000000000000000000000000000000..4b3d0ecde379c9543d2485e054bc15153d22d90c --- /dev/null +++ b/tests/testthat/test-ProbBins.R @@ -0,0 +1,172 @@ +context("s2dv::ProbBins tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(24), dim = c(dataset = 1, member = 3, sdate = 4, ftime = 2)) +############################################## + +test_that("1. Input checks", { + + expect_error( + ProbBins(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + ProbBins(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + ProbBins(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # thr + expect_error( + ProbBins(dat1, thr = c()), + "Parameter 'thr' cannot be NULL." + ) + expect_error( + ProbBins(dat1, thr = TRUE), + "Parameter 'thr' must be a numeric vector." + ) + expect_error( + ProbBins(dat1, thr = 1:10), + "Parameter 'thr' must be within the range \\[0, 1\\] if 'quantile' is TRUE." + ) + # time_dim + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), time_dim = 'time'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # memb_dim + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), memb_dim = 'ens'), + "Parameter 'memb_dim' is not found in 'data' dimension." + ) + # fcyr + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 'sdate'), + "Parameter 'fcyr' must be a numeric vector or 'all'." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2:6), + "Parameter 'fcyr' must be the indices of 'time_dim' within the range \\[1, 4\\]." + ) + # quantile + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), quantile = 0.9), + "Parameter 'quantile' must be one logical value." + ) + # compPeriod + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), compPeriod = TRUE), + "Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', or 'Cross-validation'." + ) + # ncores + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3))), +c(bin = 3, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75))), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75), compPeriod = 'Cross-validation')), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75), compPeriod = 'Without fcyr')), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3)) == 0)), +48 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3)) == 1)), +24 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3)) == 1), +c(1, 6, 8, 10, 14, 17, 21, 24, 25, 28, 33, 35, 37, 40, 45, 47, 49, 53, 56, 59, 63, 66, 69, 70) +) +expect_equal( +all(is.na(ProbBins(dat1, thr = c(1/3, 2/3), compPeriod = 'Without fcyr'))), +TRUE +) +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr')), +c(bin = 3, sdate = 1, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 0)), +12 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 1)), +6 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 1), +c(3, 5, 7, 11, 14, 18) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 0)), +12 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 1)), +6 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 1), +c(3, 5, 7, 11, 14, 18) +) + +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE)), +c(bin = 3, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 0)), +48 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 1)), +24 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 1), +c(1, 6, 8, 10, 13, 16, 21, 24, 25, 28, 32, 35, 37, 40, 45, 48, 49, 52, 56, 58, 63, 66, 69, 70) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1:3), quantile = FALSE) == 0)), +72 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1:3), quantile = FALSE) == 1)), +24 +) + + +})