diff --git a/NAMESPACE b/NAMESPACE index 9214a1a54c1b7c4bc7e1e81203b51ac5e674cc26..587aec211d4a7bb13e0ef6e9e7dd9a4a99f6eadd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ export(Season) export(SignalNoiseRatio) export(Smoothing) export(Spectrum) +export(SprErr) export(Spread) export(StatSeasAtlHurr) export(TPI) diff --git a/R/SprErr.R b/R/SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..9892d949c3734b312bf539b476ad40baff2f2ffd --- /dev/null +++ b/R/SprErr.R @@ -0,0 +1,219 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance is provided by a two-sided Fisher's test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute the p-value +#' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param na.rm A logical value indicating whether to remove NA values. The +#' default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +#' If dat_dim is NULL, nexp and nobs are omitted. \cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p_val}{ +#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only +#' present if \code{pval = TRUE}. +#'} +#' +#'@examples +#'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) +#'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) +#'sprerr1 <- SprErr(exp, obs) +#'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +#'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) +#' +#'@import multiApply +#'@export +SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, sign = FALSE, + alpha = 0.05, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions memb_dim and time_dim.")) + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + na.rm = na.rm, + fun = .SprErr, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) + enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) + if (pval | sign) { + F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + p.val[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} diff --git a/man/SprErr.Rd b/man/SprErr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cdc647eb86373a69c6ff2fedd4a1b15dbc90b584 --- /dev/null +++ b/man/SprErr.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SprErr.R +\name{SprErr} +\alias{SprErr} +\title{Compute the ratio between the ensemble spread and RMSE} +\usage{ +SprErr( + exp, + obs, + dat_dim = NULL, + memb_dim = "member", + time_dim = "sdate", + pval = TRUE, + sign = FALSE, + alpha = 0.05, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data with at least two +dimensions 'memb_dim' and 'time_dim'.} + +\item{obs}{A named numeric array of observational data with at least two +dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. The default value is NULL (no dataset).} + +\item{memb_dim}{A character string indicating the name of the member +dimension. It must be one dimension in 'exp' and 'obs'. The default value +is 'member'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the ratio is computed. The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute the p-value +of the test Ho : SD/RMSE = 1 or not. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to retrieve the statistical +significance of the test Ho: ACC = 0 based on 'alpha'. The default value is +FALSE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} + +\item{na.rm}{A logical value indicating whether to remove NA values. The +default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of two arrays with dimensions c(nexp, nobs, the rest of + dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is + the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. + If dat_dim is NULL, nexp and nobs are omitted. \cr +\item{$ratio}{ + The ratio of the ensemble spread and RMSE. +} +\item{$p_val}{ + The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only + present if \code{pval = TRUE}. +} +} +\description{ +Compute the ratio between the spread of the members around the +ensemble mean in experimental data and the RMSE between the ensemble mean of +experimental and observational data. The p-value and/or the statistical +significance is provided by a two-sided Fisher's test. +} +\examples{ +exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) +obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) +sprerr1 <- SprErr(exp, obs) +sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) + +}