#'Compute the ratio between the ensemble spread and RMSE #' #'Compute the ratio between the spread of the members around the #'ensemble mean in experimental data and the RMSE between the ensemble mean of #'experimental and observational data. The p-value and/or the statistical #'significance is provided by a two-sided Fisher's test. #' #'@param exp A named numeric array of experimental data with at least two #' dimensions 'memb_dim' and 'time_dim'. #'@param obs A named numeric array of observational data with at least two #' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as #' parameter 'exp' except along 'dat_dim' and 'memb_dim'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is NULL (no dataset). #'@param memb_dim A character string indicating the name of the member #' dimension. It must be one dimension in 'exp' and 'obs'. The default value #' is 'member'. #'@param time_dim A character string indicating the name of dimension along #' which the ratio is computed. The default value is 'sdate'. #'@param pval A logical value indicating whether to compute the p-value #' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical #' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is #' FALSE. #'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. #'@param na.rm A logical value indicating whether to remove NA values. The #' default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return A list of two arrays with dimensions c(nexp, nobs, the rest of #' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is #' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. #' If dat_dim is NULL, nexp and nobs are omitted. \cr #'\item{$ratio}{ #' The ratio of the ensemble spread and RMSE. #'} #'\item{$p_val}{ #' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only #' present if \code{pval = TRUE}. #'} #' #'@examples #'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) #'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) #'sprerr1 <- SprErr(exp, obs) #'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) #'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) #' #'@import multiApply #'@export SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_dim = 'sdate', pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { stop("Parameter 'exp' and 'obs' cannot be NULL.") } if (!is.numeric(exp) | !is.numeric(obs)) { stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions memb_dim and time_dim.")) } if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { stop("Parameter 'dat_dim' must be a character string.") } if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") } } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimensions. ", "'exp' must have the member dimension to compute the spread.") } # Add [member = 1] if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { dim(obs) <- c(dim(obs), 1) names(dim(obs))[length(dim(obs))] <- memb_dim } if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { ## check no longer needed? dim(exp) <- c(dim(exp), 1) names(dim(exp))[length(dim(exp))] <- memb_dim } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") } if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } name_exp <- name_exp[-which(name_exp == memb_dim)] name_obs <- name_obs[-which(name_obs == memb_dim)] if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", "all the dimensions except 'dat_dim' and 'memb_dim'.")) } ## pval if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") } ## sign if (!is.logical(sign) | length(sign) > 1) { stop("Parameter 'sign' must be one logical value.") } # alpha if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } # na.rm if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate RatioSDRMS # If dat_dim = NULL, insert dat dim remove_dat_dim <- FALSE if (is.null(dat_dim)) { dat_dim <- 'dataset' exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') remove_dat_dim <- TRUE } res <- Apply(list(exp, obs), target_dims = list(c(dat_dim, memb_dim, time_dim), c(dat_dim, memb_dim, time_dim)), pval = pval, sign = sign, alpha = alpha, na.rm = na.rm, fun = .SprErr, ncores = ncores) if (remove_dat_dim) { if (length(dim(res[[1]])) > 2) { res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') } else { res <- lapply(res, as.vector) } } return(res) } .SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { # exp: [dat_exp, member, sdate] # obs: [dat_obs, member, sdate] nexp <- dim(exp)[1] nobs <- dim(obs)[1] # ensemble mean ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] ens_obs <- MeanDims(obs, 2, na.rm = na.rm) # Create empty arrays ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] for (jexp in 1:nexp) { for (jobs in 1:nobs) { # spread and error spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) ratio[jexp, jobs] <- spread/error # effective sample size enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) if (pval | sign) { f_statistic <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) if (!is.na(f_statistic) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { p.val[jexp, jobs] <- pf(f_statistic, enospr - 1, enodif - 1) p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) } else { p.val[jexp, jobs] <- NA } } } } res <- list(ratio = ratio) if (pval) {res$p.val <- p.val} if (sign) {res$sign <- p.val <= alpha} return(res) }