diff --git a/NAMESPACE b/NAMESPACE index 18679ec35dd971ece18f106ce36b9c2cc358e0be..390f85aa6794e959cd3e161936970463f4829743 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,6 +55,7 @@ export(RMS) export(RMSSS) export(RandomWalkTest) export(RatioRMS) +export(RatioSDRMS) export(Regression) export(Reorder) export(SPOD) diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R new file mode 100644 index 0000000000000000000000000000000000000000..4d833d7ff12e05085c9bea899e8950ada803b4fd --- /dev/null +++ b/R/RatioSDRMS.R @@ -0,0 +1,200 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the standard deviation of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value is provided by a one-sided +#'Fischer test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. If there is no dataset dimension, set as NULL. The default value +#' is 'dataset'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +#' (only present if \code{pval = TRUE}) of the one-sided Fisher test with +#'Ho: SD/RMSE = 1.\cr\cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p_val}{ +#' The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present +#' if \code{pval = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) +#'# Reorder the data in order to plot it with PlotVsLTime +#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +#'rsdrms_plot[, , 2, ] <- rsdrms$ratio +#'rsdrms_plot[, , 4, ] <- rsdrms$p.val +#'\donttest{ +#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", +#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), +#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, +#' fileout = 'tos_rsdrms.eps') +#'} +#' +#'@import multiApply +#'@export +RatioSDRMS <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions memb_dim and time_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions expect 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + fun = .RatioSDRMS, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.RatioSDRMS <- function(exp, obs, pval = TRUE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = TRUE) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = TRUE) + + dif <- exp - InsertDim(ens_exp, 2, dim(exp)[2]) # [nexp, member, sdate] + std <- apply(dif, 1, sd, na.rm = TRUE) # [nexp] + enosd <- apply(Eno(dif, names(dim(exp))[3]), 1, sum, na.rm = TRUE) + + # Create empty arrays + ratiosdrms <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + dif <- ens_exp[jexp, ] - ens_obs[jobs, ] + rms <- mean(dif^2, na.rm = TRUE)^0.5 + enorms <- Eno(dif) + ratiosdrms[jexp, jobs] <- std[jexp]/rms + + if (pval) { + F <- (enosd[jexp] * std[jexp]^2 / (enosd[jexp] - 1)) / (enorms * rms^2 / (enorms - 1)) + if (!is.na(F) & !is.na(enosd) & !is.na(enorms) & enosd > 2 && enorms > 2) { + p.val[jexp, jobs] <- 1 - pf(F, enosd[jexp] - 1, enorms - 1) + } else { + ratiosdrms[jexp, jobs] <- NA + } + } + } + } + + if (pval) { + return(invisible(list(ratio = ratiosdrms, p.val = p.val))) + } else { + return(invisible(list(ratio = ratiosdrms))) + } +} diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7dbd68283599edcfa599768a1946b00ce89fdea1 --- /dev/null +++ b/man/RatioSDRMS.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RatioSDRMS.R +\name{RatioSDRMS} +\alias{RatioSDRMS} +\title{Compute the ratio between the ensemble spread and RMSE} +\usage{ +RatioSDRMS( + exp, + obs, + dat_dim = "dataset", + memb_dim = "member", + time_dim = "sdate", + pval = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data with at least two +dimensions 'memb_dim' and 'time_dim'.} + +\item{obs}{A named numeric array of observational data with at least two +dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. If there is no dataset dimension, set as NULL. The default value +is 'dataset'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. It must be one dimension in 'exp' and 'obs'. The default value +is 'member'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the ratio is computed. The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho : SD/RMSE = 1 or not. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of two arrays with dimensions c(nexp, nobs, the rest of + dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is + the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +(only present if \code{pval = TRUE}) of the one-sided Fisher test with +Ho: SD/RMSE = 1.\cr\cr +\item{$ratio}{ + The ratio of the ensemble spread and RMSE. +} +\item{$p_val}{ + The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present + if \code{pval = TRUE}. +} +} +\description{ +Compute the ratio between the standard deviation of the members around the +ensemble mean in experimental data and the RMSE between the ensemble mean of +experimental and observational data. The p-value is provided by a one-sided +Fischer test. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) +# Reorder the data in order to plot it with PlotVsLTime +rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +rsdrms_plot[, , 2, ] <- rsdrms$ratio +rsdrms_plot[, , 4, ] <- rsdrms$p.val +\donttest{ +PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", + monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, + fileout = 'tos_rsdrms.eps') +} + +} diff --git a/tests/testthat/test-RatioSDRMS.R b/tests/testthat/test-RatioSDRMS.R new file mode 100644 index 0000000000000000000000000000000000000000..4b93fafa213dcac0e831b7ac59d7646064f8f856 --- /dev/null +++ b/tests/testthat/test-RatioSDRMS.R @@ -0,0 +1,165 @@ +context("s2dv::RatioSDRMS tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(40), dim = c(dataset = 2, member = 2, sdate = 5, ftime = 2)) + set.seed(2) + obs1 <- array(rnorm(10), dim = c(dataset = 1, member = 1, sdate = 5, ftime = 2)) + + # dat2 + exp2 <- exp1 + obs2 <- obs1 + exp2[1] <- NA + + # dat 3 + set.seed(3) + exp3 <- array(rnorm(10), dim = c(member = 2, sdate = 5)) + set.seed(4) + obs3 <- array(rnorm(5), dim = c(member = 1, sdate = 5)) + + +############################################## +test_that("1. Input checks", { + + expect_error( + RatioSDRMS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + RatioSDRMS(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + RatioSDRMS(c(1:10), c(2:4)), + "Parameter 'exp' and 'obs' must be array with as least two dimensions memb_dim and time_dim." + ) + expect_error( + RatioSDRMS(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + RatioSDRMS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + expect_error( + RatioSDRMS(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + RatioSDRMS(exp1, obs1, dat_dim = 'a'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RatioSDRMS(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + RatioSDRMS(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, pval = 1), + "Parameter 'pval' must be one logical value." + ) + expect_error( + RatioSDRMS(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + RatioSDRMS(exp = exp3, + obs = array(1:2, dim = c(member = 1, sdate = 2)), dat_dim = NULL), + "Parameter 'exp' and 'obs' must have same length of all the dimensions expect 'dat_dim' and 'memb_dim'." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { +expect_equal( +names(RatioSDRMS(exp1, obs1)), +c('ratio', 'p.val') +) +expect_equal( +dim(RatioSDRMS(exp1, obs1)$ratio), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +dim(RatioSDRMS(exp1, obs1)$p.val), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$ratio), +c(0.7198164, 0.6525068, 0.6218262, 0.6101527), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$p.val), +c(0.8464094, 0.8959219, 0.9155102, 0.9224119), +tolerance = 0.0001 +) +expect_equal( +names(RatioSDRMS(exp1, obs1, pval = F)), +c('ratio') +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$ratio), +as.vector(RatioSDRMS(exp1, obs1, pval = F)$ratio) +) + +}) + +############################################## +test_that("3. Output checks: dat2", { +expect_equal( +dim(RatioSDRMS(exp2, obs2)$ratio), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +as.vector(RatioSDRMS(exp2, obs2)$ratio), +c(0.7635267, 0.6525068, 0.6218262, 0.6101527), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$p.val), +c(0.8464094, 0.8959219, 0.9155102, 0.9224119), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("4. Output checks: dat3", { +expect_equal( +names(RatioSDRMS(exp3, obs3, dat_dim = NULL)), +c('ratio', 'p.val') +) +expect_equal( +dim(RatioSDRMS(exp3, obs3, dat_dim = NULL)$ratio), +NULL +) +expect_equal( +dim(RatioSDRMS(exp3, obs3, dat_dim = NULL)$p.val), +NULL +) +expect_equal( +as.numeric(RatioSDRMS(exp3, obs3, dat_dim = NULL)$ratio), +0.8291582, +tolerance = 0.0001 +) +expect_equal( +as.numeric(RatioSDRMS(exp3, obs3, dat_dim = NULL)$p.val), +0.7525497, +tolerance = 0.0001 +) + + +})