diff --git a/R/RMSSS.R b/R/RMSSS.R index e44105cea96ba9f14c2e96f8488f4cc986c9826d..9c47da4554aebd21fa8c17e963b0121557779d0c 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -6,11 +6,12 @@ #'different, with the number of experiments/models (nexp) and the number of #'observational datasets (nobs).\cr #'RMSSS computes the root mean square error skill score of each jexp in 1:nexp -#'against each jobs in 1:nobs which gives nexp * nobs RMSSS for each other -#'grid point of the array.\cr -#'The RMSSS are computed along the time_dim dimension which should corresponds -#'to the startdate dimension.\cr -#'The p-value is optionally provided by an one-sided Fisher test.\cr +#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'of the array.\cr +#'The RMSSS are computed along the time_dim dimension which should correspond +#'to the start date dimension.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test.\cr #' #'@param exp A named numeric array of experimental data which contains at least #' two dimensions for dat_dim and time_dim. It can also be a vector with the @@ -27,8 +28,12 @@ #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. #'@param pval A logical value indicating whether to compute or not the p-value -#' of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will -#' return NA. The default value is TRUE. +#' of the test Ho: RMSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: RMSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -36,12 +41,18 @@ #'A list containing the numeric arrays with dimension:\cr #' c(nexp, nobs, all other dimensions of exp except time_dim).\cr #'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the -#'number of observation (i.e., dat_dim in obs).\cr +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr #'\item{$rmsss}{ -#' The root mean square error skill score. +#' A numerical array of the root mean square error skill score. #'} #'\item{$p.val}{ -#' The p-value. Only present if \code{pval = TRUE}. +#' A numerical array of the p-value with the same dimensions as $rmsss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the RMSSS with the same +#' dimensions as $rmsss. Only present if \code{sign = TRUE}. #'} #' #'@examples @@ -49,14 +60,14 @@ #' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) #' set.seed(2) #' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -#' res <- RMSSS(exp, obs, time_dim = 'time') +#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') #' #'@rdname RMSSS #'@import multiApply #'@importFrom stats pf #'@export RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', - pval = TRUE, ncores = NULL) { + pval = TRUE, sign = FALSE, alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -109,6 +120,14 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -148,14 +167,17 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', c(time_dim, dat_dim)), fun = .RMSSS, time_dim = time_dim, dat_dim = dat_dim, - pval = pval, ncores_input = ncores, + pval = pval, sign = sign, alpha = alpha, ncores = ncores) return(res) } .RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, - ncores_input = NULL) { + sign = FALSE, alpha = 0.05) { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] @@ -188,9 +210,7 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - # rms1 and eno1 rms1 <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - # rms2 and eno2 rms2 <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs)) rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( rms2), na.rm = TRUE) / 1000 @@ -203,21 +223,24 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # use rms1 and rms2 to calculate rmsss rmsss <- 1 - rms1/rms2 - ## pval and conf - if (pval) { - eno1 <- Eno(dif1, time_dim, ncores = ncores_input) - eno2 <- Eno(obs, time_dim, ncores = ncores_input) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + eno2 <- Eno(obs, time_dim) eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) eno2 <- Reorder(eno2, c(2, 1)) } # pval - if (pval) { + if (pval || sign) { F.stat <- (eno2 * rms2^2 / (eno2- 1)) / ((eno1 * rms1^2 / (eno1- 1))) tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA # change not significant rmsss to NA rmsss[which(!tmp)] <- NA @@ -228,15 +251,19 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (is.null(dat_dim)) { dim(rmsss) <- NULL dim(p_val) <- NULL + if (sign) dim(signif) <- NULL } ################################### # output + res <- list(rmsss = rmsss) if (pval) { - res <- list(rmsss = rmsss, p.val = p_val) - - } else { - res <- list(rmsss = rmsss) + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) } return(res) diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 9ebcf65475512398233ccf6da90fe09289bf0388..05dfb1bb47af6aa8a51df3ec9ceda1df484bbc4f 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -10,6 +10,8 @@ RMSSS( time_dim = "sdate", dat_dim = "dataset", pval = TRUE, + sign = FALSE, + alpha = 0.05, ncores = NULL ) } @@ -33,8 +35,14 @@ which the RMSSS are computed. The default value is 'sdate'.} dimension. The default value is 'dataset'.} \item{pval}{A logical value indicating whether to compute or not the p-value -of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will -return NA. The default value is TRUE.} +of the test Ho: RMSSS = 0. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to compute or not the +statistical significance of the test Ho: RMSSS = 0. The default value is +FALSE.} + +\item{alpha}{A numeric of the significance level to be used in the +statistical significance test. The default value is 0.05.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -43,12 +51,18 @@ computation. The default value is NULL.} A list containing the numeric arrays with dimension:\cr c(nexp, nobs, all other dimensions of exp except time_dim).\cr nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the -number of observation (i.e., dat_dim in obs).\cr +number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +nobs are omitted.\cr \item{$rmsss}{ - The root mean square error skill score. + A numerical array of the root mean square error skill score. } \item{$p.val}{ - The p-value. Only present if \code{pval = TRUE}. + A numerical array of the p-value with the same dimensions as $rmsss. + Only present if \code{pval = TRUE}. +} +\item{sign}{ + A logical array of the statistical significance of the RMSSS with the same + dimensions as $rmsss. Only present if \code{sign = TRUE}. } } \description{ @@ -58,17 +72,18 @@ have the same dimensions except along dat_dim, where the length can be different, with the number of experiments/models (nexp) and the number of observational datasets (nobs).\cr RMSSS computes the root mean square error skill score of each jexp in 1:nexp -against each jobs in 1:nobs which gives nexp * nobs RMSSS for each other -grid point of the array.\cr -The RMSSS are computed along the time_dim dimension which should corresponds -to the startdate dimension.\cr -The p-value is optionally provided by an one-sided Fisher test.\cr +against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +of the array.\cr +The RMSSS are computed along the time_dim dimension which should correspond +to the start date dimension.\cr +The p-value and significance test are optionally provided by an one-sided +Fisher test.\cr } \examples{ set.seed(1) exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) set.seed(2) obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -res <- RMSSS(exp, obs, time_dim = 'time') +res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') } diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index d592130fc7c8f1ea6478da94a4e5d8677586fdb0..0a4916ed607912bd6f1ab00947bfb652128255d7 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -1,23 +1,17 @@ context("s2dv::RMSSS tests") ############################################## - # case 0 - set.seed(1) - exp0 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) - set.seed(2) - obs0 <- array(rnorm(6), dim = c(sdate = 3, dataset = 2)) - # case 1 set.seed(1) - exp1 <- array(rnorm(15), dim = c(time = 3, memb = 5)) + exp1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) set.seed(2) - obs1 <- array(rnorm(6), dim = c(time = 3, memb = 2)) + obs1 <- array(rnorm(6), dim = c(sdate = 3, dataset = 2)) # case 2 set.seed(3) - exp2 <- array(rnorm(120), dim = c(sdate = 10, dat = 1, lon = 3, lat = 2, dataset = 2)) + exp2 <- array(rnorm(120), dim = c(time = 10, dat = 1, lon = 3, lat = 2, dataset = 2)) set.seed(4) - obs2 <- array(rnorm(60), dim = c(dat = 1, sdate = 10, dataset = 1, lat = 2, lon = 3)) + obs2 <- array(rnorm(60), dim = c(dat = 1, time = 10, dataset = 1, lat = 2, lon = 3)) # case 3: vector set.seed(5) @@ -27,14 +21,14 @@ context("s2dv::RMSSS tests") # case 4 set.seed(7) - exp4 <- array(rnorm(120), dim = c(sdate = 10, dat = 1, lon = 3, lat = 2)) + exp4 <- array(rnorm(60), dim = c(sdate = 10, lon = 3, lat = 2)) set.seed(8) - obs4 <- array(rnorm(60), dim = c(dat = 1, sdate = 10, lat = 2, lon = 3)) + obs4 <- array(exp4 + rnorm(60) / 2, dim = dim(exp4)) ############################################## test_that("1. Input checks", { - + ## exp and obs (1) expect_error( RMSSS(c(), c()), "Parameter 'exp' and 'obs' cannot be NULL." @@ -56,31 +50,46 @@ test_that("1. Input checks", { RMSSS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), "Parameter 'exp' and 'obs' must have same dimension name" ) + ## time_dim expect_error( RMSSS(exp1, obs1, time_dim = 1), "Parameter 'time_dim' must be a character string." ) expect_error( - RMSSS(exp0, obs0, time_dim = 'a'), + RMSSS(exp1, obs1, time_dim = 'a'), "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." ) + ## dat_dim expect_error( - RMSSS(exp0, obs0, dat_dim = NA), + RMSSS(exp1, obs1, dat_dim = NA), "Parameter 'dat_dim' must be a character string." ) expect_error( - RMSSS(exp0, obs0, dat_dim = 'memb'), + RMSSS(exp1, obs1, dat_dim = 'memb'), paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", " Set it as NULL if there is no dataset dimension.") ) + ## pval expect_error( - RMSSS(exp0, obs0, pval = c(T, T)), + RMSSS(exp1, obs1, pval = c(T, T)), "Parameter 'pval' must be one logical value." ) + ## sign + expect_error( + RMSSS(exp1, obs1, sign = 0.05), + "Parameter 'sign' must be one logical value." + ) + ## alpha expect_error( - RMSSS(exp0, obs0, ncores = 1.4), + RMSSS(exp1, obs1, alpha = T), + "Parameter 'alpha' must be one numeric value." + ) + ## ncores + expect_error( + RMSSS(exp1, obs1, ncores = 1.4), "Parameter 'ncores' must be a positive integer." ) + ## exp and obs (2) expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), @@ -96,7 +105,7 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: case 1", { - res1_1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'memb') + res1_1 <- RMSSS(exp1, obs1, dat_dim = 'dataset') expect_equal( dim(res1_1$rmsss), c(nexp = 5, nobs = 2) @@ -110,12 +119,17 @@ test_that("2. Output checks: case 1", { -0.5449538, tolerance = 0.00001 ) + expect_equal( + as.vector(res1_1$p.val)[3:7], + c(0.4914588, 0.5134658, 0.6428701, 0.4919943, 0.8672634), + tolerance = 0.001 + ) exp1_2 <- exp1 exp1_2[2:4] <- NA obs1_2 <- obs1 obs1_2[1:2] <- NA - res1_2 <- RMSSS(exp1_2, obs1_2, time_dim = 'time', dat_dim = 'memb', pval = TRUE) + res1_2 <- RMSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = TRUE) expect_equal( length(res1_2$rmsss[which(is.na(res1_2$rmsss))]), @@ -134,16 +148,16 @@ test_that("2. Output checks: case 1", { test_that("3. Output checks: case 2", { expect_equal( - dim(RMSSS(exp2, obs2)$rmsss), + dim(RMSSS(exp2, obs2, time_dim = 'time')$rmsss), c(nexp = 2, nobs = 1, dat = 1, lon = 3, lat = 2) ) expect_equal( - mean(RMSSS(exp2, obs2)$rmsss), + mean(RMSSS(exp2, obs2, time_dim = 'time')$rmsss), -0.3912208, tolerance = 0.00001 ) expect_equal( - range(RMSSS(exp2, obs2)$p.val), + range(RMSSS(exp2, obs2, time_dim = 'time')$p.val), c(0.2627770, 0.9868412), tolerance = 0.00001 ) @@ -171,17 +185,41 @@ test_that("5. Output checks: case 4", { expect_equal( dim(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - c(dat = 1, lon = 3, lat = 2) + c(lon = 3, lat = 2) + ) + expect_equal( + dim(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), + c(lon = 3, lat = 2) ) expect_equal( - mean(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - -0.3114424, + as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), + c(0.5393823, 0.6818405, 0.4953423, 0.4093817, 0.5972085, 0.5861135), tolerance = 0.00001 ) expect_equal( - range(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), - c(0.3560534, 0.9192801), + as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), + c(0.015203983, 0.001091360, 0.026987112, 0.066279877, 0.006161059, 0.007437649), tolerance = 0.00001 ) - + expect_equal( + names(RMSSS(exp4, obs4, dat_dim = NULL)), + c('rmsss', 'p.val') + ) + expect_equal( + names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)), + c('rmsss', 'sign') + ) + expect_equal( + names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = T)), + c('rmsss', 'p.val', 'sign') + ) + expect_equal( + as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)$sign), + c(T, T, T, F, T, T) + ) + expect_equal( + as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F, alpha = 0.01)$sign), + c(F, T, F, F, T, T) + ) + })