From 07f3ea0dc1d01d40250d6ac48e0ca2bcaec271ca Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Dec 2019 17:04:33 +0100 Subject: [PATCH 1/8] Modify RMSSS() with Apply --- DESCRIPTION | 1 + NAMESPACE | 1 + R/RMSSS.R | 324 ++++++++++++++++++------------------ man/RMSSS.Rd | 90 ++++------ tests/testthat/test-RMSSS.R | 150 +++++++++++++++++ 5 files changed, 345 insertions(+), 221 deletions(-) create mode 100644 tests/testthat/test-RMSSS.R diff --git a/DESCRIPTION b/DESCRIPTION index 0045265c..9aba92c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,6 +51,7 @@ Imports: parallel, plyr, SpecsVerification (>= 0.5.0) + multiApply (>= 2.0.0) Suggests: easyVerification, testthat diff --git a/NAMESPACE b/NAMESPACE index 4e855789..00c59bfa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ import(graphics) import(mapproj) import(maps) import(methods) +import(multiApply) import(ncdf4) import(parallel) import(plyr) diff --git a/R/RMSSS.R b/R/RMSSS.R index 0ffe36f6..140bb19d 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -1,191 +1,191 @@ #'Computes Root Mean Square Skill Score #' #'Computes the root mean square error skill score between an array of -#'forecasts, var_exp and an array of observations, var_obs, which should -#'have the same dimensions except along posloop where the lengths can be -#'different, with the number of experiments/models for var_exp (nexp) and -#'the number of obserational datasets for var_obs (nobs).\cr +#'forecasts 'exp' and an array of observations 'obs'. The two array should +#'have the same dimensions except along memb_dim, where the length can be +#'different, with the number of experiments/models for exp (nexp) and +#'the number of obserational datasets for obs (nobs).\cr #'RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp #'against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other #'grid point of the matrix (each latitude/longitude/level/leadtime).\cr -#'The RMSSS are computed along the posRMS dimension which should correspond +#'The RMSSS are computed along the sdate_dim dimension which should correspond #'to the startdate dimension.\cr -#'The p-value is optionally provided by a one-sided Fisher test.\cr\cr #'.RMSSS provides the same functionality but taking a matrix of ensemble #'members as input (exp). #' -#'@param var_exp Array of experimental data. -#'@param var_obs Array of observational data, same dimensions as var_exp -#' except along posloop dimension, where the length can be nobs instead of -#' nexp. -#'@param posloop Dimension nobs and nexp. -#'@param posRMS Dimension along which the RMSE are to be computed (the -#' dimension of the start dates). -#'@param pval Whether to compute or not the p-value of the test Ho : -#' RMSSS = 0. TRUE by default. -#'@param exp N by M matrix of N forecasts from M ensemble members. -#'@param obs Vector of the corresponding observations of length N. +#'@param exp A named numeric array of experimental data which contains at least two +#' dimensions for memb_dim and sdate_dim. +#'@param obs A named numeric array of observational data which contains at least two +#' dimensions for memb_dim and sdate_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'memb_dim' dimension. The order of +#' dimension can be different. +#'@param memb_dim A character string indicating the name of member (nobs/nexp) +#' dimension. The default value is 'member'. +#'@param sdate_dim A character string indicating the name of dimension along which the +#' RMSSS are computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will +#' return NA. Set TRUE as default. +#' +#'@return An array with dimensions:\cr +#' c(n_exp, n_obs, stats, all other dimensions of exp except sdate_dim).\cr +#' If \code{pval = FALSE}, the length of the 3rd dimension 'stats' is 1, +#' the RMSSS; if \code{pval = TRUE}, the length is 2, the RMSSS and the +#' corresponding p-value. #' -#'@return RMSSS: Array with dimensions:\cr -#' c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 2, -#' all other dimensions of var_exp & var_obs except posRMS).\cr -#'The 3rd dimension corresponds to the RMSSS and, if \code{pval = TRUE}, -#' the p-value of the one-sided Fisher test with Ho: RMSSS = 0.\cr\cr -#'.RMSSS: -#' \itemize{ -#' \item{$rmsss}{ -#' The RMSSS. -#' } -#' \item{$p_val}{ -#' Corresponds to the p values (only present if \code{pval = TRUE}) for -#' the RMSSS. -#' } -#' } #'@keywords datagen #'@author History:\cr -#'0.1 - 2012-04 (V. Guemas, \email{vguemas@@bsc.es}) - Original code\cr -#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@bsc.es}) - Formatting to R CRAN\cr -#'1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@@bsc.es}) - Adapted to veriApply() +#'0.1 - 2012-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr +#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr +#'1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +#'3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapted to multiApply #'@examples -#'# Load sample data as in Load() example: -#'example(Load) -#'clim <- Clim(sampleData$mod, sampleData$obs) -#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) -#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'rmsss <- RMSSS(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2)) -#'rmsss_plot <- array(dim = c(dim(rmsss)[1:2], 4, dim(rmsss)[4])) -#'rmsss_plot[, , 2, ] <- rmsss[, , 1, ] -#'rmsss_plot[, , 4, ] <- rmsss[, , 2, ] -#' \donttest{ -#'PlotVsLTime(rmsss_plot, toptitle = "Root Mean Square Skill Score", ytitle = "", -#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), -#' fileout = 'tos_rmsss.eps') -#' } -#'# The following example uses veriApply combined with .RMSSS instead of RMSSS -#' \dontrun{ -#'require(easyVerification) -#'RMSSS2 <- s2dverification:::.RMSSS -#'rmsss2 <- veriApply("RMSSS2", ano_exp, -#' # see ?veriApply for how to use the 'parallel' option -#' Mean1Dim(ano_obs, 2), -#' tdim = 3, ensdim = 2) -#' } +#' set.seed(1) +#' exp <- array(rnorm(15), dim = c(dat = 1, time = 3, member = 5)) +#' set.seed(2) +#' obs <- array(rnorm(6), dim = c(time = 3, member = 2, dat = 1)) +#' res <- RMSSS(exp, obs, sdate_dim = 'time') +#' #'@rdname RMSSS +#'@import multiApply #'@export -RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { - # - # Enlarge var_exp & var_obs & clim to 10 dim + move posloop & posRMS - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind !=posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) - # - # RMSSS and its pvalue computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 +RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRUE) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing sdate_dim and memb_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + + ## sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) > 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(exp)) | !sdate_dim %in% names(dim(obs))) { + stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") } - enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif1 <- array(dim = dimsaperm[-1]) - dif2 <- array(dim = dimsaperm[-1]) - dif1[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - dif2[, , , , , , , , ] <- permvarobs[jobs, , , , , , , , , ] - rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 - rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - enlRMSSS[jexp, jobs, 1, , , , , , , , ] <- 1 - (rms1 / rms2) - if (pval) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - l1 <- eno1[j3, j4, j5, j6, j7, j8, j9, j10] - l2 <- eno2[j3, j4, j5, j6, j7, j8, j9, j10] - if (is.na(l1) == FALSE & is.na(l2) == FALSE & l1 > 2 & l2 > 2) { - enlRMSSS[jexp, jobs, 2, j3, j4, j5, j6, j7, j8, j9, - j10] <- 1 - pf(F[j3, j4, j5, j6, j7, j8, j9, - j10], l1 - 1, l2 - 1) - } else { - enlRMSSS[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- NA - } - } - } - } - } - } - } - } - } - } - } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") } - dim(enlRMSSS) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlRMSSS + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'.") + } + if (dim(exp)[sdate_dim] < 2) { + stop("The length of sdate_dim must be at least 2 to compute RMSSS.") + } + + ############################### + # Calculate RMSSS + + RMSSS <- Apply(list(exp, obs), + target_dims = list(c(sdate_dim, memb_dim), + c(sdate_dim, memb_dim)), + output_dims = c('nexp', 'nobs', 'stats'), + fun = .RMSSS, + pval = pval)$output1 + + return(RMSSS) } + #'@rdname RMSSS #'@importFrom stats pf #'@export .RMSSS <- function(exp, obs, pval = TRUE) { - dif2 <- obs - dif1 <- rowMeans(exp) - obs - rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 - rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 - rms2[abs(rms2) <= abs(rms2) / 1000] <- abs(rms2) / 1000 - rmsss <- c(1 - (rms1 / rms2)) + # exp: [sdate, member_exp] + # obs: [sdate, member_obs] + n_exp <- as.numeric(dim(exp)[2]) + n_obs <- as.numeric(dim(obs)[2]) + n_sdate <- as.numeric(dim(exp)[1]) + + p_val <- array(dim = c(nexp = n_exp, nobs = n_obs)) + rms1 <- array(dim = c(nexp = n_exp, nobs = n_obs)) + eno1 <- array(dim = c(sdate = n_sdate)) + dif1 <- array(dim = c(sdate = n_sdate, n_exp = n_exp, n_obs = n_obs)) + + # dif1 + for (i in 1:n_obs) { + dif1[, , i] <- sapply(1:n_exp, function(x) {exp[, x] - obs[, i]}) + } + + # rms1 and eno1 + rms1 <- apply(dif1^2, c(2, 3), mean)^0.5 #array(dim = c(n_exp, n_obs)) if (pval == TRUE) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F.stat <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { - p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - } + eno1 <- Eno(dif1, 1) #count effective sample along sdate. dim = c(n_exp, n_obs) + ##eno1 <- Eno(dif1, sdate_dim) #change to this line when Eno() is done + } + + # rms2 and eno2 + rms2 <- array(colMeans(obs^2)^0.5, dim = c(n_obs = n_obs)) + if (pval == TRUE) { + eno2 <- Eno(obs, 1) #dim = n_obs + ##eno2 <- Eno(obs, sdate_dim) #change to this line when Eno() is done + } + + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( + rms2), na.rm = TRUE) / 1000 + + # use rms1 and rms2 to calculate rmsss + rmsss <- as.array(sapply(1:n_obs, function(i) {1 - rms1[, i]/rms2[i]})) + + # pval + if (pval == TRUE) { + F.stat <- as.array(sapply(1:n_obs, + function(i) { + (eno2[i] * (rms2[i])^2 / (eno2[i] - 1)) / (eno1[, i] * (rms1[, i])^2 / (eno1[, i] - 1)) + })) + + tmp <- as.array(sapply(1:n_obs, function(i) { + is.na(eno1[, i]) == FALSE & is.na(eno2[i]) == FALSE & eno1[, i] > 2 & eno2[i] > 2 + })) + p_val <- as.array(sapply(1:n_obs, + function(i) { + 1 - pf(F.stat[, i], eno1[, i] - 1, eno2[i] - 1) + })) + + p_val[which(!tmp)] <- NA + + # change not significant rmsss to NA + rmsss[which(!tmp)] <- NA + + } + + if (pval == TRUE) { + res <- array(dim = c(nexp = n_exp, nobs = n_obs, 2)) + res[,,1] <- rmsss + res[,,2] <- p_val } else { - p_val <- NA + res <- array(dim = c(nexp = n_exp, nobs = n_obs, 1)) + res[,,1] <- rmsss } - - # - # Output - # ~~~~~~~~ - # - list(rmsss = rmsss, p_val = p_val) + res } diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index f602702b..b6542b20 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -5,92 +5,64 @@ \alias{RMSSS} \title{Computes Root Mean Square Skill Score} \usage{ -RMSSS(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) +RMSSS(exp, obs, sdate_dim = "sdate", memb_dim = "member", pval = TRUE) .RMSSS(exp, obs, pval = TRUE) } \arguments{ -\item{var_exp}{Array of experimental data.} +\item{exp}{A named numeric array of experimental data which contains at least two +dimensions for memb_dim and sdate_dim.} -\item{var_obs}{Array of observational data, same dimensions as var_exp -except along posloop dimension, where the length can be nobs instead of -nexp.} +\item{obs}{A named numeric array of observational data which contains at least two +dimensions for memb_dim and sdate_dim. The dimensions should be the same +as paramter 'exp' except the length of 'memb_dim' dimension. The order of +dimension can be different.} -\item{posloop}{Dimension nobs and nexp.} +\item{sdate_dim}{A character string indicating the name of dimension along which the +RMSSS are computed. The default value is 'sdate'.} -\item{posRMS}{Dimension along which the RMSE are to be computed (the -dimension of the start dates).} +\item{memb_dim}{A character string indicating the name of member (nobs/nexp) +dimension. The default value is 'member'.} -\item{pval}{Whether to compute or not the p-value of the test Ho : -RMSSS = 0. TRUE by default.} - -\item{exp}{N by M matrix of N forecasts from M ensemble members.} - -\item{obs}{Vector of the corresponding observations of length N.} +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will +return NA. Set TRUE as default.} } \value{ -RMSSS: Array with dimensions:\cr - c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 2, - all other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the RMSSS and, if \code{pval = TRUE}, - the p-value of the one-sided Fisher test with Ho: RMSSS = 0.\cr\cr -.RMSSS: - \itemize{ - \item{$rmsss}{ - The RMSSS. - } - \item{$p_val}{ - Corresponds to the p values (only present if \code{pval = TRUE}) for - the RMSSS. - } - } +An array with dimensions:\cr + c(n_exp, n_obs, stats, all other dimensions of exp except sdate_dim).\cr + If \code{pval = FALSE}, the length of the 3rd dimension 'stats' is 1, + the RMSSS; if \code{pval = TRUE}, the length is 2, the RMSSS and the + corresponding p-value. } \description{ Computes the root mean square error skill score between an array of -forecasts, var_exp and an array of observations, var_obs, which should -have the same dimensions except along posloop where the lengths can be -different, with the number of experiments/models for var_exp (nexp) and -the number of obserational datasets for var_obs (nobs).\cr +forecasts 'exp' and an array of observations 'obs'. The two array should +have the same dimensions except along memb_dim, where the length can be +different, with the number of experiments/models for exp (nexp) and +the number of obserational datasets for obs (nobs).\cr RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr -The RMSSS are computed along the posRMS dimension which should correspond +The RMSSS are computed along the sdate_dim dimension which should correspond to the startdate dimension.\cr -The p-value is optionally provided by a one-sided Fisher test.\cr\cr .RMSSS provides the same functionality but taking a matrix of ensemble members as input (exp). } \examples{ -# Load sample data as in Load() example: -example(Load) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -rmsss <- RMSSS(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2)) -rmsss_plot <- array(dim = c(dim(rmsss)[1:2], 4, dim(rmsss)[4])) -rmsss_plot[, , 2, ] <- rmsss[, , 1, ] -rmsss_plot[, , 4, ] <- rmsss[, , 2, ] - \donttest{ -PlotVsLTime(rmsss_plot, toptitle = "Root Mean Square Skill Score", ytitle = "", - monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_rmsss.eps') - } -# The following example uses veriApply combined with .RMSSS instead of RMSSS - \dontrun{ -require(easyVerification) -RMSSS2 <- s2dverification:::.RMSSS -rmsss2 <- veriApply("RMSSS2", ano_exp, - # see ?veriApply for how to use the 'parallel' option - Mean1Dim(ano_obs, 2), - tdim = 3, ensdim = 2) - } +set.seed(1) +exp <- array(rnorm(15), dim = c(dat = 1, time = 3, member = 5)) +set.seed(2) +obs <- array(rnorm(6), dim = c(time = 3, member = 2, dat = 1)) +res <- RMSSS(exp, obs, sdate_dim = 'time') + } \author{ History:\cr 0.1 - 2012-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr 1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapted to multiApply } \keyword{datagen} diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R new file mode 100644 index 00000000..9473cf84 --- /dev/null +++ b/tests/testthat/test-RMSSS.R @@ -0,0 +1,150 @@ +context("s2dverification::RMSSS tests") + +############################################## + # case 0 + set.seed(1) + exp0 <- array(rnorm(15), dim = c(sdate = 3, member = 5)) + set.seed(2) + obs0 <- array(rnorm(6), dim = c(sdate = 3, member = 2)) + + # case 1 + set.seed(1) + exp1 <- array(rnorm(15), dim = c(time = 3, memb = 5)) + set.seed(2) + obs1 <- array(rnorm(6), dim = c(time = 3, memb = 2)) + + # case 2 + set.seed(3) + exp2 <- array(rnorm(120), dim = c(sdate = 10, dat = 1, member = 2, lon = 3, lat = 2)) + set.seed(4) + obs2 <- array(rnorm(60), dim = c(dat = 1, sdate = 10, member = 1, lat = 2, lon = 3)) + +############################################## + +test_that("1. Input checks", { + + expect_error( + RMSSS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + + expect_error( + RMSSS('exp', 'obs'), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + + expect_error( + RMSSS(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing sdate_dim and memb_dim.") + ) + + expect_error( + RMSSS(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + + expect_error( + RMSSS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + + expect_error( + RMSSS(exp1, obs1, sdate_dim = 1), + "Parameter 'sdate_dim' must be a character string." + ) + + expect_error( + RMSSS(exp0, obs0, sdate_dim = 'a'), + "Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension." + ) + + expect_error( + RMSSS(exp0, obs0, memb_dim = NA), + "Parameter 'memb_dim' must be a character string." + ) + + expect_error( + RMSSS(exp0, obs0, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + + expect_error( + RMSSS(exp0, obs0, pval = c(T, T)), + "Parameter 'pval' must be one logical value." + ) + + expect_error( + RMSSS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'." + ) + + expect_error( + RMSSS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 1, member = 2))), + "The length of sdate_dim must be at least 2 to compute RMSSS." + ) + +}) + +############################################## +test_that("1. Output checks: case 1", { + + res1_1 <- RMSSS(exp1, obs1, sdate_dim = 'time', memb_dim = 'memb', pval = FALSE) + expect_equal( + dim(res1_1), + c(nexp = 5, nobs = 2, stats = 1) + ) + + expect_equal( + mean(res1_1), + -0.5449538, + tolerance = 0.00001 + ) + + exp1_2 <- exp1 + exp1_2[2:4] <- NA + obs1_2 <- obs1 + obs1_2[1:2] <- NA + res1_2 <- RMSSS(exp1_2, obs1_2, sdate_dim = 'time', memb_dim = 'memb', pval = TRUE) + + expect_equal( + length(res1_2[which(is.na(res1_2))]), + 14 + ) + + expect_equal( + range(res1_2, na.rm = T), + c(-1.1108657, 0.8167073), + tolerance = 0.00001 + ) + +}) + + +############################################## +test_that("2. Output checks: case 2", { + + expect_equal( + dim(RMSSS(exp2, obs2)), + c(nexp = 2, nobs = 1, stats = 2, dat = 1, lon = 1, lat = 1) + ) + + expect_equal( + mean(RMSSS(exp2, obs2)[,,1,,,]), + -0.2454137, + tolerance = 0.00001 + ) + + expect_equal( + range(RMSSS(exp2, obs2)[,,2,,,]), + c(0.09174432, 0.97502252), + tolerance = 0.00001 + ) + +}) + +############################################## + +}) -- GitLab From 39940b6f2dceb61c69c06db6909fe0f8fa49084c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Dec 2019 17:07:53 +0100 Subject: [PATCH 2/8] Syntax fix --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9aba92c6..6780c83d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,7 +50,7 @@ Imports: ncdf4, parallel, plyr, - SpecsVerification (>= 0.5.0) + SpecsVerification (>= 0.5.0), multiApply (>= 2.0.0) Suggests: easyVerification, -- GitLab From 708f358119448b7a018b7bf21de48ec44bd15b8a Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Dec 2019 17:16:38 +0100 Subject: [PATCH 3/8] test-RMSSS.R bugfix --- tests/testthat/test-RMSSS.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 9473cf84..14eb1f5c 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -128,7 +128,7 @@ test_that("2. Output checks: case 2", { expect_equal( dim(RMSSS(exp2, obs2)), - c(nexp = 2, nobs = 1, stats = 2, dat = 1, lon = 1, lat = 1) + c(nexp = 2, nobs = 1, stats = 2, dat = 1, lon = 3, lat = 2) ) expect_equal( @@ -147,4 +147,4 @@ test_that("2. Output checks: case 2", { ############################################## -}) + -- GitLab From e19c8179ba59042c693c682dd6d6f5f92988d364 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 18 Dec 2019 09:59:31 +0100 Subject: [PATCH 4/8] Revise header --- R/RMSSS.R | 13 ++++++------- man/RMSSS.Rd | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 140bb19d..3771b7f7 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -1,17 +1,16 @@ #'Computes Root Mean Square Skill Score #' -#'Computes the root mean square error skill score between an array of -#'forecasts 'exp' and an array of observations 'obs'. The two array should +#'Computes the root mean square error skill score (RMSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should #'have the same dimensions except along memb_dim, where the length can be -#'different, with the number of experiments/models for exp (nexp) and -#'the number of obserational datasets for obs (nobs).\cr +#'different, with the number of experiments/models (nexp) and the number of +#'observational datasets (nobs).\cr #'RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp #'against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other -#'grid point of the matrix (each latitude/longitude/level/leadtime).\cr +#'grid point of the array (e.g., each latitude/longitude/level/ftime).\cr #'The RMSSS are computed along the sdate_dim dimension which should correspond #'to the startdate dimension.\cr -#'.RMSSS provides the same functionality but taking a matrix of ensemble -#'members as input (exp). +#'The p-value is optionally provided by an one-sided Fisher test.\cr #' #'@param exp A named numeric array of experimental data which contains at least two #' dimensions for memb_dim and sdate_dim. diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index b6542b20..c651a8c0 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -36,18 +36,17 @@ An array with dimensions:\cr corresponding p-value. } \description{ -Computes the root mean square error skill score between an array of -forecasts 'exp' and an array of observations 'obs'. The two array should +Computes the root mean square error skill score (RMSSS) between an array of +forecast 'exp' and an array of observation 'obs'. The two arrays should have the same dimensions except along memb_dim, where the length can be -different, with the number of experiments/models for exp (nexp) and -the number of obserational datasets for obs (nobs).\cr +different, with the number of experiments/models (nexp) and the number of +observational datasets (nobs).\cr RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other -grid point of the matrix (each latitude/longitude/level/leadtime).\cr +grid point of the array (e.g., each latitude/longitude/level/ftime).\cr The RMSSS are computed along the sdate_dim dimension which should correspond to the startdate dimension.\cr -.RMSSS provides the same functionality but taking a matrix of ensemble -members as input (exp). +The p-value is optionally provided by an one-sided Fisher test.\cr } \examples{ set.seed(1) -- GitLab From 970ae444426fff4f6dfc95e79a99c3ab35cc4706 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 18 Dec 2019 17:23:05 +0100 Subject: [PATCH 5/8] Add param 'ncores' --- R/RMSSS.R | 15 ++++++++++----- man/RMSSS.Rd | 6 +++++- tests/testthat/test-RMSSS.R | 5 +++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 3771b7f7..f538ef99 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -25,6 +25,8 @@ #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will #' return NA. Set TRUE as default. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. Default value is NULL. #' #'@return An array with dimensions:\cr #' c(n_exp, n_obs, stats, all other dimensions of exp except sdate_dim).\cr @@ -48,7 +50,7 @@ #'@rdname RMSSS #'@import multiApply #'@export -RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRUE) { +RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -69,7 +71,6 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU if(!all(names(dim(exp)) %in% names(dim(obs))) | !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have same dimension name") } - ## sdate_dim if (!is.character(sdate_dim) | length(sdate_dim) > 1) { stop("Parameter 'sdate_dim' must be a character string.") @@ -77,7 +78,6 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU if (!sdate_dim %in% names(dim(exp)) | !sdate_dim %in% names(dim(obs))) { stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") } - ## memb_dim if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") @@ -85,12 +85,17 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") } - ## pval if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index c651a8c0..99c0be57 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -5,7 +5,8 @@ \alias{RMSSS} \title{Computes Root Mean Square Skill Score} \usage{ -RMSSS(exp, obs, sdate_dim = "sdate", memb_dim = "member", pval = TRUE) +RMSSS(exp, obs, sdate_dim = "sdate", memb_dim = "member", pval = TRUE, + ncores = NULL) .RMSSS(exp, obs, pval = TRUE) } @@ -27,6 +28,9 @@ dimension. The default value is 'member'.} \item{pval}{A logical value indicating whether to compute or not the p-value of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will return NA. Set TRUE as default.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. Default value is NULL.} } \value{ An array with dimensions:\cr diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 14eb1f5c..4ffcad25 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -74,6 +74,11 @@ test_that("1. Input checks", { "Parameter 'pval' must be one logical value." ) + expect_error( + RMSSS(exp0, obs0, ncores = 1.4), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), -- GitLab From 91c4a496bd6b0c89080104bde3c39ac1c87549e6 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 20 Jan 2020 18:49:49 +0100 Subject: [PATCH 6/8] Small fix --- R/RMSSS.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index f538ef99..953506b8 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -116,15 +116,12 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU c(sdate_dim, memb_dim)), output_dims = c('nexp', 'nobs', 'stats'), fun = .RMSSS, - pval = pval)$output1 + pval = pval, ncores = ncores)$output1 return(RMSSS) } -#'@rdname RMSSS -#'@importFrom stats pf -#'@export -.RMSSS <- function(exp, obs, pval = TRUE) { +.RMSSS <- function(exp, obs, pval = pval) { # exp: [sdate, member_exp] # obs: [sdate, member_obs] n_exp <- as.numeric(dim(exp)[2]) -- GitLab From 7ea2db908455833335002dd9071a5aa94581b6d8 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 29 Jan 2020 12:46:16 +0100 Subject: [PATCH 7/8] Change output class to list --- NAMESPACE | 1 - R/RMSSS.R | 160 +++++++++++++++++++++--------------- man/RMSSS.Rd | 54 ++++++------ tests/testthat/test-RMSSS.R | 65 ++++++--------- 4 files changed, 147 insertions(+), 133 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 00c59bfa..53243677 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,6 @@ export(.BrierScore) export(.Corr) export(.RMS) -export(.RMSSS) export(.RatioRMS) export(.RatioSDRMS) export(.Trend) diff --git a/R/RMSSS.R b/R/RMSSS.R index 953506b8..46e6533a 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -1,56 +1,63 @@ -#'Computes Root Mean Square Skill Score +#'Compute root mean square error skill score #' -#'Computes the root mean square error skill score (RMSSS) between an array of +#'Compute the root mean square error skill score (RMSSS) between an array of #'forecast 'exp' and an array of observation 'obs'. The two arrays should #'have the same dimensions except along memb_dim, where the length can be #'different, with the number of experiments/models (nexp) and the number of #'observational datasets (nobs).\cr -#'RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp -#'against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other -#'grid point of the array (e.g., each latitude/longitude/level/ftime).\cr -#'The RMSSS are computed along the sdate_dim dimension which should correspond +#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp +#'against each jobs in 1:nobs which gives nexp * nobs RMSSS for each other +#'grid point of the array.\cr +#'The RMSSS are computed along the time_dim dimension which should corresponds #'to the startdate dimension.\cr #'The p-value is optionally provided by an one-sided Fisher test.\cr #' -#'@param exp A named numeric array of experimental data which contains at least two -#' dimensions for memb_dim and sdate_dim. -#'@param obs A named numeric array of observational data which contains at least two -#' dimensions for memb_dim and sdate_dim. The dimensions should be the same -#' as paramter 'exp' except the length of 'memb_dim' dimension. The order of +#'@param exp A named numeric array of experimental data which contains at least +#' two dimensions for memb_dim and time_dim. +#'@param obs A named numeric array of observational data which contains at least +#' two dimensions for memb_dim and time_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'memb_dim' dimension. The order of #' dimension can be different. #'@param memb_dim A character string indicating the name of member (nobs/nexp) #' dimension. The default value is 'member'. -#'@param sdate_dim A character string indicating the name of dimension along which the -#' RMSSS are computed. The default value is 'sdate'. +#'@param time_dim A character string indicating the name of dimension along +#' which the RMSSS are computed. The default value is 'sdate'. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will -#' return NA. Set TRUE as default. +#' return NA. The default value is TRUE. #'@param ncores An integer indicating the number of cores to use for parallel -#' computation. Default value is NULL. +#' computation. The default value is NULL. #' -#'@return An array with dimensions:\cr -#' c(n_exp, n_obs, stats, all other dimensions of exp except sdate_dim).\cr -#' If \code{pval = FALSE}, the length of the 3rd dimension 'stats' is 1, -#' the RMSSS; if \code{pval = TRUE}, the length is 2, the RMSSS and the -#' corresponding p-value. +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +#'number of observation (i.e., memb_dim in obs).\cr +#'\item{$rmsss}{ +#' The root mean square error skill score. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} #' #'@keywords datagen #'@author History:\cr #'0.1 - 2012-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr #'1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() -#'3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapted to multiApply +#'3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature #'@examples #' set.seed(1) #' exp <- array(rnorm(15), dim = c(dat = 1, time = 3, member = 5)) #' set.seed(2) #' obs <- array(rnorm(6), dim = c(time = 3, member = 2, dat = 1)) -#' res <- RMSSS(exp, obs, sdate_dim = 'time') +#' res <- RMSSS(exp, obs, time_dim = 'time') #' #'@rdname RMSSS #'@import multiApply #'@export -RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRUE, ncores = NULL) { +RMSSS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + pval = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -62,21 +69,22 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU } if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing sdate_dim and memb_dim.")) + "containing time_dim and memb_dim.")) } if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name") + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") } - ## sdate_dim - if (!is.character(sdate_dim) | length(sdate_dim) > 1) { - stop("Parameter 'sdate_dim' must be a character string.") + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") } - if (!sdate_dim %in% names(dim(exp)) | !sdate_dim %in% names(dim(obs))) { - stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) > 1) { @@ -102,26 +110,37 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU name_exp <- name_exp[-which(name_exp == memb_dim)] name_obs <- name_obs[-which(name_obs == memb_dim)] if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop("Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'.") + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'memb_dim'.")) } - if (dim(exp)[sdate_dim] < 2) { - stop("The length of sdate_dim must be at least 2 to compute RMSSS.") + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute RMSSS.") } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- s2dverification:::.aperm2(obs, order_obs) + + ############################### # Calculate RMSSS - RMSSS <- Apply(list(exp, obs), - target_dims = list(c(sdate_dim, memb_dim), - c(sdate_dim, memb_dim)), - output_dims = c('nexp', 'nobs', 'stats'), - fun = .RMSSS, - pval = pval, ncores = ncores)$output1 + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, memb_dim), + c(time_dim, memb_dim)), + fun = .RMSSS, + time_dim = time_dim, memb_dim = memb_dim, + pval = pval, #conf = conf, conf.lev = conf.lev, + ncores = ncores) - return(RMSSS) + return(res) } -.RMSSS <- function(exp, obs, pval = pval) { +.RMSSS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', pval = TRUE) { # exp: [sdate, member_exp] # obs: [sdate, member_obs] n_exp <- as.numeric(dim(exp)[2]) @@ -129,37 +148,44 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU n_sdate <- as.numeric(dim(exp)[1]) p_val <- array(dim = c(nexp = n_exp, nobs = n_obs)) - rms1 <- array(dim = c(nexp = n_exp, nobs = n_obs)) - eno1 <- array(dim = c(sdate = n_sdate)) - dif1 <- array(dim = c(sdate = n_sdate, n_exp = n_exp, n_obs = n_obs)) + dif1 <- array(dim = c(n_sdate, n_exp, n_obs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + +# if (conf) { +# conflow <- (1 - conf.lev) / 2 +# confhigh <- 1 - conflow +# conf_low <- array(dim = c(nexp = n_exp, nobs = n_obs)) +# conf_high <- array(dim = c(nexp = n_exp, nobs = n_obs)) +# } # dif1 for (i in 1:n_obs) { dif1[, , i] <- sapply(1:n_exp, function(x) {exp[, x] - obs[, i]}) } - + # rms1 and eno1 rms1 <- apply(dif1^2, c(2, 3), mean)^0.5 #array(dim = c(n_exp, n_obs)) - if (pval == TRUE) { - eno1 <- Eno(dif1, 1) #count effective sample along sdate. dim = c(n_exp, n_obs) - ##eno1 <- Eno(dif1, sdate_dim) #change to this line when Eno() is done - } - + # rms2 and eno2 rms2 <- array(colMeans(obs^2)^0.5, dim = c(n_obs = n_obs)) - if (pval == TRUE) { - eno2 <- Eno(obs, 1) #dim = n_obs - ##eno2 <- Eno(obs, sdate_dim) #change to this line when Eno() is done - } - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( rms2), na.rm = TRUE) / 1000 # use rms1 and rms2 to calculate rmsss rmsss <- as.array(sapply(1:n_obs, function(i) {1 - rms1[, i]/rms2[i]})) - + names(dim(rmsss)) <- c('nexp', 'nobs') + + ## pval and conf + if (pval) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(obs, 1) +# eno1 <- Eno(dif1, time_dim) #NEW +# eno2 <- Eno(obs, time_dim) #NEW + } +#print(dim(eno1)) +#print(dim(eno2)) # pval - if (pval == TRUE) { + if (pval) { F.stat <- as.array(sapply(1:n_obs, function(i) { (eno2[i] * (rms2[i])^2 / (eno2[i] - 1)) / (eno1[, i] * (rms1[, i])^2 / (eno1[, i] - 1)) @@ -177,16 +203,16 @@ RMSSS <- function(exp, obs, sdate_dim = 'sdate', memb_dim = 'member', pval = TRU # change not significant rmsss to NA rmsss[which(!tmp)] <- NA - } - - if (pval == TRUE) { - res <- array(dim = c(nexp = n_exp, nobs = n_obs, 2)) - res[,,1] <- rmsss - res[,,2] <- p_val + names(dim(p_val)) <- c('nexp', 'nobs') + + # output + if (pval) { + res <- list(rmsss = rmsss, p.val = p_val) + } else { - res <- array(dim = c(nexp = n_exp, nobs = n_obs, 1)) - res[,,1] <- rmsss + res <- list(rmsss = rmsss) } - res + + return(res) } diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 99c0be57..240a7463 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -1,54 +1,56 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RMSSS.R \name{RMSSS} -\alias{.RMSSS} \alias{RMSSS} -\title{Computes Root Mean Square Skill Score} +\title{Compute root mean square error skill score} \usage{ -RMSSS(exp, obs, sdate_dim = "sdate", memb_dim = "member", pval = TRUE, +RMSSS(exp, obs, time_dim = "sdate", memb_dim = "member", pval = TRUE, ncores = NULL) - -.RMSSS(exp, obs, pval = TRUE) } \arguments{ -\item{exp}{A named numeric array of experimental data which contains at least two -dimensions for memb_dim and sdate_dim.} +\item{exp}{A named numeric array of experimental data which contains at least +two dimensions for memb_dim and time_dim.} -\item{obs}{A named numeric array of observational data which contains at least two -dimensions for memb_dim and sdate_dim. The dimensions should be the same -as paramter 'exp' except the length of 'memb_dim' dimension. The order of +\item{obs}{A named numeric array of observational data which contains at least +two dimensions for memb_dim and time_dim. The dimensions should be the same +as paramter 'exp' except the length of 'memb_dim' dimension. The order of dimension can be different.} -\item{sdate_dim}{A character string indicating the name of dimension along which the -RMSSS are computed. The default value is 'sdate'.} +\item{time_dim}{A character string indicating the name of dimension along +which the RMSSS are computed. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of member (nobs/nexp) dimension. The default value is 'member'.} \item{pval}{A logical value indicating whether to compute or not the p-value of the test Ho: RMSSS = 0. If pval = TRUE, the insignificant RMSSS will -return NA. Set TRUE as default.} +return NA. The default value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel -computation. Default value is NULL.} +computation. The default value is NULL.} } \value{ -An array with dimensions:\cr - c(n_exp, n_obs, stats, all other dimensions of exp except sdate_dim).\cr - If \code{pval = FALSE}, the length of the 3rd dimension 'stats' is 1, - the RMSSS; if \code{pval = TRUE}, the length is 2, the RMSSS and the - corresponding p-value. +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +number of observation (i.e., memb_dim in obs).\cr +\item{$rmsss}{ + The root mean square error skill score. +} +\item{$p.val}{ + The p-value. Only present if \code{pval = TRUE}. +} } \description{ -Computes the root mean square error skill score (RMSSS) between an array of +Compute the root mean square error skill score (RMSSS) between an array of forecast 'exp' and an array of observation 'obs'. The two arrays should have the same dimensions except along memb_dim, where the length can be different, with the number of experiments/models (nexp) and the number of observational datasets (nobs).\cr -RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp -against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other -grid point of the array (e.g., each latitude/longitude/level/ftime).\cr -The RMSSS are computed along the sdate_dim dimension which should correspond +RMSSS computes the root mean square error skill score of each jexp in 1:nexp +against each jobs in 1:nobs which gives nexp * nobs RMSSS for each other +grid point of the array.\cr +The RMSSS are computed along the time_dim dimension which should corresponds to the startdate dimension.\cr The p-value is optionally provided by an one-sided Fisher test.\cr } @@ -57,7 +59,7 @@ set.seed(1) exp <- array(rnorm(15), dim = c(dat = 1, time = 3, member = 5)) set.seed(2) obs <- array(rnorm(6), dim = c(time = 3, member = 2, dat = 1)) -res <- RMSSS(exp, obs, sdate_dim = 'time') +res <- RMSSS(exp, obs, time_dim = 'time') } \author{ @@ -65,7 +67,7 @@ History:\cr 0.1 - 2012-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr 1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() -3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapted to multiApply +3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 4ffcad25..a73f90e7 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -15,7 +15,7 @@ context("s2dverification::RMSSS tests") # case 2 set.seed(3) - exp2 <- array(rnorm(120), dim = c(sdate = 10, dat = 1, member = 2, lon = 3, lat = 2)) + exp2 <- array(rnorm(120), dim = c(sdate = 10, dat = 1, lon = 3, lat = 2, member = 2)) set.seed(4) obs2 <- array(rnorm(60), dim = c(dat = 1, sdate = 10, member = 1, lat = 2, lon = 3)) @@ -27,83 +27,73 @@ test_that("1. Input checks", { RMSSS(c(), c()), "Parameter 'exp' and 'obs' cannot be NULL." ) - expect_error( RMSSS('exp', 'obs'), "Parameter 'exp' and 'obs' must be a numeric array." ) - expect_error( RMSSS(c(1:10), c(2:4)), paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing sdate_dim and memb_dim.") + "containing time_dim and memb_dim.") ) - expect_error( RMSSS(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), "Parameter 'exp' and 'obs' must have dimension names." ) - expect_error( RMSSS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), "Parameter 'exp' and 'obs' must have same dimension name" ) - expect_error( - RMSSS(exp1, obs1, sdate_dim = 1), - "Parameter 'sdate_dim' must be a character string." + RMSSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." ) - expect_error( - RMSSS(exp0, obs0, sdate_dim = 'a'), - "Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension." + RMSSS(exp0, obs0, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." ) - expect_error( RMSSS(exp0, obs0, memb_dim = NA), "Parameter 'memb_dim' must be a character string." ) - expect_error( RMSSS(exp0, obs0, memb_dim = 'memb'), "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." ) - expect_error( RMSSS(exp0, obs0, pval = c(T, T)), "Parameter 'pval' must be one logical value." ) - expect_error( RMSSS(exp0, obs0, ncores = 1.4), "Parameter 'ncores' must be a positive integer." ) - expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), "Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'." ) - expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 1, member = 2))), - "The length of sdate_dim must be at least 2 to compute RMSSS." + "The length of time_dim must be more than 2 to compute RMSSS." ) - }) ############################################## test_that("1. Output checks: case 1", { - res1_1 <- RMSSS(exp1, obs1, sdate_dim = 'time', memb_dim = 'memb', pval = FALSE) + res1_1 <- RMSSS(exp1, obs1, time_dim = 'time', memb_dim = 'memb') expect_equal( - dim(res1_1), - c(nexp = 5, nobs = 2, stats = 1) + dim(res1_1$rmsss), + c(nexp = 5, nobs = 2) ) - expect_equal( - mean(res1_1), + dim(res1_1$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + mean(res1_1$rmsss), -0.5449538, tolerance = 0.00001 ) @@ -112,16 +102,15 @@ test_that("1. Output checks: case 1", { exp1_2[2:4] <- NA obs1_2 <- obs1 obs1_2[1:2] <- NA - res1_2 <- RMSSS(exp1_2, obs1_2, sdate_dim = 'time', memb_dim = 'memb', pval = TRUE) + res1_2 <- RMSSS(exp1_2, obs1_2, time_dim = 'time', memb_dim = 'memb', pval = TRUE) expect_equal( - length(res1_2[which(is.na(res1_2))]), - 14 + length(res1_2$rmsss[which(is.na(res1_2$rmsss))]), + 7 ) - expect_equal( - range(res1_2, na.rm = T), - c(-1.1108657, 0.8167073), + range(res1_2$p.val, na.rm = T), + c(0.7159769, 0.8167073), tolerance = 0.00001 ) @@ -132,19 +121,17 @@ test_that("1. Output checks: case 1", { test_that("2. Output checks: case 2", { expect_equal( - dim(RMSSS(exp2, obs2)), - c(nexp = 2, nobs = 1, stats = 2, dat = 1, lon = 3, lat = 2) + dim(RMSSS(exp2, obs2)$rmsss), + c(nexp = 2, nobs = 1, dat = 1, lon = 3, lat = 2) ) - expect_equal( - mean(RMSSS(exp2, obs2)[,,1,,,]), - -0.2454137, + mean(RMSSS(exp2, obs2)$rmsss), + -0.3912208, tolerance = 0.00001 ) - expect_equal( - range(RMSSS(exp2, obs2)[,,2,,,]), - c(0.09174432, 0.97502252), + range(RMSSS(exp2, obs2)$p.val), + c(0.2627770, 0.9868412), tolerance = 0.00001 ) -- GitLab From 711d4e53a912ad56b7b18baa037f119198e39d5f Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 29 Jan 2020 14:27:26 +0100 Subject: [PATCH 8/8] Change RMSSS() to use new Eno() --- R/RMSSS.R | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 46e6533a..76d0dda9 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -162,49 +162,41 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', for (i in 1:n_obs) { dif1[, , i] <- sapply(1:n_exp, function(x) {exp[, x] - obs[, i]}) } - - # rms1 and eno1 - rms1 <- apply(dif1^2, c(2, 3), mean)^0.5 #array(dim = c(n_exp, n_obs)) + # rms1 and eno1 + rms1 <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(n_exp, n_obs)) # rms2 and eno2 - rms2 <- array(colMeans(obs^2)^0.5, dim = c(n_obs = n_obs)) + rms2 <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(n_obs = n_obs)) rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( rms2), na.rm = TRUE) / 1000 - + #rms2 above: [nobs] + rms2 <- array(rms2, dim = c(nobs = n_obs, nexp = n_exp)) + #rms2 above: [nobs, nexp] + rms2 <- s2dverification:::.aperm2(rms2, c(2, 1)) + #rms2 above: [nexp, nobs] + # use rms1 and rms2 to calculate rmsss - rmsss <- as.array(sapply(1:n_obs, function(i) {1 - rms1[, i]/rms2[i]})) - names(dim(rmsss)) <- c('nexp', 'nobs') + rmsss <- 1 - rms1/rms2 ## pval and conf if (pval) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(obs, 1) -# eno1 <- Eno(dif1, time_dim) #NEW -# eno2 <- Eno(obs, time_dim) #NEW + eno1 <- Eno(dif1, time_dim) + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = n_obs, nexp = n_exp)) + eno2 <- s2dverification:::.aperm2(eno2, c(2, 1)) } -#print(dim(eno1)) -#print(dim(eno2)) + # pval if (pval) { - F.stat <- as.array(sapply(1:n_obs, - function(i) { - (eno2[i] * (rms2[i])^2 / (eno2[i] - 1)) / (eno1[, i] * (rms1[, i])^2 / (eno1[, i] - 1)) - })) - - tmp <- as.array(sapply(1:n_obs, function(i) { - is.na(eno1[, i]) == FALSE & is.na(eno2[i]) == FALSE & eno1[, i] > 2 & eno2[i] > 2 - })) - p_val <- as.array(sapply(1:n_obs, - function(i) { - 1 - pf(F.stat[, i], eno1[, i] - 1, eno2[i] - 1) - })) - + + F.stat <- (eno2 * rms2^2 / (eno2- 1)) / ((eno1 * rms1^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) p_val[which(!tmp)] <- NA # change not significant rmsss to NA rmsss[which(!tmp)] <- NA } - names(dim(p_val)) <- c('nexp', 'nobs') # output if (pval) { -- GitLab