diff --git a/NAMESPACE b/NAMESPACE index d74cc97a892ba0fa115f4135007ec2295501a75f..9214a1a54c1b7c4bc7e1e81203b51ac5e674cc26 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,8 @@ export(Histo2Hindcast) export(InsertDim) export(LeapYear) export(Load) +export(MSE) +export(MSSS) export(MeanDims) export(NAO) export(Persistence) diff --git a/R/MSE.R b/R/MSE.R new file mode 100644 index 0000000000000000000000000000000000000000..97e4e828e88f85fea639fb63520b54da7366db6b --- /dev/null +++ b/R/MSE.R @@ -0,0 +1,311 @@ +#'Compute mean square error +#' +#'Compute the mean square error for an array of forecasts and an array of +#'observations. The MSEs are computed along time_dim, the dimension which +#'corresponds to the start date dimension. If comp_dim is given, the MSEs are +#'computed only if obs along the comp_dim dimension are complete between +#'limits[1] and limits[2], i.e. there are no NAs between limits[1] and +#'limits[2]. This option can be activated if the user wants to account only +#'for the forecasts for which the corresponding observations are available at +#'all leadtimes.\cr +#'The confidence interval is computed by the chi2 distribution.\cr +#' +#'@param exp A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +#' vector with the same length as 'exp'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the input data are +#' already the ensemble mean. The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset or member +#' (nobs/nexp) dimension. The datasets of exp and obs will be paired and +#' computed MSE for each pair. The default value is NULL. +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default value is c(1, length(comp_dim dimension)). +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs).\cr +#'\item{$mse}{ +#' The mean square error. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +#'res <- MSE(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', +#' comp_dim = 'ftime', limits = c(7, 54)) +#' +#'# Synthetic data: +#'exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +#'obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +#'na <- floor(runif(10, min = 1, max = 80)) +#'obs1[na] <- NA +#'res1 <- MSE(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') +#' +#'exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +#'obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +#'res2 <- MSE(exp3, obs3, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats qchisq +#'@export +MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, + comp_dim = NULL, limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + "Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2 to compute MSE.") + } + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + ############################### + # Calculate MSE + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + if (is.null(limits)) { + limits <- c(1, dim(obs)[comp_dim]) + } + pos <- which(names(dim(obs)) == comp_dim) + obs_sub <- Subset(obs, pos, list(limits[1]:limits[2])) + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .MSE, + time_dim = time_dim, dat_dim = dat_dim, + conf = conf, alpha = alpha, + ncores = ncores) + return(res) +} + +.MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, conf = TRUE, alpha = 0.05) { + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + ini_dims <- dim(exp) + dim(exp) <- c(ini_dims, dat_dim = 1) + dim(obs) <- c(ini_dims, dat_dim = 1) + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + } + + dif <- array(dim = c(dim(exp)[1], nexp = nexp, nobs = nobs)) + chi <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (conf) { + conflow <- alpha / 2 + confhigh <- 1 - conflow + conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) + conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) + } + + # dif + for (i in 1:nobs) { + dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + mse <- colMeans(dif^2, na.rm = TRUE) # array(dim = c(nexp, nobs)) + + if (conf) { + #count effective sample along sdate. eno: c(nexp, nobs) +# eno <- Eno(dif, time_dim) # slower than for loop below? + eno <- array(dim = c(nexp = nexp, nobs = nobs)) + for (n_obs in 1:nobs) { + for (n_exp in 1:nexp) { + eno[n_exp, n_obs] <- .Eno(dif[, n_exp, n_obs], na.action = na.pass) + } + } + + # conf.lower + chi <- sapply(1:nobs, function(i) { + qchisq(confhigh, eno[, i] - 1) + }) + conf.lower <- (eno * mse ** 2 / chi) ** 0.5 + + # conf.upper + chi <- sapply(1:nobs, function(i) { + qchisq(conflow, eno[, i] - 1) + }) + conf.upper <- (eno * mse ** 2 / chi) ** 0.5 + } + + ################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { + dim(mse) <- NULL + if (conf) { + dim(conf.lower) <- NULL + dim(conf.upper) <- NULL + } + } + + ################################### + + res <- list(mse = mse) + if (conf) res <- c(res, list(conf.lower = conf.lower, conf.upper = conf.upper)) + + return(res) + +} diff --git a/R/MSSS.R b/R/MSSS.R new file mode 100644 index 0000000000000000000000000000000000000000..a11c50c85ee9a20b33c13666b333b0e6f6b22a97 --- /dev/null +++ b/R/MSSS.R @@ -0,0 +1,435 @@ +#'Compute mean square error skill score +#' +#'Compute the mean square error skill score (MSSS) between an array of forecast +#''exp' and an array of observation 'obs'. The two arrays should have the same +#'dimensions except along 'dat_dim' and 'memb_dim'. The MSSSs are computed along +#''time_dim', the dimension which corresponds to the start date dimension. +#'MSSS computes the mean square error skill score of each exp in 1:nexp +#'against each obs in 1:nobs which gives nexp * nobs MSSS for each grid point +#'of the array.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test or Random Walk test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' time dimension (time_dim). It can also be a vector with the same length as +#' 'obs', then the vector will automatically be 'time_dim'. +#'@param obs A named numeric array of observational data which contains at least +#' time dimension (time_dim). The dimensions should be the same as parameter +#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +#' be a vector with the same length as 'exp', then the vector will +#' automatically be 'time_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivalent to 0.) +#' The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL. +#'@param time_dim A character string indicating the name of dimension along +#' which the MSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the data are +#' already the ensemble mean. The default value is NULL. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: MSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: MSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The +#' default is NULL (since "one-sided Fisher" doesn't have different test +#' types.) +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$msss}{ +#' A numerical array of the mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $msss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the MSSS with the same +#' dimensions as $msss. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'rmsss <- MSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') +#' +#'# Synthetic data: +#'exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#'obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) +#'res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') +#' +#'@rdname MSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } else { + ref <- 0 + } + if (!is.array(ref)) { # 0 or 1 + ref <- array(data = ref, dim = dim(exp)) + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + ## sig_method.type + if (sig_method == 'Random Walk') { + if (is.null(sig_method.type)) { + .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", + "Random Walk. Assign it as 'two.sided'.") + sig_method.type <- "two.sided" + } + if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + } + if (sig_method.type == 'two.sided.approx' & pval == T) { + .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") + pval <- FALSE + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + alpha <- 0.05 + } + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.")) + } + + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute MSSS.") + } + + ############################### + # # Sort dimension + # name_exp <- names(dim(exp)) + # name_obs <- names(dim(obs)) + # order_obs <- match(name_exp, name_obs) + # obs <- Reorder(obs, order_obs) + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + + ############################### + # Calculate MSSS + + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + + return(res) +} + +.MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sig_method.type = NULL) { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } + + nsdate <- as.numeric(dim(exp)[1]) + + # MSE of forecast + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + mse_exp <- colMeans(dif1^2, na.rm = TRUE) # [nexp, nobs] + + # MSE of reference + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + mse_ref <- colMeans(dif2^2, na.rm = TRUE) # [nref, nobs] + if (nexp != nref) { + # expand mse_ref to nexp (nref is 1) + mse_ref <- array(mse_ref, dim = c(nobs = nobs, nexp = nexp)) + mse_ref <- Reorder(mse_ref, c(2, 1)) + } + + msss <- 1 - mse_exp / mse_ref + + ################################################# + + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } + + F.stat <- (eno2 * mse_ref^2 / (eno2 - 1)) / ((eno1 * mse_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data msss to NA + msss[which(!tmp)] <- NA + } + + } else if (sig_method == "Random Walk") { + + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = sig_method.type, + pval = pval, sign = sign, alpha = alpha) + if (sign) signif[i, j] <- aux$sign + if (pval) p_val[i, j] <- aux$p.val + } + } + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(msss) <- NULL + if (pval) dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(msss = msss) + if (pval) res <- c(res, list(p.val = p_val)) + if (sign) res <- c(res, list(sign = signif)) + + return(res) +} diff --git a/R/RMS.R b/R/RMS.R index 164167fd686aab87d22309fc49dbaa5cd247b5f0..4e6bfeb2e9a8301e10d55d91ad9633811fd733b5 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -2,7 +2,7 @@ #' #'Compute the root mean square error for an array of forecasts and an array of #'observations. The RMSEs are computed along time_dim, the dimension which -#'corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +#'corresponds to the start date dimension. If comp_dim is given, the RMSEs are #'computed only if obs along the comp_dim dimension are complete between #'limits[1] and limits[2], i.e. there are no NAs between limits[1] and #'limits[2]. This option can be activated if the user wishes to account only @@ -10,16 +10,16 @@ #'all leadtimes.\cr #'The confidence interval is computed by the chi2 distribution.\cr #' -#'@param exp A named numeric array of experimental data, with at least two -#' dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#'@param exp A named numeric array of experimental data, with at least +#' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. #'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along dat_dim. It can also be a vector with the same -#' length as 'exp', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +#' vector with the same length as 'exp'. #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the input data are +#' already the ensemble mean. The default value is NULL. #'@param dat_dim A character string indicating the name of dataset or member #' (nobs/nexp) dimension. The datasets of exp and obs will be paired and #' computed RMS for each pair. The default value is NULL. @@ -53,22 +53,32 @@ #' #'@examples #'# Load sample data as in Load() example: -#' set.seed(1) -#' exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' set.seed(2) -#' obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' set.seed(2) -#' na <- floor(runif(10, min = 1, max = 80)) -#' obs1[na] <- NA -#' res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') -#' # Renew example when Ano and Smoothing are ready +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +#'res <- RMS(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', +#' comp_dim = 'ftime', limits = c(7, 54)) #' +#'# Synthetic data: +#'exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +#'obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +#'na <- floor(runif(10, min = 1, max = 80)) +#'obs1[na] <- NA +#'res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') +#' +#'exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +#'obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +#'res2 <- RMS(exp3, obs3, memb_dim = 'member') +#' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@importFrom stats qchisq #'@export -RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, - limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { +RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, + comp_dim = NULL, limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { @@ -91,14 +101,10 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name") - } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -106,6 +112,15 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension.") + } + } ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { @@ -151,22 +166,44 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, stop("Parameter 'ncores' must be a positive integer.") } } + ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } if (dim(exp)[time_dim] < 2) { stop("The length of time_dim must be at least 2 to compute RMS.") } - - + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + } + ############################### # Sort dimension name_exp <- names(dim(exp)) @@ -230,7 +267,7 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - rms <- colMeans(dif^2, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + rms <- colMeans(dif^2, na.rm = TRUE)^0.5 # [nexp, nobs] if (conf) { #NOTE: pval and sign also need #count effective sample along sdate. eno: c(nexp, nobs) diff --git a/R/RMSSS.R b/R/RMSSS.R index cf45fa6a31545c800ba0f0bf7b0ec5c5b6961a3a..c33a40e5a85b77246ab9f8fff35ded3e732a4374 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -2,14 +2,12 @@ #' #'Compute the root mean square error skill score (RMSSS) between an array of #'forecast 'exp' and an array of observation 'obs'. The two arrays should -#'have the same dimensions except along dat_dim, where the length can be -#'different, with the number of experiments/models (nexp) and the number of -#'observational datasets (nobs).\cr -#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp -#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs +#'are computed along 'time_dim', the dimension which corresponds to the start +#'date dimension. +#'RMSSS computes the root mean square error skill score of each exp in 1:nexp +#'against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point #'of the array.\cr -#'The RMSSS are computed along the time_dim dimension which should correspond -#'to the start date dimension.\cr #'The p-value and significance test are optionally provided by an one-sided #'Fisher test or Random Walk test.\cr #' @@ -18,8 +16,9 @@ #' 'obs', then the vector will automatically be 'time_dim'. #'@param obs A named numeric array of observational data which contains at least #' time dimension (time_dim). The dimensions should be the same as parameter -#' 'exp' except the length of 'dat_dim' dimension. It can also be a vector with -#' the same length as 'exp', then the vector will automatically be 'time_dim'. +#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +#' be a vector with the same length as 'exp', then the vector will +#' automatically be 'time_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension, or 0 (typical climatological forecast) or 1 #' (normalized climatological forecast). If it is an array, the dimensions must @@ -27,15 +26,15 @@ #' reference dataset, it should not have dataset dimension. If there is #' corresponding reference for each experiment, the dataset dimension must #' have the same length as in 'exp'. If 'ref' is NULL, the typical -#' climatological forecast is used as reference forecast (equivelant to 0.) +#' climatological forecast is used as reference forecast (equivalent to 0.) #' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is NULL. #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. +#' to compute the ensemble mean; it should be set to NULL if the data are +#' already the ensemble mean. The default value is NULL. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. The default value is TRUE. #'@param sign A logical value indicating whether to compute or not the @@ -45,6 +44,11 @@ #' statistical significance test. The default value is 0.05. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The +#' default is NULL (since "one-sided Fisher" doesn't have different test +#' types.) #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -67,11 +71,26 @@ #'} #' #'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') +#' #' set.seed(1) -#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) #' set.seed(2) -#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +#' obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') +#' +#' exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) +#' obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') +#' +#' exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) +#' obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res3 <- RMSSS(exp3, obs3, time_dim = 'time') #' #'@rdname RMSSS #'@import multiApply @@ -79,7 +98,7 @@ #'@export RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, - sig_method = 'one-sided Fisher', ncores = NULL) { + sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -96,21 +115,17 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) } } else if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name.") - } if (!is.null(ref)) { if (!is.numeric(ref)) { stop("Parameter 'ref' must be numeric.") @@ -122,6 +137,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } + } else { + ref <- 0 + } + if (!is.array(ref)) { # 0 or 1 + ref <- array(data = ref, dim = dim(exp)) } ## time_dim @@ -149,14 +169,6 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } } ## pval if (!is.logical(pval) | length(pval) > 1) { @@ -174,78 +186,100 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") } - if (sig_method == "Random Walk" & pval == T) { - warning("p-value cannot be calculated by significance method 'Random Walk'.") - pval <- FALSE + ## sig_method.type + if (sig_method == 'Random Walk') { + if (is.null(sig_method.type)) { + .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", + "Random Walk. Assign it as 'two.sided'.") + .warning("Note that in s2dv <= 1.4.1, Random Walk uses 'two.sided.approx' method.", + "If you want to retain the same functionality, please specify parameter ", + "'sig_method.type' as 'two.sided.approx'.") + sig_method.type <- "two.sided" + } + if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + } + if (sig_method.type == 'two.sided.approx' & pval == T) { + .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") + pval <- FALSE + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + alpha <- 0.05 + } + } } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } - ## exp and obs (2) + ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'memb_dim' and 'dat_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] + + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) } + name_ref <- name_ref[-which(name_ref == dat_dim)] } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) } if (dim(exp)[time_dim] <= 2) { stop("The length of time_dim must be more than 2 to compute RMSSS.") } - - - ############################### -# # Sort dimension -# name_exp <- names(dim(exp)) -# name_obs <- names(dim(obs)) -# order_obs <- match(name_exp, name_obs) -# obs <- Reorder(obs, order_obs) - - + + ############################### - # Create ref array if needed - if (is.null(ref)) ref <- 0 - if (!is.array(ref)) { - ref <- array(data = ref, dim = dim(exp)) - } - + # # Sort dimension + # name_exp <- names(dim(exp)) + # name_obs <- names(dim(obs)) + # order_obs <- match(name_exp, name_obs) + # obs <- Reorder(obs, order_obs) + ############################### ## Ensemble mean if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(ref))) { ref <- MeanDims(ref, memb_dim, na.rm = T) } } @@ -253,21 +287,6 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ############################### # Calculate RMSSS -# if (!is.null(ref)) { # use "ref" as reference forecast -# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { -# target_dims_ref <- c(time_dim, dat_dim) -# } else { -# target_dims_ref <- c(time_dim) -# } -# data <- list(exp = exp, obs = obs, ref = ref) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim), -# ref = target_dims_ref) -# } else { -# data <- list(exp = exp, obs = obs) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim)) -# } data <- list(exp = exp, obs = obs, ref = ref) if (!is.null(dat_dim)) { if (dat_dim %in% names(dim(ref))) { @@ -288,14 +307,15 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, fun = .RMSSS, time_dim = time_dim, dat_dim = dat_dim, pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, + sig_method = sig_method, sig_method.type = sig_method.type, ncores = ncores) return(res) } .RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -318,8 +338,8 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dim(ref) <- c(dim(ref), dat = 1) } else { - # exp: [sdate, dat_exp] - # obs: [sdate, dat_obs] + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) if (dat_dim %in% names(dim(ref))) { @@ -340,40 +360,25 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + rms_exp <- colMeans(dif1^2, na.rm = TRUE)^0.5 # [nexp, nobs] # RMS of reference -# if (!is.null(ref)) { - dif2 <- array(dim = c(nsdate, nref, nobs)) - names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') - for (i in 1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) - } - rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) - if (nexp != nref) { - # expand rms_ref to nexp (nref is 1) - rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) - rms_ref <- Reorder(rms_ref, c(2, 1)) - } -# } else { -# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) -## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( -## rms_ref), na.rm = TRUE) / 1000 -# rms_ref <- Reorder(rms_ref, c(2, 1)) -# #rms_ref above: [nexp, nobs] -# } + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- colMeans(dif2^2, na.rm = TRUE)^0.5 # [nref, nobs] + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } rmsss <- 1 - rms_exp / rms_ref ################################################# -# if (conf) { -# conflow <- alpha / 2 -# confhigh <- 1 - conflow -# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) -# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) -# } - if (sig_method == 'one-sided Fisher') { p_val <- array(dim = c(nexp = nexp, nobs = nobs)) ## pval and sign @@ -404,11 +409,12 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } } else if (sig_method == "Random Walk") { - signif <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { for (j in 1:nobs) { - - # Error error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) if (nref == nexp) { error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) @@ -416,7 +422,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } - signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$sign + aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = sig_method.type, + pval = pval, sign = sign, alpha = alpha) + if (sign) signif[i, j] <- aux$sign + if (pval) p_val[i, j] <- aux$p.val } } } @@ -425,7 +435,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # Remove extra dimensions if dat_dim = NULL if (is.null(dat_dim)) { dim(rmsss) <- NULL - dim(p_val) <- NULL + if (pval) dim(p_val) <- NULL if (sign) dim(signif) <- NULL } ################################### diff --git a/man/MSE.Rd b/man/MSE.Rd new file mode 100644 index 0000000000000000000000000000000000000000..291d08c2891d54fb76c7ba7b5d083c34ad21c620 --- /dev/null +++ b/man/MSE.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MSE.R +\name{MSE} +\alias{MSE} +\title{Compute mean square error} +\usage{ +MSE( + exp, + obs, + time_dim = "sdate", + dat_dim = NULL, + memb_dim = NULL, + comp_dim = NULL, + limits = NULL, + conf = TRUE, + alpha = 0.05, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'.} + +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +vector with the same length as 'exp'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} + +\item{dat_dim}{A character string indicating the name of dataset or member +(nobs/nexp) dimension. The datasets of exp and obs will be paired and +computed MSE for each pair. The default value is NULL.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the input data are +already the ensemble mean. The default value is NULL.} + +\item{comp_dim}{A character string indicating the name of dimension along which +obs is taken into account only if it is complete. The default value +is NULL.} + +\item{limits}{A vector of two integers indicating the range along comp_dim to +be completed. The default value is c(1, length(comp_dim dimension)).} + +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +number of observation (i.e., dat_dim in obs).\cr +\item{$mse}{ + The mean square error. +} +\item{$conf.lower}{ + The lower confidence interval. Only present if \code{conf = TRUE}. +} +\item{$conf.upper}{ + The upper confidence interval. Only present if \code{conf = TRUE}. +} +} +\description{ +Compute the mean square error for an array of forecasts and an array of +observations. The MSEs are computed along time_dim, the dimension which +corresponds to the start date dimension. If comp_dim is given, the MSEs are +computed only if obs along the comp_dim dimension are complete between +limits[1] and limits[2], i.e. there are no NAs between limits[1] and +limits[2]. This option can be activated if the user wants to account only +for the forecasts for which the corresponding observations are available at +all leadtimes.\cr +The confidence interval is computed by the chi2 distribution.\cr +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +res <- MSE(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', + comp_dim = 'ftime', limits = c(7, 54)) + +# Synthetic data: +exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +na <- floor(runif(10, min = 1, max = 80)) +obs1[na] <- NA +res1 <- MSE(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') + +exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +res2 <- MSE(exp3, obs3, memb_dim = 'member') + +} diff --git a/man/MSSS.Rd b/man/MSSS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..33df4501aac5e5137446830419d51860471fa797 --- /dev/null +++ b/man/MSSS.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MSSS.R +\name{MSSS} +\alias{MSSS} +\title{Compute mean square error skill score} +\usage{ +MSSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + dat_dim = NULL, + memb_dim = NULL, + pval = TRUE, + sign = FALSE, + alpha = 0.05, + sig_method = "one-sided Fisher", + sig_method.type = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data which contains at least +time dimension (time_dim). It can also be a vector with the same length as +'obs', then the vector will automatically be 'time_dim'.} + +\item{obs}{A named numeric array of observational data which contains at least +time dimension (time_dim). The dimensions should be the same as parameter +'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +be a vector with the same length as 'exp', then the vector will +automatically be 'time_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension, or 0 (typical climatological forecast) or 1 +(normalized climatological forecast). If it is an array, the dimensions must +be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +reference dataset, it should not have dataset dimension. If there is +corresponding reference for each experiment, the dataset dimension must +have the same length as in 'exp'. If 'ref' is NULL, the typical +climatological forecast is used as reference forecast (equivalent to 0.) +The default value is NULL.} + +\item{time_dim}{A character string indicating the name of dimension along +which the MSSS are computed. The default value is 'sdate'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. The default value is NULL.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the data are +already the ensemble mean. The default value is NULL.} + +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: MSSS = 0. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to compute or not the +statistical significance of the test Ho: MSSS = 0. The default value is +FALSE.} + +\item{alpha}{A numeric of the significance level to be used in the +statistical significance test. The default value is 0.05.} + +\item{sig_method}{A character string indicating the significance method. The +options are "one-sided Fisher" (default) and "Random Walk".} + +\item{sig_method.type}{A character string indicating the test type of the +significance method. Check \code{RandomWalkTest()} parameter +\code{test.type} for details if parameter "sig_method" is "Random Walk". The +default is NULL (since "one-sided Fisher" doesn't have different test +types.)} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +nobs are omitted.\cr +\item{$msss}{ + A numerical array of the mean square error skill score. +} +\item{$p.val}{ + A numerical array of the p-value with the same dimensions as $msss. + Only present if \code{pval = TRUE}. +} +\item{sign}{ + A logical array of the statistical significance of the MSSS with the same + dimensions as $msss. Only present if \code{sign = TRUE}. +} +} +\description{ +Compute the mean square error skill score (MSSS) between an array of forecast +'exp' and an array of observation 'obs'. The two arrays should have the same +dimensions except along 'dat_dim' and 'memb_dim'. The MSSSs are computed along +'time_dim', the dimension which corresponds to the start date dimension. +MSSS computes the mean square error skill score of each exp in 1:nexp +against each obs in 1:nobs which gives nexp * nobs MSSS for each grid point +of the array.\cr +The p-value and significance test are optionally provided by an one-sided +Fisher test or Random Walk test.\cr +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +rmsss <- MSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') + +# Synthetic data: +exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) +res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') + +} diff --git a/man/RMS.Rd b/man/RMS.Rd index 9d02d82b52a9eeeb23da78f9f74350346075252e..b7c044f65ae7c3abfd6a3f0e14996600791237a0 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -8,6 +8,7 @@ RMS( exp, obs, time_dim = "sdate", + memb_dim = NULL, dat_dim = NULL, comp_dim = NULL, limits = NULL, @@ -17,19 +18,20 @@ RMS( ) } \arguments{ -\item{exp}{A named numeric array of experimental data, with at least two -dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the -same length as 'obs', then the vector will automatically be 'time_dim' and -'dat_dim' will be 1.} +\item{exp}{A named numeric array of experimental data, with at least +'time_dim' dimension. It can also be a vector with the same length as 'obs'.} \item{obs}{A named numeric array of observational data, same dimensions as -parameter 'exp' except along dat_dim. It can also be a vector with the same -length as 'exp', then the vector will automatically be 'time_dim' and -'dat_dim' will be 1.} +parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +vector with the same length as 'exp'.} \item{time_dim}{A character string indicating the name of dimension along which the correlations are computed. The default value is 'sdate'.} +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the input data are +already the ensemble mean. The default value is NULL.} + \item{dat_dim}{A character string indicating the name of dataset or member (nobs/nexp) dimension. The datasets of exp and obs will be paired and computed RMS for each pair. The default value is NULL.} @@ -69,7 +71,7 @@ nobs are omitted.\cr \description{ Compute the root mean square error for an array of forecasts and an array of observations. The RMSEs are computed along time_dim, the dimension which -corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +corresponds to the start date dimension. If comp_dim is given, the RMSEs are computed only if obs along the comp_dim dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only @@ -79,14 +81,24 @@ The confidence interval is computed by the chi2 distribution.\cr } \examples{ # Load sample data as in Load() example: - set.seed(1) - exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) - obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) - na <- floor(runif(10, min = 1, max = 80)) - obs1[na] <- NA - res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') - # Renew example when Ano and Smoothing are ready +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +res <- RMS(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', + comp_dim = 'ftime', limits = c(7, 54)) +# Synthetic data: +exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +na <- floor(runif(10, min = 1, max = 80)) +obs1[na] <- NA +res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') + +exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +res2 <- RMS(exp3, obs3, memb_dim = 'member') + } diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index c647c71207839c502a6dd3844ca2d7ca99f81d9b..7b31e26f2810fbab6ad23e9d4df0b1302e02d28e 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -15,6 +15,7 @@ RMSSS( sign = FALSE, alpha = 0.05, sig_method = "one-sided Fisher", + sig_method.type = NULL, ncores = NULL ) } @@ -25,8 +26,9 @@ time dimension (time_dim). It can also be a vector with the same length as \item{obs}{A named numeric array of observational data which contains at least time dimension (time_dim). The dimensions should be the same as parameter -'exp' except the length of 'dat_dim' dimension. It can also be a vector with -the same length as 'exp', then the vector will automatically be 'time_dim'.} +'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +be a vector with the same length as 'exp', then the vector will +automatically be 'time_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time dimension, or 0 (typical climatological forecast) or 1 @@ -35,7 +37,7 @@ be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should not have dataset dimension. If there is corresponding reference for each experiment, the dataset dimension must have the same length as in 'exp'. If 'ref' is NULL, the typical -climatological forecast is used as reference forecast (equivelant to 0.) +climatological forecast is used as reference forecast (equivalent to 0.) The default value is NULL.} \item{time_dim}{A character string indicating the name of dimension along @@ -45,8 +47,8 @@ which the RMSSS are computed. The default value is 'sdate'.} dimension. The default value is NULL.} \item{memb_dim}{A character string indicating the name of the member dimension -to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -and 'ref' are already the ensemble mean. The default value is NULL.} +to compute the ensemble mean; it should be set to NULL if the data are +already the ensemble mean. The default value is NULL.} \item{pval}{A logical value indicating whether to compute or not the p-value of the test Ho: RMSSS = 0. The default value is TRUE.} @@ -61,6 +63,12 @@ statistical significance test. The default value is 0.05.} \item{sig_method}{A character string indicating the significance method. The options are "one-sided Fisher" (default) and "Random Walk".} +\item{sig_method.type}{A character string indicating the test type of the +significance method. Check \code{RandomWalkTest()} parameter +\code{test.type} for details if parameter "sig_method" is "Random Walk". The +default is NULL (since "one-sided Fisher" doesn't have different test +types.)} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } @@ -85,22 +93,35 @@ nobs are omitted.\cr \description{ Compute the root mean square error skill score (RMSSS) between an array of forecast 'exp' and an array of observation 'obs'. The two arrays should -have the same dimensions except along dat_dim, where the length can be -different, with the number of experiments/models (nexp) and the number of -observational datasets (nobs).\cr -RMSSS computes the root mean square error skill score of each jexp in 1:nexp -against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs +are computed along 'time_dim', the dimension which corresponds to the start +date dimension. +RMSSS computes the root mean square error skill score of each exp in 1:nexp +against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point of the array.\cr -The RMSSS are computed along the time_dim dimension which should correspond -to the start date dimension.\cr The p-value and significance test are optionally provided by an one-sided Fisher test or Random Walk test.\cr } \examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') + set.seed(1) -exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) set.seed(2) -obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') + +exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) +obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') + +exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) +obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +res3 <- RMSSS(exp3, obs3, time_dim = 'time') } diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 505f3d4a410b003639243a2b9dcaf4e306a8995f..5311724ea1c5e9414a895b4a9b2f39de08288a97 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -144,7 +144,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( as.vector(CRPSS(exp1, obs1)$sign), - c(FALSE, FALSE), + c(FALSE, FALSE) ) expect_equal( as.vector(CRPSS(exp1, obs1, Fair = T)$crpss), @@ -220,7 +220,7 @@ test_that("3. Output checks: dat2", { expect_equal( as.vector(CRPSS(exp2, obs2)$sign), - FALSE, + FALSE ) expect_equal( as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), @@ -273,7 +273,7 @@ test_that("4. Output checks: dat3", { ) expect_equal( as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign), - rep(FALSE, 6), + rep(FALSE, 6) ) expect_equal( mean(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), diff --git a/tests/testthat/test-EOF.R b/tests/testthat/test-EOF.R index 4e95aa36443760e2f468c08305f453783c77edca..828bd5242651c70e5cfbe7bf3034dd4d3130c6c2 100644 --- a/tests/testthat/test-EOF.R +++ b/tests/testthat/test-EOF.R @@ -72,7 +72,7 @@ test_that("1. Input checks", { "length as the longitude dimension of 'ano'.") ) expect_warning( - EOF(dat1, lat = lat1, lon = c(350, 370)), + EOF(dat1, lat = lat1, lon = c(350, 370), neofs = 8), "Some 'lon' is out of the range \\[-360, 360\\]." ) # neofs diff --git a/tests/testthat/test-MSE.R b/tests/testthat/test-MSE.R new file mode 100644 index 0000000000000000000000000000000000000000..05bba2d8da007ed2f544b59e6b0a163930d50174 --- /dev/null +++ b/tests/testthat/test-MSE.R @@ -0,0 +1,259 @@ +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) + set.seed(2) + obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) + set.seed(2) + na <- floor(runif(10, min = 1, max = 80)) + obs1[na] <- NA + + # dat 2: vector + set.seed(5) + exp2 <- rnorm(10) + set.seed(6) + obs2 <- rnorm(10) + + # dat3 + set.seed(1) + exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, member = 3)) + set.seed(2) + obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, member = 2)) + + # dat4 + set.seed(1) + exp4 <- array(rnorm(120), dim = c(dataset = 2, sdate = 5, time = 1)) + set.seed(2) + obs4 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, member = 2, time = 1)) + +############################################## +test_that("1. Input checks", { + + expect_error( + MSE(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + MSE(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + MSE(c(1:10), c(2:4)), + "Parameter 'exp' and 'obs' must be array with as least two dimensions time_dim and dat_dim, or vector of same length." + ) + expect_error( + MSE(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + MSE(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + expect_error( + MSE(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, dat_dim = 'a'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, comp_dim = c('sdate', 'ftime')), + "Parameter 'comp_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, comp_dim = 'a'), + "Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, limits = c(1,3)), + "Paramter 'comp_dim' cannot be NULL if 'limits' is assigned." + ) + expect_error( + MSE(exp1, obs1, comp_dim = 'ftime', limits = c(1)), + paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") + ) + expect_error( + MSE(exp1, obs1, alpha = -1), + "Parameter 'alpha' must be a numeric number between 0 and 1." + ) + expect_error( + MSE(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + MSE(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + MSE(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." + ) + expect_error( + MSE(exp = array(1:5, dim = c(sdate = 1, dataset = 5, a = 1)), + obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + "The length of time_dim must be at least 2 to compute MSE." + ) + + + +}) + +############################################## +test_that("2. Output checks: dat1", { +suppressWarnings( + expect_equal( + dim(MSE(exp1, obs1, dat_dim = 'dataset')$mse), + c(nexp = 3, nobs = 2, ftime = 2, lon = 1, lat = 4) + ) +) +suppressWarnings( + expect_equal( + MSE(exp1, obs1, dat_dim = 'dataset')$mse[1:6], + c(1.2815677, 2.0832803, 1.1894637, 1.3000403, 1.4053807, 0.8157563)^2, + tolerance = 0.001 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset')$conf.lower))), + 4 + ) +) +suppressWarnings( + expect_equal( + c(MSE(exp1, obs1, dat_dim = 'dataset')$conf.lower[2,1,,1,2:3]), + c(1.8869268, 0.4418298, 0.2694637, 0.8215383), + tolerance = 0.001 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$mse))), + 0 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$conf.upper))), + 8 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat')$conf.lower))), + 36 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat', limits = c(1, 2))$conf.lower))), + 21 + ) +) +suppressWarnings( + expect_equal( + c(MSE(exp1, obs1, dat_dim = 'dataset', alpha = 0.01)$conf.upper[2,1,,1,2:3]), + c(13.844841, 5.044269, 1.977121, 6.027826), + tolerance = 0.0001 + ) +) +suppressWarnings( + expect_equal( + length(MSE(exp1, obs1, dat_dim = 'dataset', conf = FALSE)), + 1 + ) +) + + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(MSE(exp2, obs2)$mse), + NULL + ) + + expect_equal( + as.vector(MSE(exp2, obs2)$mse), + 1.429815^2, + tolerance = 0.00001 + ) +}) + +############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$mse), + c(ftime = 2) + ) + + expect_equal( + as.vector(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$mse), + c(0.6191331, 0.7133894)^2, + tolerance = 0.00001 + ) + expect_equal( + names(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = FALSE)), + c("mse") + ) + expect_equal( + names(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = TRUE)), + c('mse', 'conf.lower', 'conf.upper') + ) + expect_equal( + c(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.lower), + c(0.2567713, 0.3409037), + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.upper), + c(1.231523, 1.635038), + tolerance = 0.0001 + ) + +}) + +############################################## + +test_that("5. Output checks: dat4", { + + expect_equal( + dim(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$mse), + c(nexp = 2, nobs = 1, time = 1) + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$mse), + c(0.6775320, 0.8954404)^2, + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.lower), + c(0.3074949, 0.5370958), + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.upper), + c(1.474804, 2.576013), + tolerance = 0.0001 + ) + + +}) + +############################################## + diff --git a/tests/testthat/test-MSSS.R b/tests/testthat/test-MSSS.R new file mode 100644 index 0000000000000000000000000000000000000000..29952fae9e7c54f95f4acb051e437e8854a64ecc --- /dev/null +++ b/tests/testthat/test-MSSS.R @@ -0,0 +1,265 @@ +############################################## + # case 1 + set.seed(1) + exp1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) + set.seed(2) + obs1 <- array(rnorm(6), dim = c(sdate = 3, dataset = 2)) + set.seed(3) + ref1_1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) + ref1_2 <- exp1[, 3] + dim(ref1_2) <- c(sdate = 3) + + # case 2 + set.seed(3) + exp2 <- array(rnorm(30), dim = c(dataset = 3, sdate = 5, member = 3)) + set.seed(4) + obs2 <- array(rnorm(20), dim = c(dataset = 2, sdate = 5, member = 2)) + set.seed(5) + ref2 <- array(rnorm(15), dim = c(sdate = 5, member = 3)) + + # case 3: vector + set.seed(5) + exp3 <- rnorm(10) + set.seed(6) + obs3 <- rnorm(10) + + +############################################## + +test_that("1. Input checks", { + ## exp and obs (1) + expect_error( + MSSS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + MSSS('exp', 'obs'), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + MSSS(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be array with as least two dimensions ", + "time_dim and dat_dim, or vector of same length.") + ) + expect_error( + MSSS(array(1:10, dim = c(2, 5)), array(1:10, dim = c(2, 5))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + MSSS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + ## time_dim + expect_error( + MSSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MSSS(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + ## dat_dim + expect_error( + MSSS(exp1, obs1, dat_dim = NA), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + MSSS(exp1, obs1, dat_dim = 'memb'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) + ## pval + expect_error( + MSSS(exp1, obs1, pval = c(T, T)), + "Parameter 'pval' must be one logical value." + ) + ## sign + expect_error( + MSSS(exp1, obs1, sign = 0.05), + "Parameter 'sign' must be one logical value." + ) + ## alpha + expect_error( + MSSS(exp1, obs1, alpha = T), + "Parameter 'alpha' must be one numeric value." + ) + ## ncores + expect_error( + MSSS(exp1, obs1, ncores = 1.4), + "Parameter 'ncores' must be a positive integer." + ) + ## exp and obs (2) + expect_error( + MSSS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." + ) + expect_error( + MSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + "The length of time_dim must be more than 2 to compute MSSS." + ) +}) + +############################################## +test_that("2. Output checks: case 1", { + + res1_1 <- MSSS(exp1, obs1, dat_dim = 'dataset') + expect_equal( + dim(res1_1$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_1$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + c(res1_1$msss)[3:8], + c(0.03359106, -0.05535409, -0.80010171, 0.03151828, -5.53371892, -1.67639444), + tolerance = 0.00001 + ) + expect_equal( + as.vector(res1_1$p.val)[3:7], + c(0.4829225, 0.5269121, 0.7641713, 0.4839926, 0.9771112), + tolerance = 0.001 + ) + + exp1_2 <- exp1; exp1_2[2:4] <- NA + obs1_2 <- obs1; obs1_2[1:2] <- NA + res1_2 <- MSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = T, sign = T) + + expect_equal( + names(res1_2), + c("msss", "p.val", "sign") + ) + expect_equal( + c(res1_2$msss), + c(rep(NA, 7), -1.676394, -1.520840, -3.455754), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$p.val), + c(rep(NA, 7), 0.8774973, 0.8640313, 0.9520470), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$sign), + c(rep(NA, 7), rep(FALSE, 3)) + ) + + #ref + res1_3 <- MSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset') + expect_equal( + dim(res1_3$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_3$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_3$msss[2, ]), + c(-3.828708, -96.610622), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_3$p.val[2, ]), + c(0.9588755, 0.9998951), + tolerance = 0.0001 + ) + res1_4 <- MSSS(exp1, obs1, ref = ref1_2, dat_dim = 'dataset', sign = T, alpha = 0.3) + expect_equal( + dim(res1_4$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_4$sign), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_4$msss[2, ]), + c(-2.705537, -1.441239), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$p[2, ]), + c(0.9321160, 0.8563146), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$sign), + c(rep(F, 5), T, rep(F, 4)) + ) + + # Random Walk + suppressWarnings({ + res1_5 <- MSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided') + }) + expect_equal( + as.vector(res1_5$sign), + rep(F, 10) + ) + suppressWarnings({ + res1_6 <- MSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = T, sign = T, sig_method.type = 'two.sided') + }) + expect_equal( + as.vector(res1_6$p), + c(1, 1, 1, 1, 1, 1, 0.25, 0.25, 1, 1) + ) + expect_equal( + as.vector(res1_6$sign), + rep(F, 10) + ) + +}) + + +############################################## +test_that("3. Output checks: case 2", { + res1 <- MSSS(exp2, obs2, ref2, dat_dim = "dataset", memb_dim = 'member', sign = T) + expect_equal( + dim(res1$msss), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$sign), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$p), + c(nexp = 3, nobs = 2) + ) + expect_equal( + c(res1$msss), + c(-0.18155696, 0.51980557, -0.66121915, -0.08753543, 0.62908575, -0.95419385), + tolerance = 0.0001 + ) + expect_equal( + c(res1$p), + c(0.62284746, 0.09217502, 0.82539496, 0.56264155, 0.04034091, 0.88868245), + tolerance = 0.0001 + ) + + res2 <- MSSS(apply(exp2, 1:2, mean), apply(obs2, 1:2, mean), array(apply(ref2, 1, mean), dim = c(sdate = 5)), dat_dim = "dataset", sign = T) + expect_equal( + res1, res2 + ) + +}) + +############################################## + +test_that("4. Output checks: case 3", { + + expect_equal( + dim(MSSS(exp3, obs3)$msss), + NULL + ) + expect_equal( + as.vector(MSSS(exp3, obs3)$msss), + -1.653613, + tolerance = 0.00001 + ) + +}) diff --git a/tests/testthat/test-RMS.R b/tests/testthat/test-RMS.R index c24a2a9f8867b5afeb9f2a755465d0396f123b7b..660f4e7f31ceca39a8038d326717f5048bf0c3bc 100644 --- a/tests/testthat/test-RMS.R +++ b/tests/testthat/test-RMS.R @@ -2,7 +2,6 @@ # dat1 set.seed(1) exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) set.seed(2) @@ -17,17 +16,15 @@ # dat3 set.seed(1) - exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) - + exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, member = 3)) set.seed(2) - obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) + obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, member = 2)) # dat4 set.seed(1) - exp4 <- array(rnorm(120), dim = c(sdates = 5, ftimes = 2, lon = 1, lat = 1)) - + exp4 <- array(rnorm(120), dim = c(dataset = 2, sdate = 5, time = 1)) set.seed(2) - obs4 <- array(rnorm(80), dim = c(sdates = 5, ftimes = 2, lon = 1, lat = 1)) + obs4 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, member = 2, time = 1)) ############################################## test_that("1. Input checks", { @@ -49,8 +46,9 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - RMS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), - "Parameter 'exp' and 'obs' must have same dimension name" + RMS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." ) expect_error( RMS(exp1, obs1, dat_dim = 1), @@ -98,13 +96,13 @@ test_that("1. Input checks", { "Parameter 'ncores' must be a positive integer." ) expect_error( - RMS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), - "Parameter 'exp' and 'obs' must have same length of all dimension except 'dat_dim'." + RMS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." ) expect_error( RMS(exp = array(1:5, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), "The length of time_dim must be at least 2 to compute RMS." ) @@ -200,38 +198,62 @@ test_that("3. Output checks: dat2", { test_that("4. Output checks: dat3", { expect_equal( - dim(RMS(exp3, obs3, dat_dim = NULL)$rms), - c(ftime = 2, lon = 1, lat = 4) + dim(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$rms), + c(ftime = 2) ) expect_equal( - as.vector(RMS(exp3, obs3, dat_dim = NULL)$rms), - c(1.6458118, 0.8860392, 0.8261295, 1.1681939, 2.1693538, 1.3064454, 0.5384229, 1.1215333), + as.vector(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$rms), + c(0.6191331, 0.7133894), tolerance = 0.00001 ) expect_equal( - dim(RMS(exp3, obs3, dat_dim = NULL, conf = FALSE)$rms), - c(ftime = 2, lon = 1, lat = 4) + names(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = FALSE)), + c("rms") ) expect_equal( - dim(RMS(exp4, obs4, time_dim = 'sdates', dat_dim = NULL, conf = TRUE)$rms), - c(ftimes = 2, lon = 1, lat = 1) + names(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = TRUE)), + c('rms', 'conf.lower', 'conf.upper') ) expect_equal( - length(RMS(exp3, obs3, dat_dim = NULL, conf = F)), - 1 + c(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.lower), + c(0.4147271, 0.4778648), + tolerance = 0.0001 ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL)$conf.lower[1,1,]), - c(1.1024490, 0.5533838, 1.4531443, 0.3606632), - tolerance = 0.0001 + c(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.upper), + c(1.989109, 2.291930), + tolerance = 0.0001 + ) + +}) + +############################################## + +test_that("5. Output checks: dat4", { + + expect_equal( + dim(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$rms), + c(nexp = 2, nobs = 1, time = 1) ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL)$conf.upper[1,1,]), - c(5.287554, 2.654133, 6.969554, 1.729809), - tolerance = 0.0001 + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$rms), + c(0.6775320, 0.8954404), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.lower), + c(0.4538456, 0.5998118), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.upper), + c(2.176729, 2.876811), + tolerance = 0.0001 ) + }) ############################################## + diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 7f3837389617e199adbf7e55bdf41483b4feec06..a364b40c50cd917002560c6e0b0613e7e3091a4a 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -11,9 +11,11 @@ # case 2 set.seed(3) - exp2 <- array(rnorm(120), dim = c(time = 10, dat = 1, lon = 3, lat = 2, dataset = 2)) + exp2 <- array(rnorm(30), dim = c(dataset = 3, sdate = 5, member = 3)) set.seed(4) - obs2 <- array(rnorm(60), dim = c(dat = 1, time = 10, dataset = 1, lat = 2, lon = 3)) + obs2 <- array(rnorm(20), dim = c(dataset = 2, sdate = 5, member = 2)) + set.seed(5) + ref2 <- array(rnorm(15), dim = c(sdate = 5, member = 3)) # case 3: vector set.seed(5) @@ -21,20 +23,6 @@ set.seed(6) obs3 <- rnorm(10) - # case 4 - set.seed(7) - exp4 <- array(rnorm(60), dim = c(sdate = 10, lon = 3, lat = 2)) - set.seed(8) - obs4 <- array(exp4 + rnorm(60) / 2, dim = dim(exp4)) - - # case 5: memb_dim - set.seed(1) - exp5 <- array(rnorm(45), dim = c(sdate = 3, dataset = 5, member = 3)) - set.seed(2) - obs5 <- array(rnorm(3), dim = c(sdate = 3, dataset = 1, member = 1)) - set.seed(3) - ref5 <- array(rnorm(6), dim = c(sdate = 3, member = 2)) - ############################################## @@ -58,8 +46,9 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - RMSSS(array(1:10, dim = c(a = 3, c = 5)), array(1:4, dim = c(a = 3, b = 5)), time_dim = 'a', dat_dim = NULL), - "Parameter 'exp' and 'obs' must have same dimension name" + RMSSS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." ) ## time_dim expect_error( @@ -102,9 +91,9 @@ test_that("1. Input checks", { ) ## exp and obs (2) expect_error( - RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), - "Parameter 'exp' and 'obs' must have same length of all dimension except 'memb_dim' and 'dat_dim'." + RMSSS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." ) expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), @@ -126,8 +115,8 @@ test_that("2. Output checks: case 1", { c(nexp = 5, nobs = 2) ) expect_equal( - mean(res1_1$rmsss), - -0.5449538, + c(res1_1$rmsss)[3:8], + c(0.01693900, -0.02730428, -0.34167869, 0.01588531, -1.55611403, -0.63596896), tolerance = 0.00001 ) expect_equal( @@ -136,20 +125,27 @@ test_that("2. Output checks: case 1", { tolerance = 0.001 ) - exp1_2 <- exp1 - exp1_2[2:4] <- NA - obs1_2 <- obs1 - obs1_2[1:2] <- NA - res1_2 <- RMSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = TRUE) + exp1_2 <- exp1; exp1_2[2:4] <- NA + obs1_2 <- obs1; obs1_2[1:2] <- NA + res1_2 <- RMSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = T, sign = T) expect_equal( - length(res1_2$rmsss[which(is.na(res1_2$rmsss))]), - 7 + names(res1_2), + c("rmsss", "p.val", "sign") ) expect_equal( - range(res1_2$p.val, na.rm = T), - c(0.7159769, 0.8167073), - tolerance = 0.00001 + c(res1_2$rmsss), + c(rep(NA, 7), -0.6359690, -0.5877153, -1.1108657), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$p.val), + c(rep(NA, 7), 0.7279944, 0.7159769, 0.8167073), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$sign), + c(rep(NA, 7), rep(FALSE, 3)) ) #ref @@ -198,39 +194,73 @@ test_that("2. Output checks: case 1", { # Random Walk suppressWarnings({ - res1_5 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + res1_5 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided.approx') }) expect_equal( as.vector(res1_5$sign), rep(F, 10) ) suppressWarnings({ - res1_6 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + res1_6 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided.approx') }) expect_equal( as.vector(res1_6$sign), rep(F, 10) ) + suppressWarnings({ + res1_7 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = T, sign = T, sig_method.type = 'two.sided', alpha = 0.4) + }) + expect_equal( + names(res1_7), + c('rmsss', 'p.val', 'sign') + ) + expect_equal( + res1_7$rmsss, + res1_6$rmsss + ) + expect_equal( + c(res1_7$p[, 2]), + c(1, 0.25, 0.25, 1, 1), + tolerance = 0.0001 + ) + expect_equal( + c(res1_7$sign[, 2]), + c(F, T, T, F, F) + ) }) ############################################## test_that("3. Output checks: case 2", { - + res1 <- RMSSS(exp2, obs2, ref2, dat_dim = "dataset", memb_dim = 'member', sign = T) expect_equal( - dim(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), - c(nexp = 2, nobs = 1, dat = 1, lon = 3, lat = 2) + dim(res1$rmsss), + c(nexp = 3, nobs = 2) ) expect_equal( - mean(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), - -0.3912208, - tolerance = 0.00001 + dim(res1$sign), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$p), + c(nexp = 3, nobs = 2) ) expect_equal( - range(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$p.val), - c(0.2627770, 0.9868412), - tolerance = 0.00001 + c(res1$rmsss), + c(-0.08699446, 0.30703938, -0.28888291, -0.04284967, 0.39097270, -0.39792484), + tolerance = 0.0001 + ) + expect_equal( + c(res1$p), + c(0.5622736, 0.2474466, 0.6825138, 0.5314309, 0.1799964, 0.7338230), + tolerance = 0.0001 + ) + + + res2 <- RMSSS(apply(exp2, 1:2, mean), apply(obs2, 1:2, mean), array(apply(ref2, 1, mean), dim = c(sdate = 5)),dat_dim = "dataset", sign = T) + expect_equal( + res1, res2 ) }) @@ -250,61 +280,3 @@ test_that("4. Output checks: case 3", { ) }) - -############################################## -test_that("5. Output checks: case 4", { - - expect_equal( - dim(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - c(lon = 3, lat = 2) - ) - expect_equal( - dim(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), - c(lon = 3, lat = 2) - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - c(0.5393823, 0.6818405, 0.4953423, 0.4093817, 0.5972085, 0.5861135), - tolerance = 0.00001 - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), - c(0.015203983, 0.001091360, 0.026987112, 0.066279877, 0.006161059, 0.007437649), - tolerance = 0.00001 - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL)), - c('rmsss', 'p.val') - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)), - c('rmsss', 'sign') - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = T)), - c('rmsss', 'p.val', 'sign') - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)$sign), - c(T, T, T, F, T, T) - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F, alpha = 0.01)$sign), - c(F, T, F, F, T, T) - ) - -}) - -############################################## -test_that("6. Output checks: case 5", { - res5_1 <- RMSSS(exp5, obs5, ref = ref5, dat_dim = 'dataset', memb_dim = 'member') - res5_2 <- RMSSS(s2dv::MeanDims(exp5, 'member'), s2dv::MeanDims(obs5, 'member'), - ref = s2dv::MeanDims(ref5, 'member'), dat_dim = 'dataset') - expect_equal( - res5_1, - res5_2 - ) - - -}) - diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index f054325f729ff120a5c1570d1b2c12a164d3ea14..8df2d90d0d6ec9f1276ef7d89422b8912a15a4a7 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -188,7 +188,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( as.vector(RPSS(exp1, obs1)$sign), - c(FALSE, FALSE), + c(FALSE, FALSE) ) expect_equal( as.vector(RPSS(exp1, obs1, Fair = T)$rpss), @@ -306,7 +306,7 @@ test_that("3. Output checks: dat2", { ) expect_equal( as.vector(RPSS(exp2, obs2)$sign), - FALSE, + FALSE ) expect_equal( as.vector(RPSS(exp2, obs2, Fair = T)$rpss), @@ -376,7 +376,7 @@ test_that("4. Output checks: dat3", { ) expect_equal( as.vector(RPSS(exp3, obs3, dat_dim = 'dataset')$sign)[1:3], - c(FALSE, FALSE, TRUE), + c(FALSE, FALSE, TRUE) ) expect_equal( mean(RPSS(exp3, obs3, dat_dim = 'dataset', weights_exp = weights3, Fair = T)$rpss),