From 6b5e39cd38785382126926846dc56441d87d787c Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 20 Jun 2023 12:34:13 +0200 Subject: [PATCH 1/4] new functions --- R/MSE.R | 300 ++++++++++++++++++++++++++++++++++++ R/MSSS.R | 454 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 754 insertions(+) create mode 100644 R/MSE.R create mode 100644 R/MSSS.R diff --git a/R/MSE.R b/R/MSE.R new file mode 100644 index 0000000..1218904 --- /dev/null +++ b/R/MSE.R @@ -0,0 +1,300 @@ +#'Compute mean square error +#' +#'Compute the mean square error for an array of forecasts and an array of +#'observations. The MSEs are computed along time_dim, the dimension which +#'corresponds to the startdate dimension. If comp_dim is given, the MSEs are +#'computed only if obs along the comp_dim dimension are complete between +#'limits[1] and limits[2], i.e. there are no NAs between limits[1] and +#'limits[2]. This option can be activated if the user wishes to account only +#'for the forecasts for which the corresponding observations are available at +#'all leadtimes.\cr +#'The confidence interval is computed by the chi2 distribution.\cr +#' +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the +#' same length as 'obs', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along dat_dim. It can also be a vector with the same +#' length as 'exp', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. +#'@param dat_dim A character string indicating the name of member (nobs/nexp) +#' dimension. The default value is 'dataset'. +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default value is c(1, length(comp_dim dimension)). +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs).\cr +#'\item{$mse}{ +#' The mean square error. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#' set.seed(1) +#' exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' set.seed(2) +#' obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' res1 <- MSE(exp1, obs1) +#' +#' exp2 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4, memb = 5)) +#' obs2 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' res2 <- MSE(exp2, obs2, memb_dim = 'memb') +#' +#' # Renew example when Ano and Smoothing are ready +#' +#'@rdname MSE +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats qchisq +#'@export +MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, + dat_dim = 'dataset', comp_dim = NULL, limits = NULL, + conf = TRUE, conf.lev = 0.95, ncores = NULL) { + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp), 1)) + names(dim(exp)) <- c(time_dim, dat_dim) + obs <- array(obs, dim = c(length(obs), 1)) + names(dim(obs)) <- c(time_dim, dat_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | + !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + "Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'memb_dim' and 'dat_dim'.")) + } + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + ############################### + # Calculate MSE + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + if (is.null(limits)) { + limits <- c(1, dim(obs)[comp_dim]) + } + pos <- which(names(dim(obs)) == comp_dim) + obs_sub <- Subset(obs, pos, list(limits[1]:limits[2])) + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .MSE, + time_dim = time_dim, dat_dim = dat_dim, + conf = conf, conf.lev = conf.lev, ncores_input = ncores, + ncores = ncores) + return(res) +} + +.MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', + conf = TRUE, conf.lev = 0.95, ncores_input = NULL) { + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + ini_dims <- dim(exp) + dim(exp) <- c(ini_dims, dat_dim = 1) + dim(obs) <- c(ini_dims, dat_dim = 1) + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + } + + dif <- array(dim = c(dim(exp)[1], nexp = nexp, nobs = nobs)) + chi <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (conf) { + conflow <- (1 - conf.lev) / 2 + confhigh <- 1 - conflow + conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) + conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) + } + + # dif + for (i in 1:nobs) { + dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + mse <- apply(dif^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(_exp, nobs)) + + if (conf) { + #eno <- Eno(dif, 1) #count effective sample along sdate. dim = c(nexp, nobs) + eno <- Eno(dif, time_dim, ncores = ncores_input) #change to this line when Eno() is done + + # conf.lower + chi <- sapply(1:nobs, function(i) { + qchisq(confhigh, eno[, i] - 1) + }) + conf.lower <- (eno * mse ** 2 / chi) ** 0.5 + + # conf.upper + chi <- sapply(1:nobs, function(i) { + qchisq(conflow, eno[, i] - 1) + }) + conf.upper <- (eno * mse ** 2 / chi) ** 0.5 + } + + ################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { + dim(mse) <- NULL + if (conf) { + dim(conf.lower) <- NULL + dim(conf.upper) <- NULL + } + } + + ################################### + + if (conf) { + res <- list(mse = mse, conf.lower = conf.lower, conf.upper = conf.upper) + } else { + res <- list(mse = mse) + } + + return(res) + +} diff --git a/R/MSSS.R b/R/MSSS.R new file mode 100644 index 0000000..b784027 --- /dev/null +++ b/R/MSSS.R @@ -0,0 +1,454 @@ +#'Compute mean square error skill score +#' +#'Compute the mean square error skill score (MSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should +#'have the same dimensions except along dat_dim, where the length can be +#'different, with the number of experiments/models (nexp) and the number of +#'observational datasets (nobs).\cr +#'MSSS computes the mean square error skill score of each jexp in 1:nexp +#'against each job in 1:nobs which gives nexp * nobs MSSS for each grid point +#'of the array.\cr +#'The MSSS are computed along the time_dim dimension which should correspond +#'to the start date dimension.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test or Random Walk test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' two dimensions for dat_dim and time_dim. It can also be a vector with the +#' same length as 'obs', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param obs A named numeric array of observational data which contains at least +#' two dimensions for dat_dim and time_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of +#' dimension can be different. It can also be a vector with the same length as +#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will +#' be 1. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivalant to 0.) +#' The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL. +#'@param time_dim A character string indicating the name of dimension along +#' which the MSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: MSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: MSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$msss}{ +#' A numerical array of the mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $msss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the MSSS with the same +#' dimensions as $msss. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#' set.seed(1) +#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' set.seed(2) +#' obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) +#' res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') +#' +#'@rdname MSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp), 1)) + names(dim(exp)) <- c(time_dim, dat_dim) + obs <- array(obs, dim = c(length(obs), 1)) + names(dim(obs)) <- c(time_dim, dat_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | + !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") + } + if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + if (sig_method == "Random Walk" & pval == T) { + warning("p-value cannot be calculated by significance method 'Random Walk'.") + pval <- FALSE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'memb_dim' and 'dat_dim'.")) + } + + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute MSSS.") + } + + ############################### + # # Sort dimension + # name_exp <- names(dim(exp)) + # name_obs <- names(dim(obs)) + # order_obs <- match(name_exp, name_obs) + # obs <- Reorder(obs, order_obs) + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + + ############################### + # Calculate MSSS + + # if (!is.null(ref)) { # use "ref" as reference forecast + # if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { + # target_dims_ref <- c(time_dim, dat_dim) + # } else { + # target_dims_ref <- c(time_dim) + # } + # data <- list(exp = exp, obs = obs, ref = ref) + # target_dims = list(exp = c(time_dim, dat_dim), + # obs = c(time_dim, dat_dim), + # ref = target_dims_ref) + # } else { + # data <- list(exp = exp, obs = obs) + # target_dims = list(exp = c(time_dim, dat_dim), + # obs = c(time_dim, dat_dim)) + # } + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, + ncores = ncores) + + return(res) +} + +.MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } + + nsdate <- as.numeric(dim(exp)[1]) + + # MSE of forecast + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + mse_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(nexp, nobs)) + + # MSE of reference + # if (!is.null(ref)) { + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + mse_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(nref, nobs)) + if (nexp != nref) { + # expand mse_ref to nexp (nref is 1) + mse_ref <- array(mse_ref, dim = c(nobs = nobs, nexp = nexp)) + mse_ref <- Reorder(mse_ref, c(2, 1)) + } + # } else { + # mse_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) + ## mse_ref[which(abs(mse_ref) <= (max(abs(mse_ref), na.rm = TRUE) / 1000))] <- max(abs( + ## mse_ref), na.rm = TRUE) / 1000 + # mse_ref <- Reorder(mse_ref, c(2, 1)) + # #mse_ref above: [nexp, nobs] + # } + + msss <- 1 - mse_exp / mse_ref + + ################################################# + + # if (conf) { + # conflow <- (1 - conf.lev) / 2 + # confhigh <- 1 - conflow + # conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) + # conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) + # } + + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } + + F.stat <- (eno2 * mse_ref^2 / (eno2 - 1)) / ((eno1 * mse_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data msss to NA + msss[which(!tmp)] <- NA + } + + } else if (sig_method == "Random Walk") { + signif <- array(dim = c(nexp = nexp, nobs = nobs)) + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + + # Error + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + # signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$sign + aux <- s2dv:::.RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = 'two.sided', + pval = TRUE, sign = TRUE, alpha = alpha) + signif[i, j] <- aux$sign + p_val[i, j] <- aux$p.val + } + } + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(msss) <- NULL + dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(msss = msss) + if (pval) { + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) + } + + return(res) +} -- GitLab From ab88f80c2ed0d9827abb04c0bc4e11c78b876232 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 20 Jun 2023 12:50:39 +0200 Subject: [PATCH 2/4] included memb_dim --- R/RMS.R | 54 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index 164167f..9f011ec 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -20,6 +20,9 @@ #' 'dat_dim' will be 1. #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. #'@param dat_dim A character string indicating the name of dataset or member #' (nobs/nexp) dimension. The datasets of exp and obs will be paired and #' computed RMS for each pair. The default value is NULL. @@ -60,15 +63,23 @@ #' set.seed(2) #' na <- floor(runif(10, min = 1, max = 80)) #' obs1[na] <- NA -#' res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') +#' res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') #' # Renew example when Ano and Smoothing are ready #' +#' exp2 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' obs2 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' res2 <- RMS(exp2, obs2, comp_dim = 'ftime') +#' +#' exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4, memb = 4)) +#' obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) +#' res3 <- RMS(exp3, obs3, comp_dim = 'ftime', memb_dim = 'memb') +#' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@importFrom stats qchisq #'@export -RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, - limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { +RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, + comp_dim = NULL, limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { @@ -95,8 +106,8 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { + if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | + !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { stop("Parameter 'exp' and 'obs' must have same dimension name") } ## time_dim @@ -106,6 +117,23 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { @@ -151,22 +179,32 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, stop("Parameter 'ncores' must be a positive integer.") } } + ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim'.")) + "all dimension except 'dat_dim' and 'memb_dim'.")) } if (dim(exp)[time_dim] < 2) { stop("The length of time_dim must be at least 2 to compute RMS.") } - - + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + ############################### # Sort dimension name_exp <- names(dim(exp)) -- GitLab From f9fa3819f493d77664a0cb713b7e05f148803f00 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 20 Jun 2023 12:51:37 +0200 Subject: [PATCH 3/4] fixed place to create ref-array if ref is provided as a number --- R/RMSSS.R | 53 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index cf45fa6..00ccc21 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -27,7 +27,7 @@ #' reference dataset, it should not have dataset dimension. If there is #' corresponding reference for each experiment, the dataset dimension must #' have the same length as in 'exp'. If 'ref' is NULL, the typical -#' climatological forecast is used as reference forecast (equivelant to 0.) +#' climatological forecast is used as reference forecast (equivalent to 0.) #' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is NULL. @@ -68,10 +68,18 @@ #' #'@examples #' set.seed(1) -#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) #' set.seed(2) -#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +#' obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') +#' +#' exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) +#' obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') +#' +#' exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) +#' obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res3 <- RMSSS(exp3, obs3, time_dim = 'time') #' #'@rdname RMSSS #'@import multiApply @@ -107,8 +115,8 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { + if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | + !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { stop("Parameter 'exp' and 'obs' must have same dimension name.") } if (!is.null(ref)) { @@ -190,6 +198,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -199,6 +208,14 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop(paste0("Parameter 'exp' and 'obs' must have same length of ", "all dimension except 'memb_dim' and 'dat_dim'.")) } + + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) if (!is.null(memb_dim) && memb_dim %in% name_ref) { @@ -224,23 +241,15 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (dim(exp)[time_dim] <= 2) { stop("The length of time_dim must be more than 2 to compute RMSSS.") } - - - ############################### -# # Sort dimension -# name_exp <- names(dim(exp)) -# name_obs <- names(dim(obs)) -# order_obs <- match(name_exp, name_obs) -# obs <- Reorder(obs, order_obs) - - + + ############################### - # Create ref array if needed - if (is.null(ref)) ref <- 0 - if (!is.array(ref)) { - ref <- array(data = ref, dim = dim(exp)) - } - + # # Sort dimension + # name_exp <- names(dim(exp)) + # name_obs <- names(dim(obs)) + # order_obs <- match(name_exp, name_obs) + # obs <- Reorder(obs, order_obs) + ############################### ## Ensemble mean if (!is.null(memb_dim)) { -- GitLab From abd2660319312c2acce8ad55f24834ac0c5048f1 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 11 Jul 2023 16:24:59 +0200 Subject: [PATCH 4/4] Improve MSSS and MSE, build unit tests; fix minor mistakes; Add RandomWalkTest test.type options --- NAMESPACE | 2 + R/MSE.R | 157 +++++++++++---------- R/MSSS.R | 257 ++++++++++++++++------------------ R/RMS.R | 93 +++++++------ R/RMSSS.R | 235 ++++++++++++++++---------------- man/MSE.Rd | 102 ++++++++++++++ man/MSSS.Rd | 117 ++++++++++++++++ man/RMS.Rd | 46 ++++--- man/RMSSS.Rd | 51 +++++-- tests/testthat/test-CRPSS.R | 6 +- tests/testthat/test-EOF.R | 2 +- tests/testthat/test-MSE.R | 259 +++++++++++++++++++++++++++++++++++ tests/testthat/test-MSSS.R | 265 ++++++++++++++++++++++++++++++++++++ tests/testthat/test-RMS.R | 80 +++++++---- tests/testthat/test-RMSSS.R | 176 ++++++++++-------------- tests/testthat/test-RPSS.R | 6 +- 16 files changed, 1309 insertions(+), 545 deletions(-) create mode 100644 man/MSE.Rd create mode 100644 man/MSSS.Rd create mode 100644 tests/testthat/test-MSE.R create mode 100644 tests/testthat/test-MSSS.R diff --git a/NAMESPACE b/NAMESPACE index d74cc97..9214a1a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,8 @@ export(Histo2Hindcast) export(InsertDim) export(LeapYear) export(Load) +export(MSE) +export(MSSS) export(MeanDims) export(NAO) export(Persistence) diff --git a/R/MSE.R b/R/MSE.R index 1218904..97e4e82 100644 --- a/R/MSE.R +++ b/R/MSE.R @@ -2,29 +2,26 @@ #' #'Compute the mean square error for an array of forecasts and an array of #'observations. The MSEs are computed along time_dim, the dimension which -#'corresponds to the startdate dimension. If comp_dim is given, the MSEs are +#'corresponds to the start date dimension. If comp_dim is given, the MSEs are #'computed only if obs along the comp_dim dimension are complete between #'limits[1] and limits[2], i.e. there are no NAs between limits[1] and -#'limits[2]. This option can be activated if the user wishes to account only +#'limits[2]. This option can be activated if the user wants to account only #'for the forecasts for which the corresponding observations are available at #'all leadtimes.\cr #'The confidence interval is computed by the chi2 distribution.\cr #' -#'@param exp A named numeric array of experimental data, with at least two -#' dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#'@param exp A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. #'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along dat_dim. It can also be a vector with the same -#' length as 'exp', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +#' vector with the same length as 'exp'. #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. -#'@param dat_dim A character string indicating the name of member (nobs/nexp) -#' dimension. The default value is 'dataset'. +#' to compute the ensemble mean; it should be set to NULL if the input data are +#' already the ensemble mean. The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset or member +#' (nobs/nexp) dimension. The datasets of exp and obs will be paired and +#' computed MSE for each pair. The default value is NULL. #'@param comp_dim A character string indicating the name of dimension along which #' obs is taken into account only if it is complete. The default value #' is NULL. @@ -32,8 +29,8 @@ #' be completed. The default value is c(1, length(comp_dim dimension)). #'@param conf A logical value indicating whether to retrieve the confidence #' intervals or not. The default value is TRUE. -#'@param conf.lev A numeric indicating the confidence level for the -#' regression computation. The default value is 0.95. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -54,26 +51,33 @@ #' #'@examples #'# Load sample data as in Load() example: -#' set.seed(1) -#' exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' set.seed(2) -#' obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' res1 <- MSE(exp1, obs1) -#' -#' exp2 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4, memb = 5)) -#' obs2 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' res2 <- MSE(exp2, obs2, memb_dim = 'memb') +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +#'res <- MSE(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', +#' comp_dim = 'ftime', limits = c(7, 54)) +#' +#'# Synthetic data: +#'exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +#'obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +#'na <- floor(runif(10, min = 1, max = 80)) +#'obs1[na] <- NA +#'res1 <- MSE(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') #' -#' # Renew example when Ano and Smoothing are ready +#'exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +#'obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +#'res2 <- MSE(exp3, obs3, memb_dim = 'member') #' -#'@rdname MSE #'@import multiApply #'@importFrom ClimProjDiags Subset #'@importFrom stats qchisq #'@export -MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, - dat_dim = 'dataset', comp_dim = NULL, limits = NULL, - conf = TRUE, conf.lev = 0.95, ncores = NULL) { +MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, + comp_dim = NULL, limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { + # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { @@ -84,10 +88,10 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, } if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector if (length(exp) == length(obs)) { - exp <- array(exp, dim = c(length(exp), 1)) - names(dim(exp)) <- c(time_dim, dat_dim) - obs <- array(obs, dim = c(length(obs), 1)) - names(dim(obs)) <- c(time_dim, dat_dim) + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) } else { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) @@ -100,10 +104,6 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | - !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { - stop("Parameter 'exp' and 'obs' must have same dimension name.") - } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -116,17 +116,9 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } - if (!memb_dim %in% names(dim(exp))) { + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } } ## dat_dim if (!is.null(dat_dim)) { @@ -162,9 +154,9 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") } - ## conf.lev - if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } ## ncores if (!is.null(ncores)) { @@ -177,23 +169,39 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'memb_dim' and 'dat_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } - + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2 to compute MSE.") + } + ############################### ## Ensemble mean if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } } - + ############################### # Sort dimension name_exp <- names(dim(exp)) @@ -221,13 +229,13 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, c(time_dim, dat_dim)), fun = .MSE, time_dim = time_dim, dat_dim = dat_dim, - conf = conf, conf.lev = conf.lev, ncores_input = ncores, + conf = conf, alpha = alpha, ncores = ncores) return(res) } -.MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', - conf = TRUE, conf.lev = 0.95, ncores_input = NULL) { +.MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, conf = TRUE, alpha = 0.05) { + if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] @@ -247,7 +255,7 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, chi <- array(dim = c(nexp = nexp, nobs = nobs)) if (conf) { - conflow <- (1 - conf.lev) / 2 + conflow <- alpha / 2 confhigh <- 1 - conflow conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) @@ -258,22 +266,28 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - mse <- apply(dif^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(_exp, nobs)) + mse <- colMeans(dif^2, na.rm = TRUE) # array(dim = c(nexp, nobs)) if (conf) { - #eno <- Eno(dif, 1) #count effective sample along sdate. dim = c(nexp, nobs) - eno <- Eno(dif, time_dim, ncores = ncores_input) #change to this line when Eno() is done + #count effective sample along sdate. eno: c(nexp, nobs) +# eno <- Eno(dif, time_dim) # slower than for loop below? + eno <- array(dim = c(nexp = nexp, nobs = nobs)) + for (n_obs in 1:nobs) { + for (n_exp in 1:nexp) { + eno[n_exp, n_obs] <- .Eno(dif[, n_exp, n_obs], na.action = na.pass) + } + } # conf.lower chi <- sapply(1:nobs, function(i) { - qchisq(confhigh, eno[, i] - 1) - }) + qchisq(confhigh, eno[, i] - 1) + }) conf.lower <- (eno * mse ** 2 / chi) ** 0.5 # conf.upper chi <- sapply(1:nobs, function(i) { - qchisq(conflow, eno[, i] - 1) - }) + qchisq(conflow, eno[, i] - 1) + }) conf.upper <- (eno * mse ** 2 / chi) ** 0.5 } @@ -288,13 +302,10 @@ MSE <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, } ################################### - - if (conf) { - res <- list(mse = mse, conf.lower = conf.lower, conf.upper = conf.upper) - } else { - res <- list(mse = mse) - } - + + res <- list(mse = mse) + if (conf) res <- c(res, list(conf.lower = conf.lower, conf.upper = conf.upper)) + return(res) } diff --git a/R/MSSS.R b/R/MSSS.R index b784027..a11c50c 100644 --- a/R/MSSS.R +++ b/R/MSSS.R @@ -1,28 +1,23 @@ #'Compute mean square error skill score #' -#'Compute the mean square error skill score (MSSS) between an array of -#'forecast 'exp' and an array of observation 'obs'. The two arrays should -#'have the same dimensions except along dat_dim, where the length can be -#'different, with the number of experiments/models (nexp) and the number of -#'observational datasets (nobs).\cr -#'MSSS computes the mean square error skill score of each jexp in 1:nexp -#'against each job in 1:nobs which gives nexp * nobs MSSS for each grid point +#'Compute the mean square error skill score (MSSS) between an array of forecast +#''exp' and an array of observation 'obs'. The two arrays should have the same +#'dimensions except along 'dat_dim' and 'memb_dim'. The MSSSs are computed along +#''time_dim', the dimension which corresponds to the start date dimension. +#'MSSS computes the mean square error skill score of each exp in 1:nexp +#'against each obs in 1:nobs which gives nexp * nobs MSSS for each grid point #'of the array.\cr -#'The MSSS are computed along the time_dim dimension which should correspond -#'to the start date dimension.\cr #'The p-value and significance test are optionally provided by an one-sided #'Fisher test or Random Walk test.\cr #' #'@param exp A named numeric array of experimental data which contains at least -#' two dimensions for dat_dim and time_dim. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#' time dimension (time_dim). It can also be a vector with the same length as +#' 'obs', then the vector will automatically be 'time_dim'. #'@param obs A named numeric array of observational data which contains at least -#' two dimensions for dat_dim and time_dim. The dimensions should be the same -#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of -#' dimension can be different. It can also be a vector with the same length as -#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will -#' be 1. +#' time dimension (time_dim). The dimensions should be the same as parameter +#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +#' be a vector with the same length as 'exp', then the vector will +#' automatically be 'time_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension, or 0 (typical climatological forecast) or 1 #' (normalized climatological forecast). If it is an array, the dimensions must @@ -30,15 +25,15 @@ #' reference dataset, it should not have dataset dimension. If there is #' corresponding reference for each experiment, the dataset dimension must #' have the same length as in 'exp'. If 'ref' is NULL, the typical -#' climatological forecast is used as reference forecast (equivalant to 0.) +#' climatological forecast is used as reference forecast (equivalent to 0.) #' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is NULL. #'@param time_dim A character string indicating the name of dimension along #' which the MSSS are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. +#' to compute the ensemble mean; it should be set to NULL if the data are +#' already the ensemble mean. The default value is NULL. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: MSSS = 0. The default value is TRUE. #'@param sign A logical value indicating whether to compute or not the @@ -48,6 +43,11 @@ #' statistical significance test. The default value is 0.05. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The +#' default is NULL (since "one-sided Fisher" doesn't have different test +#' types.) #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -70,11 +70,17 @@ #'} #' #'@examples -#' set.seed(1) -#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) -#' set.seed(2) -#' obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) -#' res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'rmsss <- MSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') +#' +#'# Synthetic data: +#'exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#'obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) +#'res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') #' #'@rdname MSSS #'@import multiApply @@ -82,7 +88,7 @@ #'@export MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, - sig_method = 'one-sided Fisher', ncores = NULL) { + sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -94,10 +100,10 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector if (length(exp) == length(obs)) { - exp <- array(exp, dim = c(length(exp), 1)) - names(dim(exp)) <- c(time_dim, dat_dim) - obs <- array(obs, dim = c(length(obs), 1)) - names(dim(obs)) <- c(time_dim, dat_dim) + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) } else { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) @@ -106,14 +112,10 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | - !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { - stop("Parameter 'exp' and 'obs' must have same dimension name.") - } if (!is.null(ref)) { if (!is.numeric(ref)) { stop("Parameter 'ref' must be numeric.") @@ -125,6 +127,11 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } + } else { + ref <- 0 + } + if (!is.array(ref)) { # 0 or 1 + ref <- array(data = ref, dim = dim(exp)) } ## time_dim @@ -152,14 +159,6 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } } ## pval if (!is.logical(pval) | length(pval) > 1) { @@ -177,66 +176,79 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") } - if (sig_method == "Random Walk" & pval == T) { - warning("p-value cannot be calculated by significance method 'Random Walk'.") - pval <- FALSE + ## sig_method.type + if (sig_method == 'Random Walk') { + if (is.null(sig_method.type)) { + .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", + "Random Walk. Assign it as 'two.sided'.") + sig_method.type <- "two.sided" + } + if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + } + if (sig_method.type == 'two.sided.approx' & pval == T) { + .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") + pval <- FALSE + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + alpha <- 0.05 + } + } } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } - ## exp and obs (2) + ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'memb_dim' and 'dat_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } - ############################### - # Create ref array if needed - if (is.null(ref)) ref <- 0 - if (!is.array(ref)) { - ref <- array(data = ref, dim = dim(exp)) + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] } - - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) } - } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + name_ref <- name_ref[-which(name_ref == dat_dim)] } } - + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + if (dim(exp)[time_dim] <= 2) { stop("The length of time_dim must be more than 2 to compute MSSS.") } - + ############################### # # Sort dimension # name_exp <- names(dim(exp)) @@ -247,8 +259,13 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ############################### ## Ensemble mean if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(ref))) { ref <- MeanDims(ref, memb_dim, na.rm = T) } } @@ -256,21 +273,6 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ############################### # Calculate MSSS - # if (!is.null(ref)) { # use "ref" as reference forecast - # if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { - # target_dims_ref <- c(time_dim, dat_dim) - # } else { - # target_dims_ref <- c(time_dim) - # } - # data <- list(exp = exp, obs = obs, ref = ref) - # target_dims = list(exp = c(time_dim, dat_dim), - # obs = c(time_dim, dat_dim), - # ref = target_dims_ref) - # } else { - # data <- list(exp = exp, obs = obs) - # target_dims = list(exp = c(time_dim, dat_dim), - # obs = c(time_dim, dat_dim)) - # } data <- list(exp = exp, obs = obs, ref = ref) if (!is.null(dat_dim)) { if (dat_dim %in% names(dim(ref))) { @@ -291,14 +293,15 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, fun = .MSSS, time_dim = time_dim, dat_dim = dat_dim, pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, + sig_method = sig_method, sig_method.type = sig_method.type, ncores = ncores) return(res) } -.MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { +.MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -343,40 +346,25 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - mse_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(nexp, nobs)) + mse_exp <- colMeans(dif1^2, na.rm = TRUE) # [nexp, nobs] # MSE of reference - # if (!is.null(ref)) { dif2 <- array(dim = c(nsdate, nref, nobs)) names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) } - mse_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE) #array(dim = c(nref, nobs)) + mse_ref <- colMeans(dif2^2, na.rm = TRUE) # [nref, nobs] if (nexp != nref) { # expand mse_ref to nexp (nref is 1) mse_ref <- array(mse_ref, dim = c(nobs = nobs, nexp = nexp)) mse_ref <- Reorder(mse_ref, c(2, 1)) } - # } else { - # mse_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) - ## mse_ref[which(abs(mse_ref) <= (max(abs(mse_ref), na.rm = TRUE) / 1000))] <- max(abs( - ## mse_ref), na.rm = TRUE) / 1000 - # mse_ref <- Reorder(mse_ref, c(2, 1)) - # #mse_ref above: [nexp, nobs] - # } - + msss <- 1 - mse_exp / mse_ref ################################################# - # if (conf) { - # conflow <- (1 - conf.lev) / 2 - # confhigh <- 1 - conflow - # conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) - # conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) - # } - if (sig_method == 'one-sided Fisher') { p_val <- array(dim = c(nexp = nexp, nobs = nobs)) ## pval and sign @@ -407,12 +395,12 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } } else if (sig_method == "Random Walk") { - signif <- array(dim = c(nexp = nexp, nobs = nobs)) - p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { - for (j in 1:nobs) { - - # Error + for (j in 1:nobs) { error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) if (nref == nexp) { error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) @@ -420,12 +408,11 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } - # signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$sign - aux <- s2dv:::.RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = 'two.sided', - pval = TRUE, sign = TRUE, alpha = alpha) - signif[i, j] <- aux$sign - p_val[i, j] <- aux$p.val + aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = sig_method.type, + pval = pval, sign = sign, alpha = alpha) + if (sign) signif[i, j] <- aux$sign + if (pval) p_val[i, j] <- aux$p.val } } } @@ -434,21 +421,15 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # Remove extra dimensions if dat_dim = NULL if (is.null(dat_dim)) { dim(msss) <- NULL - dim(p_val) <- NULL + if (pval) dim(p_val) <- NULL if (sign) dim(signif) <- NULL } ################################### # output res <- list(msss = msss) - if (pval) { - p.val <- list(p.val = p_val) - res <- c(res, p.val) - } - if (sign) { - signif <- list(sign = signif) - res <- c(res, signif) - } + if (pval) res <- c(res, list(p.val = p_val)) + if (sign) res <- c(res, list(sign = signif)) return(res) } diff --git a/R/RMS.R b/R/RMS.R index 9f011ec..4e6bfeb 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -2,7 +2,7 @@ #' #'Compute the root mean square error for an array of forecasts and an array of #'observations. The RMSEs are computed along time_dim, the dimension which -#'corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +#'corresponds to the start date dimension. If comp_dim is given, the RMSEs are #'computed only if obs along the comp_dim dimension are complete between #'limits[1] and limits[2], i.e. there are no NAs between limits[1] and #'limits[2]. This option can be activated if the user wishes to account only @@ -10,19 +10,16 @@ #'all leadtimes.\cr #'The confidence interval is computed by the chi2 distribution.\cr #' -#'@param exp A named numeric array of experimental data, with at least two -#' dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#'@param exp A named numeric array of experimental data, with at least +#' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. #'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along dat_dim. It can also be a vector with the same -#' length as 'exp', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +#' vector with the same length as 'exp'. #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. +#' to compute the ensemble mean; it should be set to NULL if the input data are +#' already the ensemble mean. The default value is NULL. #'@param dat_dim A character string indicating the name of dataset or member #' (nobs/nexp) dimension. The datasets of exp and obs will be paired and #' computed RMS for each pair. The default value is NULL. @@ -56,23 +53,25 @@ #' #'@examples #'# Load sample data as in Load() example: -#' set.seed(1) -#' exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' set.seed(2) -#' obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' set.seed(2) -#' na <- floor(runif(10, min = 1, max = 80)) -#' obs1[na] <- NA -#' res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') -#' # Renew example when Ano and Smoothing are ready +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +#'res <- RMS(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', +#' comp_dim = 'ftime', limits = c(7, 54)) #' -#' exp2 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' obs2 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' res2 <- RMS(exp2, obs2, comp_dim = 'ftime') +#'# Synthetic data: +#'exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +#'obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +#'na <- floor(runif(10, min = 1, max = 80)) +#'obs1[na] <- NA +#'res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') #' -#' exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4, memb = 4)) -#' obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) -#' res3 <- RMS(exp3, obs3, comp_dim = 'ftime', memb_dim = 'memb') +#'exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +#'obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +#'res2 <- RMS(exp3, obs3, memb_dim = 'member') #' #'@import multiApply #'@importFrom ClimProjDiags Subset @@ -102,14 +101,10 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | - !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { - stop("Parameter 'exp' and 'obs' must have same dimension name") - } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -122,16 +117,8 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension.") } } ## dat_dim @@ -184,16 +171,23 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim' and 'memb_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } if (dim(exp)[time_dim] < 2) { stop("The length of time_dim must be at least 2 to compute RMS.") @@ -202,7 +196,12 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, ############################### ## Ensemble mean if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } } ############################### @@ -268,7 +267,7 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - rms <- colMeans(dif^2, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + rms <- colMeans(dif^2, na.rm = TRUE)^0.5 # [nexp, nobs] if (conf) { #NOTE: pval and sign also need #count effective sample along sdate. eno: c(nexp, nobs) diff --git a/R/RMSSS.R b/R/RMSSS.R index 00ccc21..c33a40e 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -2,14 +2,12 @@ #' #'Compute the root mean square error skill score (RMSSS) between an array of #'forecast 'exp' and an array of observation 'obs'. The two arrays should -#'have the same dimensions except along dat_dim, where the length can be -#'different, with the number of experiments/models (nexp) and the number of -#'observational datasets (nobs).\cr -#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp -#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs +#'are computed along 'time_dim', the dimension which corresponds to the start +#'date dimension. +#'RMSSS computes the root mean square error skill score of each exp in 1:nexp +#'against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point #'of the array.\cr -#'The RMSSS are computed along the time_dim dimension which should correspond -#'to the start date dimension.\cr #'The p-value and significance test are optionally provided by an one-sided #'Fisher test or Random Walk test.\cr #' @@ -18,8 +16,9 @@ #' 'obs', then the vector will automatically be 'time_dim'. #'@param obs A named numeric array of observational data which contains at least #' time dimension (time_dim). The dimensions should be the same as parameter -#' 'exp' except the length of 'dat_dim' dimension. It can also be a vector with -#' the same length as 'exp', then the vector will automatically be 'time_dim'. +#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +#' be a vector with the same length as 'exp', then the vector will +#' automatically be 'time_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension, or 0 (typical climatological forecast) or 1 #' (normalized climatological forecast). If it is an array, the dimensions must @@ -34,8 +33,8 @@ #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. +#' to compute the ensemble mean; it should be set to NULL if the data are +#' already the ensemble mean. The default value is NULL. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. The default value is TRUE. #'@param sign A logical value indicating whether to compute or not the @@ -45,6 +44,11 @@ #' statistical significance test. The default value is 0.05. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The +#' default is NULL (since "one-sided Fisher" doesn't have different test +#' types.) #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -67,6 +71,13 @@ #'} #' #'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') +#' #' set.seed(1) #' exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) #' set.seed(2) @@ -87,7 +98,7 @@ #'@export RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, - sig_method = 'one-sided Fisher', ncores = NULL) { + sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -104,21 +115,17 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) } } else if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp))[-which(names(dim(exp))==memb_dim)] %in% names(dim(obs))[-which(names(dim(exp))==memb_dim)]) | - !all(names(dim(obs))[-which(names(dim(exp))==memb_dim)] %in% names(dim(exp))[-which(names(dim(exp))==memb_dim)])) { - stop("Parameter 'exp' and 'obs' must have same dimension name.") - } if (!is.null(ref)) { if (!is.numeric(ref)) { stop("Parameter 'ref' must be numeric.") @@ -130,6 +137,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } + } else { + ref <- 0 + } + if (!is.array(ref)) { # 0 or 1 + ref <- array(data = ref, dim = dim(exp)) } ## time_dim @@ -157,14 +169,6 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } } ## pval if (!is.logical(pval) | length(pval) > 1) { @@ -182,60 +186,76 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") } - if (sig_method == "Random Walk" & pval == T) { - warning("p-value cannot be calculated by significance method 'Random Walk'.") - pval <- FALSE + ## sig_method.type + if (sig_method == 'Random Walk') { + if (is.null(sig_method.type)) { + .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", + "Random Walk. Assign it as 'two.sided'.") + .warning("Note that in s2dv <= 1.4.1, Random Walk uses 'two.sided.approx' method.", + "If you want to retain the same functionality, please specify parameter ", + "'sig_method.type' as 'two.sided.approx'.") + sig_method.type <- "two.sided" + } + if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + } + if (sig_method.type == 'two.sided.approx' & pval == T) { + .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") + pval <- FALSE + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + alpha <- 0.05 + } + } } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } - ## exp and obs (2) + ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'memb_dim' and 'dat_dim'.")) + "all dimensions except 'dat_dim' and 'memb_dim'.")) } - ############################### - # Create ref array if needed - if (is.null(ref)) ref <- 0 - if (!is.array(ref)) { - ref <- array(data = ref, dim = dim(exp)) + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] } - - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) } + name_ref <- name_ref[-which(name_ref == dat_dim)] } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) } if (dim(exp)[time_dim] <= 2) { @@ -253,8 +273,13 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ############################### ## Ensemble mean if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(ref))) { ref <- MeanDims(ref, memb_dim, na.rm = T) } } @@ -262,21 +287,6 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ############################### # Calculate RMSSS -# if (!is.null(ref)) { # use "ref" as reference forecast -# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { -# target_dims_ref <- c(time_dim, dat_dim) -# } else { -# target_dims_ref <- c(time_dim) -# } -# data <- list(exp = exp, obs = obs, ref = ref) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim), -# ref = target_dims_ref) -# } else { -# data <- list(exp = exp, obs = obs) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim)) -# } data <- list(exp = exp, obs = obs, ref = ref) if (!is.null(dat_dim)) { if (dat_dim %in% names(dim(ref))) { @@ -297,14 +307,15 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, fun = .RMSSS, time_dim = time_dim, dat_dim = dat_dim, pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, + sig_method = sig_method, sig_method.type = sig_method.type, ncores = ncores) return(res) } .RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -327,8 +338,8 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dim(ref) <- c(dim(ref), dat = 1) } else { - # exp: [sdate, dat_exp] - # obs: [sdate, dat_obs] + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) if (dat_dim %in% names(dim(ref))) { @@ -349,40 +360,25 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + rms_exp <- colMeans(dif1^2, na.rm = TRUE)^0.5 # [nexp, nobs] # RMS of reference -# if (!is.null(ref)) { - dif2 <- array(dim = c(nsdate, nref, nobs)) - names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') - for (i in 1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) - } - rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) - if (nexp != nref) { - # expand rms_ref to nexp (nref is 1) - rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) - rms_ref <- Reorder(rms_ref, c(2, 1)) - } -# } else { -# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) -## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( -## rms_ref), na.rm = TRUE) / 1000 -# rms_ref <- Reorder(rms_ref, c(2, 1)) -# #rms_ref above: [nexp, nobs] -# } + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- colMeans(dif2^2, na.rm = TRUE)^0.5 # [nref, nobs] + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } rmsss <- 1 - rms_exp / rms_ref ################################################# -# if (conf) { -# conflow <- alpha / 2 -# confhigh <- 1 - conflow -# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) -# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) -# } - if (sig_method == 'one-sided Fisher') { p_val <- array(dim = c(nexp = nexp, nobs = nobs)) ## pval and sign @@ -413,11 +409,12 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } } else if (sig_method == "Random Walk") { - signif <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { for (j in 1:nobs) { - - # Error error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) if (nref == nexp) { error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) @@ -425,7 +422,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } - signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$sign + aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = sig_method.type, + pval = pval, sign = sign, alpha = alpha) + if (sign) signif[i, j] <- aux$sign + if (pval) p_val[i, j] <- aux$p.val } } } @@ -434,7 +435,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # Remove extra dimensions if dat_dim = NULL if (is.null(dat_dim)) { dim(rmsss) <- NULL - dim(p_val) <- NULL + if (pval) dim(p_val) <- NULL if (sign) dim(signif) <- NULL } ################################### diff --git a/man/MSE.Rd b/man/MSE.Rd new file mode 100644 index 0000000..291d08c --- /dev/null +++ b/man/MSE.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MSE.R +\name{MSE} +\alias{MSE} +\title{Compute mean square error} +\usage{ +MSE( + exp, + obs, + time_dim = "sdate", + dat_dim = NULL, + memb_dim = NULL, + comp_dim = NULL, + limits = NULL, + conf = TRUE, + alpha = 0.05, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'.} + +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +vector with the same length as 'exp'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} + +\item{dat_dim}{A character string indicating the name of dataset or member +(nobs/nexp) dimension. The datasets of exp and obs will be paired and +computed MSE for each pair. The default value is NULL.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the input data are +already the ensemble mean. The default value is NULL.} + +\item{comp_dim}{A character string indicating the name of dimension along which +obs is taken into account only if it is complete. The default value +is NULL.} + +\item{limits}{A vector of two integers indicating the range along comp_dim to +be completed. The default value is c(1, length(comp_dim dimension)).} + +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +number of observation (i.e., dat_dim in obs).\cr +\item{$mse}{ + The mean square error. +} +\item{$conf.lower}{ + The lower confidence interval. Only present if \code{conf = TRUE}. +} +\item{$conf.upper}{ + The upper confidence interval. Only present if \code{conf = TRUE}. +} +} +\description{ +Compute the mean square error for an array of forecasts and an array of +observations. The MSEs are computed along time_dim, the dimension which +corresponds to the start date dimension. If comp_dim is given, the MSEs are +computed only if obs along the comp_dim dimension are complete between +limits[1] and limits[2], i.e. there are no NAs between limits[1] and +limits[2]. This option can be activated if the user wants to account only +for the forecasts for which the corresponding observations are available at +all leadtimes.\cr +The confidence interval is computed by the chi2 distribution.\cr +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +res <- MSE(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', + comp_dim = 'ftime', limits = c(7, 54)) + +# Synthetic data: +exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +na <- floor(runif(10, min = 1, max = 80)) +obs1[na] <- NA +res1 <- MSE(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') + +exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +res2 <- MSE(exp3, obs3, memb_dim = 'member') + +} diff --git a/man/MSSS.Rd b/man/MSSS.Rd new file mode 100644 index 0000000..33df450 --- /dev/null +++ b/man/MSSS.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MSSS.R +\name{MSSS} +\alias{MSSS} +\title{Compute mean square error skill score} +\usage{ +MSSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + dat_dim = NULL, + memb_dim = NULL, + pval = TRUE, + sign = FALSE, + alpha = 0.05, + sig_method = "one-sided Fisher", + sig_method.type = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data which contains at least +time dimension (time_dim). It can also be a vector with the same length as +'obs', then the vector will automatically be 'time_dim'.} + +\item{obs}{A named numeric array of observational data which contains at least +time dimension (time_dim). The dimensions should be the same as parameter +'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +be a vector with the same length as 'exp', then the vector will +automatically be 'time_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension, or 0 (typical climatological forecast) or 1 +(normalized climatological forecast). If it is an array, the dimensions must +be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +reference dataset, it should not have dataset dimension. If there is +corresponding reference for each experiment, the dataset dimension must +have the same length as in 'exp'. If 'ref' is NULL, the typical +climatological forecast is used as reference forecast (equivalent to 0.) +The default value is NULL.} + +\item{time_dim}{A character string indicating the name of dimension along +which the MSSS are computed. The default value is 'sdate'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. The default value is NULL.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the data are +already the ensemble mean. The default value is NULL.} + +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: MSSS = 0. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to compute or not the +statistical significance of the test Ho: MSSS = 0. The default value is +FALSE.} + +\item{alpha}{A numeric of the significance level to be used in the +statistical significance test. The default value is 0.05.} + +\item{sig_method}{A character string indicating the significance method. The +options are "one-sided Fisher" (default) and "Random Walk".} + +\item{sig_method.type}{A character string indicating the test type of the +significance method. Check \code{RandomWalkTest()} parameter +\code{test.type} for details if parameter "sig_method" is "Random Walk". The +default is NULL (since "one-sided Fisher" doesn't have different test +types.)} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +nobs are omitted.\cr +\item{$msss}{ + A numerical array of the mean square error skill score. +} +\item{$p.val}{ + A numerical array of the p-value with the same dimensions as $msss. + Only present if \code{pval = TRUE}. +} +\item{sign}{ + A logical array of the statistical significance of the MSSS with the same + dimensions as $msss. Only present if \code{sign = TRUE}. +} +} +\description{ +Compute the mean square error skill score (MSSS) between an array of forecast +'exp' and an array of observation 'obs'. The two arrays should have the same +dimensions except along 'dat_dim' and 'memb_dim'. The MSSSs are computed along +'time_dim', the dimension which corresponds to the start date dimension. +MSSS computes the mean square error skill score of each exp in 1:nexp +against each obs in 1:nobs which gives nexp * nobs MSSS for each grid point +of the array.\cr +The p-value and significance test are optionally provided by an one-sided +Fisher test or Random Walk test.\cr +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +rmsss <- MSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') + +# Synthetic data: +exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +obs <- array(rnorm(15), dim = c(time = 3, dataset = 1)) +res <- MSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset', memb_dim = 'memb') + +} diff --git a/man/RMS.Rd b/man/RMS.Rd index 9d02d82..b7c044f 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -8,6 +8,7 @@ RMS( exp, obs, time_dim = "sdate", + memb_dim = NULL, dat_dim = NULL, comp_dim = NULL, limits = NULL, @@ -17,19 +18,20 @@ RMS( ) } \arguments{ -\item{exp}{A named numeric array of experimental data, with at least two -dimensions 'time_dim' and 'dat_dim'. It can also be a vector with the -same length as 'obs', then the vector will automatically be 'time_dim' and -'dat_dim' will be 1.} +\item{exp}{A named numeric array of experimental data, with at least +'time_dim' dimension. It can also be a vector with the same length as 'obs'.} \item{obs}{A named numeric array of observational data, same dimensions as -parameter 'exp' except along dat_dim. It can also be a vector with the same -length as 'exp', then the vector will automatically be 'time_dim' and -'dat_dim' will be 1.} +parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a +vector with the same length as 'exp'.} \item{time_dim}{A character string indicating the name of dimension along which the correlations are computed. The default value is 'sdate'.} +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the input data are +already the ensemble mean. The default value is NULL.} + \item{dat_dim}{A character string indicating the name of dataset or member (nobs/nexp) dimension. The datasets of exp and obs will be paired and computed RMS for each pair. The default value is NULL.} @@ -69,7 +71,7 @@ nobs are omitted.\cr \description{ Compute the root mean square error for an array of forecasts and an array of observations. The RMSEs are computed along time_dim, the dimension which -corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +corresponds to the start date dimension. If comp_dim is given, the RMSEs are computed only if obs along the comp_dim dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only @@ -79,14 +81,24 @@ The confidence interval is computed by the chi2 distribution.\cr } \examples{ # Load sample data as in Load() example: - set.seed(1) - exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) - obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) - na <- floor(runif(10, min = 1, max = 80)) - obs1[na] <- NA - res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') - # Renew example when Ano and Smoothing are ready +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = 12, time_dim = 'ftime') +smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = 12, time_dim = 'ftime') +res <- RMS(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', + comp_dim = 'ftime', limits = c(7, 54)) +# Synthetic data: +exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 10, ftime = 4)) +obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 10, ftime = 4)) +na <- floor(runif(10, min = 1, max = 80)) +obs1[na] <- NA +res1 <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') + +exp2 <- array(rnorm(20), dim = c(sdate = 5, member = 4)) +obs2 <- array(rnorm(10), dim = c(sdate = 5, member = 2)) +res2 <- RMS(exp3, obs3, memb_dim = 'member') + } diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index c647c71..7b31e26 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -15,6 +15,7 @@ RMSSS( sign = FALSE, alpha = 0.05, sig_method = "one-sided Fisher", + sig_method.type = NULL, ncores = NULL ) } @@ -25,8 +26,9 @@ time dimension (time_dim). It can also be a vector with the same length as \item{obs}{A named numeric array of observational data which contains at least time dimension (time_dim). The dimensions should be the same as parameter -'exp' except the length of 'dat_dim' dimension. It can also be a vector with -the same length as 'exp', then the vector will automatically be 'time_dim'.} +'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +be a vector with the same length as 'exp', then the vector will +automatically be 'time_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time dimension, or 0 (typical climatological forecast) or 1 @@ -35,7 +37,7 @@ be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should not have dataset dimension. If there is corresponding reference for each experiment, the dataset dimension must have the same length as in 'exp'. If 'ref' is NULL, the typical -climatological forecast is used as reference forecast (equivelant to 0.) +climatological forecast is used as reference forecast (equivalent to 0.) The default value is NULL.} \item{time_dim}{A character string indicating the name of dimension along @@ -45,8 +47,8 @@ which the RMSSS are computed. The default value is 'sdate'.} dimension. The default value is NULL.} \item{memb_dim}{A character string indicating the name of the member dimension -to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -and 'ref' are already the ensemble mean. The default value is NULL.} +to compute the ensemble mean; it should be set to NULL if the data are +already the ensemble mean. The default value is NULL.} \item{pval}{A logical value indicating whether to compute or not the p-value of the test Ho: RMSSS = 0. The default value is TRUE.} @@ -61,6 +63,12 @@ statistical significance test. The default value is 0.05.} \item{sig_method}{A character string indicating the significance method. The options are "one-sided Fisher" (default) and "Random Walk".} +\item{sig_method.type}{A character string indicating the test type of the +significance method. Check \code{RandomWalkTest()} parameter +\code{test.type} for details if parameter "sig_method" is "Random Walk". The +default is NULL (since "one-sided Fisher" doesn't have different test +types.)} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } @@ -85,22 +93,35 @@ nobs are omitted.\cr \description{ Compute the root mean square error skill score (RMSSS) between an array of forecast 'exp' and an array of observation 'obs'. The two arrays should -have the same dimensions except along dat_dim, where the length can be -different, with the number of experiments/models (nexp) and the number of -observational datasets (nobs).\cr -RMSSS computes the root mean square error skill score of each jexp in 1:nexp -against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs +are computed along 'time_dim', the dimension which corresponds to the start +date dimension. +RMSSS computes the root mean square error skill score of each exp in 1:nexp +against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point of the array.\cr -The RMSSS are computed along the time_dim dimension which should correspond -to the start date dimension.\cr The p-value and significance test are optionally provided by an one-sided Fisher test or Random Walk test.\cr } \examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') + set.seed(1) -exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) set.seed(2) -obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') + +exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) +obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') + +exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) +obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +res3 <- RMSSS(exp3, obs3, time_dim = 'time') } diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 505f3d4..5311724 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -144,7 +144,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( as.vector(CRPSS(exp1, obs1)$sign), - c(FALSE, FALSE), + c(FALSE, FALSE) ) expect_equal( as.vector(CRPSS(exp1, obs1, Fair = T)$crpss), @@ -220,7 +220,7 @@ test_that("3. Output checks: dat2", { expect_equal( as.vector(CRPSS(exp2, obs2)$sign), - FALSE, + FALSE ) expect_equal( as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), @@ -273,7 +273,7 @@ test_that("4. Output checks: dat3", { ) expect_equal( as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign), - rep(FALSE, 6), + rep(FALSE, 6) ) expect_equal( mean(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), diff --git a/tests/testthat/test-EOF.R b/tests/testthat/test-EOF.R index 4e95aa3..828bd52 100644 --- a/tests/testthat/test-EOF.R +++ b/tests/testthat/test-EOF.R @@ -72,7 +72,7 @@ test_that("1. Input checks", { "length as the longitude dimension of 'ano'.") ) expect_warning( - EOF(dat1, lat = lat1, lon = c(350, 370)), + EOF(dat1, lat = lat1, lon = c(350, 370), neofs = 8), "Some 'lon' is out of the range \\[-360, 360\\]." ) # neofs diff --git a/tests/testthat/test-MSE.R b/tests/testthat/test-MSE.R new file mode 100644 index 0000000..05bba2d --- /dev/null +++ b/tests/testthat/test-MSE.R @@ -0,0 +1,259 @@ +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) + set.seed(2) + obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) + set.seed(2) + na <- floor(runif(10, min = 1, max = 80)) + obs1[na] <- NA + + # dat 2: vector + set.seed(5) + exp2 <- rnorm(10) + set.seed(6) + obs2 <- rnorm(10) + + # dat3 + set.seed(1) + exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, member = 3)) + set.seed(2) + obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, member = 2)) + + # dat4 + set.seed(1) + exp4 <- array(rnorm(120), dim = c(dataset = 2, sdate = 5, time = 1)) + set.seed(2) + obs4 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, member = 2, time = 1)) + +############################################## +test_that("1. Input checks", { + + expect_error( + MSE(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + MSE(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + MSE(c(1:10), c(2:4)), + "Parameter 'exp' and 'obs' must be array with as least two dimensions time_dim and dat_dim, or vector of same length." + ) + expect_error( + MSE(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + MSE(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + expect_error( + MSE(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, dat_dim = 'a'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, comp_dim = c('sdate', 'ftime')), + "Parameter 'comp_dim' must be a character string." + ) + expect_error( + MSE(exp1, obs1, comp_dim = 'a'), + "Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + MSE(exp1, obs1, limits = c(1,3)), + "Paramter 'comp_dim' cannot be NULL if 'limits' is assigned." + ) + expect_error( + MSE(exp1, obs1, comp_dim = 'ftime', limits = c(1)), + paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") + ) + expect_error( + MSE(exp1, obs1, alpha = -1), + "Parameter 'alpha' must be a numeric number between 0 and 1." + ) + expect_error( + MSE(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + MSE(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + MSE(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." + ) + expect_error( + MSE(exp = array(1:5, dim = c(sdate = 1, dataset = 5, a = 1)), + obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + "The length of time_dim must be at least 2 to compute MSE." + ) + + + +}) + +############################################## +test_that("2. Output checks: dat1", { +suppressWarnings( + expect_equal( + dim(MSE(exp1, obs1, dat_dim = 'dataset')$mse), + c(nexp = 3, nobs = 2, ftime = 2, lon = 1, lat = 4) + ) +) +suppressWarnings( + expect_equal( + MSE(exp1, obs1, dat_dim = 'dataset')$mse[1:6], + c(1.2815677, 2.0832803, 1.1894637, 1.3000403, 1.4053807, 0.8157563)^2, + tolerance = 0.001 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset')$conf.lower))), + 4 + ) +) +suppressWarnings( + expect_equal( + c(MSE(exp1, obs1, dat_dim = 'dataset')$conf.lower[2,1,,1,2:3]), + c(1.8869268, 0.4418298, 0.2694637, 0.8215383), + tolerance = 0.001 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$mse))), + 0 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$conf.upper))), + 8 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat')$conf.lower))), + 36 + ) +) +suppressWarnings( + expect_equal( + length(which(is.na(MSE(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat', limits = c(1, 2))$conf.lower))), + 21 + ) +) +suppressWarnings( + expect_equal( + c(MSE(exp1, obs1, dat_dim = 'dataset', alpha = 0.01)$conf.upper[2,1,,1,2:3]), + c(13.844841, 5.044269, 1.977121, 6.027826), + tolerance = 0.0001 + ) +) +suppressWarnings( + expect_equal( + length(MSE(exp1, obs1, dat_dim = 'dataset', conf = FALSE)), + 1 + ) +) + + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(MSE(exp2, obs2)$mse), + NULL + ) + + expect_equal( + as.vector(MSE(exp2, obs2)$mse), + 1.429815^2, + tolerance = 0.00001 + ) +}) + +############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$mse), + c(ftime = 2) + ) + + expect_equal( + as.vector(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$mse), + c(0.6191331, 0.7133894)^2, + tolerance = 0.00001 + ) + expect_equal( + names(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = FALSE)), + c("mse") + ) + expect_equal( + names(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = TRUE)), + c('mse', 'conf.lower', 'conf.upper') + ) + expect_equal( + c(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.lower), + c(0.2567713, 0.3409037), + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.upper), + c(1.231523, 1.635038), + tolerance = 0.0001 + ) + +}) + +############################################## + +test_that("5. Output checks: dat4", { + + expect_equal( + dim(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$mse), + c(nexp = 2, nobs = 1, time = 1) + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$mse), + c(0.6775320, 0.8954404)^2, + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.lower), + c(0.3074949, 0.5370958), + tolerance = 0.0001 + ) + expect_equal( + c(MSE(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.upper), + c(1.474804, 2.576013), + tolerance = 0.0001 + ) + + +}) + +############################################## + diff --git a/tests/testthat/test-MSSS.R b/tests/testthat/test-MSSS.R new file mode 100644 index 0000000..29952fa --- /dev/null +++ b/tests/testthat/test-MSSS.R @@ -0,0 +1,265 @@ +############################################## + # case 1 + set.seed(1) + exp1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) + set.seed(2) + obs1 <- array(rnorm(6), dim = c(sdate = 3, dataset = 2)) + set.seed(3) + ref1_1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) + ref1_2 <- exp1[, 3] + dim(ref1_2) <- c(sdate = 3) + + # case 2 + set.seed(3) + exp2 <- array(rnorm(30), dim = c(dataset = 3, sdate = 5, member = 3)) + set.seed(4) + obs2 <- array(rnorm(20), dim = c(dataset = 2, sdate = 5, member = 2)) + set.seed(5) + ref2 <- array(rnorm(15), dim = c(sdate = 5, member = 3)) + + # case 3: vector + set.seed(5) + exp3 <- rnorm(10) + set.seed(6) + obs3 <- rnorm(10) + + +############################################## + +test_that("1. Input checks", { + ## exp and obs (1) + expect_error( + MSSS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + MSSS('exp', 'obs'), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + MSSS(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be array with as least two dimensions ", + "time_dim and dat_dim, or vector of same length.") + ) + expect_error( + MSSS(array(1:10, dim = c(2, 5)), array(1:10, dim = c(2, 5))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + MSSS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + ## time_dim + expect_error( + MSSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MSSS(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + ## dat_dim + expect_error( + MSSS(exp1, obs1, dat_dim = NA), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + MSSS(exp1, obs1, dat_dim = 'memb'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) + ## pval + expect_error( + MSSS(exp1, obs1, pval = c(T, T)), + "Parameter 'pval' must be one logical value." + ) + ## sign + expect_error( + MSSS(exp1, obs1, sign = 0.05), + "Parameter 'sign' must be one logical value." + ) + ## alpha + expect_error( + MSSS(exp1, obs1, alpha = T), + "Parameter 'alpha' must be one numeric value." + ) + ## ncores + expect_error( + MSSS(exp1, obs1, ncores = 1.4), + "Parameter 'ncores' must be a positive integer." + ) + ## exp and obs (2) + expect_error( + MSSS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." + ) + expect_error( + MSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + "The length of time_dim must be more than 2 to compute MSSS." + ) +}) + +############################################## +test_that("2. Output checks: case 1", { + + res1_1 <- MSSS(exp1, obs1, dat_dim = 'dataset') + expect_equal( + dim(res1_1$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_1$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + c(res1_1$msss)[3:8], + c(0.03359106, -0.05535409, -0.80010171, 0.03151828, -5.53371892, -1.67639444), + tolerance = 0.00001 + ) + expect_equal( + as.vector(res1_1$p.val)[3:7], + c(0.4829225, 0.5269121, 0.7641713, 0.4839926, 0.9771112), + tolerance = 0.001 + ) + + exp1_2 <- exp1; exp1_2[2:4] <- NA + obs1_2 <- obs1; obs1_2[1:2] <- NA + res1_2 <- MSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = T, sign = T) + + expect_equal( + names(res1_2), + c("msss", "p.val", "sign") + ) + expect_equal( + c(res1_2$msss), + c(rep(NA, 7), -1.676394, -1.520840, -3.455754), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$p.val), + c(rep(NA, 7), 0.8774973, 0.8640313, 0.9520470), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$sign), + c(rep(NA, 7), rep(FALSE, 3)) + ) + + #ref + res1_3 <- MSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset') + expect_equal( + dim(res1_3$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_3$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_3$msss[2, ]), + c(-3.828708, -96.610622), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_3$p.val[2, ]), + c(0.9588755, 0.9998951), + tolerance = 0.0001 + ) + res1_4 <- MSSS(exp1, obs1, ref = ref1_2, dat_dim = 'dataset', sign = T, alpha = 0.3) + expect_equal( + dim(res1_4$msss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_4$sign), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_4$msss[2, ]), + c(-2.705537, -1.441239), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$p[2, ]), + c(0.9321160, 0.8563146), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$sign), + c(rep(F, 5), T, rep(F, 4)) + ) + + # Random Walk + suppressWarnings({ + res1_5 <- MSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided') + }) + expect_equal( + as.vector(res1_5$sign), + rep(F, 10) + ) + suppressWarnings({ + res1_6 <- MSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = T, sign = T, sig_method.type = 'two.sided') + }) + expect_equal( + as.vector(res1_6$p), + c(1, 1, 1, 1, 1, 1, 0.25, 0.25, 1, 1) + ) + expect_equal( + as.vector(res1_6$sign), + rep(F, 10) + ) + +}) + + +############################################## +test_that("3. Output checks: case 2", { + res1 <- MSSS(exp2, obs2, ref2, dat_dim = "dataset", memb_dim = 'member', sign = T) + expect_equal( + dim(res1$msss), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$sign), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$p), + c(nexp = 3, nobs = 2) + ) + expect_equal( + c(res1$msss), + c(-0.18155696, 0.51980557, -0.66121915, -0.08753543, 0.62908575, -0.95419385), + tolerance = 0.0001 + ) + expect_equal( + c(res1$p), + c(0.62284746, 0.09217502, 0.82539496, 0.56264155, 0.04034091, 0.88868245), + tolerance = 0.0001 + ) + + res2 <- MSSS(apply(exp2, 1:2, mean), apply(obs2, 1:2, mean), array(apply(ref2, 1, mean), dim = c(sdate = 5)), dat_dim = "dataset", sign = T) + expect_equal( + res1, res2 + ) + +}) + +############################################## + +test_that("4. Output checks: case 3", { + + expect_equal( + dim(MSSS(exp3, obs3)$msss), + NULL + ) + expect_equal( + as.vector(MSSS(exp3, obs3)$msss), + -1.653613, + tolerance = 0.00001 + ) + +}) diff --git a/tests/testthat/test-RMS.R b/tests/testthat/test-RMS.R index c24a2a9..660f4e7 100644 --- a/tests/testthat/test-RMS.R +++ b/tests/testthat/test-RMS.R @@ -2,7 +2,6 @@ # dat1 set.seed(1) exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) - set.seed(2) obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) set.seed(2) @@ -17,17 +16,15 @@ # dat3 set.seed(1) - exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) - + exp3 <- array(rnorm(120), dim = c(sdate = 5, ftime = 2, member = 3)) set.seed(2) - obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, lon = 1, lat = 4)) + obs3 <- array(rnorm(80), dim = c(sdate = 5, ftime = 2, member = 2)) # dat4 set.seed(1) - exp4 <- array(rnorm(120), dim = c(sdates = 5, ftimes = 2, lon = 1, lat = 1)) - + exp4 <- array(rnorm(120), dim = c(dataset = 2, sdate = 5, time = 1)) set.seed(2) - obs4 <- array(rnorm(80), dim = c(sdates = 5, ftimes = 2, lon = 1, lat = 1)) + obs4 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, member = 2, time = 1)) ############################################## test_that("1. Input checks", { @@ -49,8 +46,9 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - RMS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), - "Parameter 'exp' and 'obs' must have same dimension name" + RMS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." ) expect_error( RMS(exp1, obs1, dat_dim = 1), @@ -98,13 +96,13 @@ test_that("1. Input checks", { "Parameter 'ncores' must be a positive integer." ) expect_error( - RMS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), - "Parameter 'exp' and 'obs' must have same length of all dimension except 'dat_dim'." + RMS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." ) expect_error( RMS(exp = array(1:5, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), + obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), "The length of time_dim must be at least 2 to compute RMS." ) @@ -200,38 +198,62 @@ test_that("3. Output checks: dat2", { test_that("4. Output checks: dat3", { expect_equal( - dim(RMS(exp3, obs3, dat_dim = NULL)$rms), - c(ftime = 2, lon = 1, lat = 4) + dim(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$rms), + c(ftime = 2) ) expect_equal( - as.vector(RMS(exp3, obs3, dat_dim = NULL)$rms), - c(1.6458118, 0.8860392, 0.8261295, 1.1681939, 2.1693538, 1.3064454, 0.5384229, 1.1215333), + as.vector(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$rms), + c(0.6191331, 0.7133894), tolerance = 0.00001 ) expect_equal( - dim(RMS(exp3, obs3, dat_dim = NULL, conf = FALSE)$rms), - c(ftime = 2, lon = 1, lat = 4) + names(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = FALSE)), + c("rms") ) expect_equal( - dim(RMS(exp4, obs4, time_dim = 'sdates', dat_dim = NULL, conf = TRUE)$rms), - c(ftimes = 2, lon = 1, lat = 1) + names(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member', conf = TRUE)), + c('rms', 'conf.lower', 'conf.upper') ) expect_equal( - length(RMS(exp3, obs3, dat_dim = NULL, conf = F)), - 1 + c(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.lower), + c(0.4147271, 0.4778648), + tolerance = 0.0001 ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL)$conf.lower[1,1,]), - c(1.1024490, 0.5533838, 1.4531443, 0.3606632), - tolerance = 0.0001 + c(RMS(exp3, obs3, dat_dim = NULL, memb_dim = 'member')$conf.upper), + c(1.989109, 2.291930), + tolerance = 0.0001 + ) + +}) + +############################################## + +test_that("5. Output checks: dat4", { + + expect_equal( + dim(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$rms), + c(nexp = 2, nobs = 1, time = 1) ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL)$conf.upper[1,1,]), - c(5.287554, 2.654133, 6.969554, 1.729809), - tolerance = 0.0001 + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$rms), + c(0.6775320, 0.8954404), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.lower), + c(0.4538456, 0.5998118), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp4, obs4, dat_dim = 'dataset', memb_dim = 'member')$conf.upper), + c(2.176729, 2.876811), + tolerance = 0.0001 ) + }) ############################################## + diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 7f38373..a364b40 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -11,9 +11,11 @@ # case 2 set.seed(3) - exp2 <- array(rnorm(120), dim = c(time = 10, dat = 1, lon = 3, lat = 2, dataset = 2)) + exp2 <- array(rnorm(30), dim = c(dataset = 3, sdate = 5, member = 3)) set.seed(4) - obs2 <- array(rnorm(60), dim = c(dat = 1, time = 10, dataset = 1, lat = 2, lon = 3)) + obs2 <- array(rnorm(20), dim = c(dataset = 2, sdate = 5, member = 2)) + set.seed(5) + ref2 <- array(rnorm(15), dim = c(sdate = 5, member = 3)) # case 3: vector set.seed(5) @@ -21,20 +23,6 @@ set.seed(6) obs3 <- rnorm(10) - # case 4 - set.seed(7) - exp4 <- array(rnorm(60), dim = c(sdate = 10, lon = 3, lat = 2)) - set.seed(8) - obs4 <- array(exp4 + rnorm(60) / 2, dim = dim(exp4)) - - # case 5: memb_dim - set.seed(1) - exp5 <- array(rnorm(45), dim = c(sdate = 3, dataset = 5, member = 3)) - set.seed(2) - obs5 <- array(rnorm(3), dim = c(sdate = 3, dataset = 1, member = 1)) - set.seed(3) - ref5 <- array(rnorm(6), dim = c(sdate = 3, member = 2)) - ############################################## @@ -58,8 +46,9 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - RMSSS(array(1:10, dim = c(a = 3, c = 5)), array(1:4, dim = c(a = 3, b = 5)), time_dim = 'a', dat_dim = NULL), - "Parameter 'exp' and 'obs' must have same dimension name" + RMSSS(array(1:20, dim = c(dat = 2, sdate = 5, member = 2)), + array(1:10, dim = c(dat = 2, sdate = 5, time = 1))), + "Parameter 'exp' and 'obs' must have the same dimension names." ) ## time_dim expect_error( @@ -102,9 +91,9 @@ test_that("1. Input checks", { ) ## exp and obs (2) expect_error( - RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), - "Parameter 'exp' and 'obs' must have same length of all dimension except 'memb_dim' and 'dat_dim'." + RMSSS(exp = array(1:10, dim = c(dataset = 1, member = 5, sdate = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, sdate = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'dat_dim' and 'memb_dim'." ) expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), @@ -126,8 +115,8 @@ test_that("2. Output checks: case 1", { c(nexp = 5, nobs = 2) ) expect_equal( - mean(res1_1$rmsss), - -0.5449538, + c(res1_1$rmsss)[3:8], + c(0.01693900, -0.02730428, -0.34167869, 0.01588531, -1.55611403, -0.63596896), tolerance = 0.00001 ) expect_equal( @@ -136,20 +125,27 @@ test_that("2. Output checks: case 1", { tolerance = 0.001 ) - exp1_2 <- exp1 - exp1_2[2:4] <- NA - obs1_2 <- obs1 - obs1_2[1:2] <- NA - res1_2 <- RMSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = TRUE) + exp1_2 <- exp1; exp1_2[2:4] <- NA + obs1_2 <- obs1; obs1_2[1:2] <- NA + res1_2 <- RMSSS(exp1_2, obs1_2, dat_dim = 'dataset', pval = T, sign = T) expect_equal( - length(res1_2$rmsss[which(is.na(res1_2$rmsss))]), - 7 + names(res1_2), + c("rmsss", "p.val", "sign") ) expect_equal( - range(res1_2$p.val, na.rm = T), - c(0.7159769, 0.8167073), - tolerance = 0.00001 + c(res1_2$rmsss), + c(rep(NA, 7), -0.6359690, -0.5877153, -1.1108657), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$p.val), + c(rep(NA, 7), 0.7279944, 0.7159769, 0.8167073), + tolerance = 0.0001 + ) + expect_equal( + c(res1_2$sign), + c(rep(NA, 7), rep(FALSE, 3)) ) #ref @@ -198,39 +194,73 @@ test_that("2. Output checks: case 1", { # Random Walk suppressWarnings({ - res1_5 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + res1_5 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided.approx') }) expect_equal( as.vector(res1_5$sign), rep(F, 10) ) suppressWarnings({ - res1_6 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + res1_6 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T, sig_method.type = 'two.sided.approx') }) expect_equal( as.vector(res1_6$sign), rep(F, 10) ) + suppressWarnings({ + res1_7 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = T, sign = T, sig_method.type = 'two.sided', alpha = 0.4) + }) + expect_equal( + names(res1_7), + c('rmsss', 'p.val', 'sign') + ) + expect_equal( + res1_7$rmsss, + res1_6$rmsss + ) + expect_equal( + c(res1_7$p[, 2]), + c(1, 0.25, 0.25, 1, 1), + tolerance = 0.0001 + ) + expect_equal( + c(res1_7$sign[, 2]), + c(F, T, T, F, F) + ) }) ############################################## test_that("3. Output checks: case 2", { - + res1 <- RMSSS(exp2, obs2, ref2, dat_dim = "dataset", memb_dim = 'member', sign = T) expect_equal( - dim(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), - c(nexp = 2, nobs = 1, dat = 1, lon = 3, lat = 2) + dim(res1$rmsss), + c(nexp = 3, nobs = 2) ) expect_equal( - mean(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), - -0.3912208, - tolerance = 0.00001 + dim(res1$sign), + c(nexp = 3, nobs = 2) + ) + expect_equal( + dim(res1$p), + c(nexp = 3, nobs = 2) ) expect_equal( - range(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$p.val), - c(0.2627770, 0.9868412), - tolerance = 0.00001 + c(res1$rmsss), + c(-0.08699446, 0.30703938, -0.28888291, -0.04284967, 0.39097270, -0.39792484), + tolerance = 0.0001 + ) + expect_equal( + c(res1$p), + c(0.5622736, 0.2474466, 0.6825138, 0.5314309, 0.1799964, 0.7338230), + tolerance = 0.0001 + ) + + + res2 <- RMSSS(apply(exp2, 1:2, mean), apply(obs2, 1:2, mean), array(apply(ref2, 1, mean), dim = c(sdate = 5)),dat_dim = "dataset", sign = T) + expect_equal( + res1, res2 ) }) @@ -250,61 +280,3 @@ test_that("4. Output checks: case 3", { ) }) - -############################################## -test_that("5. Output checks: case 4", { - - expect_equal( - dim(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - c(lon = 3, lat = 2) - ) - expect_equal( - dim(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), - c(lon = 3, lat = 2) - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$rmsss), - c(0.5393823, 0.6818405, 0.4953423, 0.4093817, 0.5972085, 0.5861135), - tolerance = 0.00001 - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL)$p.val), - c(0.015203983, 0.001091360, 0.026987112, 0.066279877, 0.006161059, 0.007437649), - tolerance = 0.00001 - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL)), - c('rmsss', 'p.val') - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)), - c('rmsss', 'sign') - ) - expect_equal( - names(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = T)), - c('rmsss', 'p.val', 'sign') - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F)$sign), - c(T, T, T, F, T, T) - ) - expect_equal( - as.vector(RMSSS(exp4, obs4, dat_dim = NULL, sign = T, pval = F, alpha = 0.01)$sign), - c(F, T, F, F, T, T) - ) - -}) - -############################################## -test_that("6. Output checks: case 5", { - res5_1 <- RMSSS(exp5, obs5, ref = ref5, dat_dim = 'dataset', memb_dim = 'member') - res5_2 <- RMSSS(s2dv::MeanDims(exp5, 'member'), s2dv::MeanDims(obs5, 'member'), - ref = s2dv::MeanDims(ref5, 'member'), dat_dim = 'dataset') - expect_equal( - res5_1, - res5_2 - ) - - -}) - diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index f054325..8df2d90 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -188,7 +188,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( as.vector(RPSS(exp1, obs1)$sign), - c(FALSE, FALSE), + c(FALSE, FALSE) ) expect_equal( as.vector(RPSS(exp1, obs1, Fair = T)$rpss), @@ -306,7 +306,7 @@ test_that("3. Output checks: dat2", { ) expect_equal( as.vector(RPSS(exp2, obs2)$sign), - FALSE, + FALSE ) expect_equal( as.vector(RPSS(exp2, obs2, Fair = T)$rpss), @@ -376,7 +376,7 @@ test_that("4. Output checks: dat3", { ) expect_equal( as.vector(RPSS(exp3, obs3, dat_dim = 'dataset')$sign)[1:3], - c(FALSE, FALSE, TRUE), + c(FALSE, FALSE, TRUE) ) expect_equal( mean(RPSS(exp3, obs3, dat_dim = 'dataset', weights_exp = weights3, Fair = T)$rpss), -- GitLab