diff --git a/NAMESPACE b/NAMESPACE index 06d01d16c3575c4cc796f10c97598eb6a82ff330..4f9efb8a2dfa3aebdd9607f99e470bf7ffefeca7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,7 @@ export(.BrierScore) export(.Corr) -export(.RMS) +export(.RMSSS) export(.RatioRMS) export(.RatioSDRMS) export(.Trend) @@ -122,7 +122,6 @@ importFrom(stats,na.pass) importFrom(stats,pf) importFrom(stats,predict) importFrom(stats,pt) -importFrom(stats,qchisq) importFrom(stats,qnorm) importFrom(stats,qt) importFrom(stats,quantile) diff --git a/R/RMS.R b/R/RMS.R index c83b478256da2cb21359e330f2032caf15238507..7c9a5d1e1dbb872c0ca8285ef08964c921a23d4a 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -1,64 +1,56 @@ -#'Computes Root Mean Square Error +#'Compute root mean square error #' -#'Computes the root mean square error for an array of forecasts, var_exp and -#'an array of observations, var_obs, which should have the same dimensions -#'except along the posloop dimension where the lengths can be different, with -#'the number of experiments/models for var_exp (nexp) and the number of -#'obserational datasets for var_obs (nobs).\cr -#'The RMSE is computed along the posRMS dimension which should correspond to -#'the startdate dimension.\cr -#'If compROW is given, the RMSE is computed only if rows along the compROW -#'dimension are complete between limits[1] and limits[2], i.e. there are no -#'NAs between limits[1] and limits[2]. This option can be activated if the -#'user wishes to account only for the forecasts for which observations are -#'available at all leadtimes.\cr -#'Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -#'The confidence interval relies on a chi2 distribution.\cr\cr -#'.RMS provides the same functionality but taking a matrix of ensemble -#'members as input (exp). +#'Compute the root mean square error for an array of forecasts and an array of +#'observations. The RMSEs are computed along time_dim, the dimension which +#'corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +#'computed only if data along the comp_dim dimension are complete between +#'limits[1] and limits[2], i.e. there are no NAs between limits[1] and +#'limits[2]. This option can be activated if the user wishes to account only +#'for the forecasts for which the corresponding observations are available at +#'all leadtimes.\cr +#'The confidence interval is computed by the chi2 distribution.\cr #' -#'@param var_exp Matrix of experimental data. -#'@param var_obs Matrix of observational data, same dimensions as var_exp -#' except along posloop dimension, where the length can be nobs instead of -#' nexp. -#'@param posloop Dimension nobs and nexp. -#'@param posRMS Dimension along which RMSE are to be computed (the dimension -#' of the start dates). -#'@param compROW Data taken into account only if (compROW)th row is complete.\cr -#' Default = NULL. -#'@param limits Complete between limits[1] & limits[2]. Default = NULL. -#'@param siglev Confidence level of the computed confidence interval. 0.95 -#' by default. -#'@param conf Whether to compute confidence interval or not. TRUE by default. -#'@param exp N by M matrix of N forecasts from M ensemble members. -#'@param obs Vector of the corresponding observations of length N. +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'time_dim' and 'memb_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along memb_dim. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of member (nobs/nexp) +#' dimension. The default value is 'member'. +#'@param comp_dim A character string indicating the name of dimension along which +#' the data is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default value is c(1, length(comp_dim dimension)). +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' #'@return -#'RMS: Array with dimensions:\cr -#'c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all -#' other dimensions of var_exp & var_obs except posRMS).\cr -#'The 3rd dimension corresponds to the lower limit of the 95\% confidence -#' interval (only present if \code{conf = TRUE}), the RMSE, and the upper -#' limit of the 95\% confidence interval (only present if -#' \code{conf = TRUE}).\cr\cr -#'.RMS: +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +#'number of observation (i.e., memb_dim in obs).\cr #'\item{$rms}{ -#'The root mean square error, +#' The root mean square error. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. #'} -#'\item{$conf_low}{ -#' Corresponding to the lower limit of the \code{siglev}\% confidence interval -#' (only present if \code{conf = TRUE}) for the rms. -#'} -#'\item{$conf_high}{ -#' Corresponding to the upper limit of the \code{siglev}\% confidence interval -#' (only present if \code{conf = TRUE}) for the rms. -#'} #' #'@keywords datagen #'@author History:\cr #'0.1 - 2011-05 (V. Guemas, \email{vguemas@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens2@ic3.cat}) - Formatting to R CRAN\cr #'1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -71,160 +63,181 @@ #'smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) #'dim_to_mean <- 2 # Mean along members #'# Discard start-dates for which some leadtimes are missing -#'required_complete_row <- 3 #'leadtimes_per_startdate <- 60 -#'rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean), -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' compROW = required_complete_row, +#'rms <- RMS(smooth_ano_exp, +#' smooth_ano_obs, +#' comp_dim = 'ftime', #' limits = c(ceiling((runmean_months + 1) / 2), #' leadtimes_per_startdate - floor(runmean_months / 2))) -#' \donttest{ -#'PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", -#' monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, hlines = c(0), -#' fileout = 'tos_rms.eps') -#' } -#'# The following example uses veriApply combined with .RMS instead of RMS -#' \dontrun{ -#'require(easyVerification) -#'RMS2 <- s2dverification:::.RMS -#'rms2 <- veriApply("RMS2", -#' smooth_ano_exp, -#' # see ?veriApply for how to use the 'parallel' option -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' tdim = 3, ensdim = 2) -#' } #' #'@rdname RMS +#'@import multiApply #'@export -RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, - limits = NULL, siglev = 0.95, conf = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { - limits <- c(1, dim(var_obs)[compROW]) +RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + comp_dim = NULL, limits = NULL, + conf = TRUE, conf.lev = 0.95, ncores = NULL) { + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and memb_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA } - - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & posRMS to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) } } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) - # - # RMS & its confidence interval computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (conf) { - nvals <- 3 - dim_rms <- 2 - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low - } else { - nvals <- 1 - dim_rms <- 1 - } - enlrms <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif <- array(dim = dimsaperm[-1]) - dif[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - enlrms[jexp, jobs, dim_rms, , , , , , , , ] <- Mean1Dim(dif ** 2, 1, - narm = TRUE) ** 0.5 - if (conf) { - eno <- Eno(dif, 1) - for (j3 in 1:dimsaperm[3]){ - for (j4 in 1:dimsaperm[4]){ - for (j5 in 1:dimsaperm[5]){ - for (j6 in 1:dimsaperm[6]){ - for (j7 in 1:dimsaperm[7]){ - for (j8 in 1:dimsaperm[8]){ - for (j9 in 1:dimsaperm[9]){ - for (j10 in 1:dimsaperm[10]){ - ndat <- length(sort(dif[, j3, j4, j5, j6, j7, j8, j9, - j10])) - enlrms[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_high, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - enlrms[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_low, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - } - } - } - } - } - } - } - } - } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2 to compute RMS.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- s2dverification:::.aperm2(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + if (is.null(limits)) { + limits <- c(1, dim(obs)[comp_dim]) } + pos <- which(names(dim(obs)) == comp_dim) + outrows <- is.na(Mean1Dim(obs, pos, narm = FALSE, limits)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA } - dim(enlrms) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlrms + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, memb_dim), + c(time_dim, memb_dim)), + fun = .RMS, + time_dim = time_dim, memb_dim = memb_dim, + conf = conf, conf.lev = conf.lev, ncores = ncores) + return(res) } -#'@rdname RMS -#'@importFrom stats qchisq -#'@export -.RMS <- function(exp, obs, siglev = 0.95, conf = TRUE) { - # - # RMS & its confidence interval computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # + + +.RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + conf = TRUE, conf.lev = 0.95) { + + # exp: [sdate, member_exp] + # obs: [sdate, member_obs] + n_exp <- as.numeric(dim(exp)[2]) + n_obs <- as.numeric(dim(obs)[2]) + n_sdate <- as.numeric(dim(exp)[1]) + + dif <- array(dim = c(sdate = n_sdate, n_exp = n_exp, n_obs = n_obs)) + chi <- array(dim = c(nexp = n_exp, nobs = n_obs)) if (conf) { - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low + conflow <- (1 - conf.lev) / 2 + confhigh <- 1 - conflow + conf.lower <- array(dim = c(nexp = n_exp, nobs = n_obs)) + conf.upper <- array(dim = c(nexp = n_exp, nobs = n_obs)) } - dif <- rowMeans(exp) - obs - enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 + + # dif + for (i in 1:n_obs) { + dif[, , i] <- sapply(1:n_exp, function(x) {exp[, x] - obs[, i]}) + } + rms <- apply(dif^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(n_exp, n_obs)) + if (conf) { - eno <- Eno(dif, 1) - ndat <- length(sort(dif)) - conf_int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5, - (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5) - names(conf_int) <- c("conf_low","conf_high") - } else { - conf_int <- c() - names(conf_int) <- c() + #eno <- Eno(dif, 1) #count effective sample along sdate. dim = c(n_exp, n_obs) + eno <- Eno(dif, time_dim) #change to this line when Eno() is done + + # conf.lower + chi <- sapply(1:n_obs, function(i) { + qchisq(confhigh, eno[, i] - 1) + }) + conf.lower <- (eno * rms ** 2 / chi) ** 0.5 + + # conf.upper + chi <- sapply(1:n_obs, function(i) { + qchisq(conflow, eno[, i] - 1) + }) + conf.upper <- (eno * rms ** 2 / chi) ** 0.5 } - results <- c(enlrms, conf_int) - names(results) <- c("rms", names(conf_int)) - return(results) + if (conf) { + res <- list(rms = rms, conf.lower = conf.lower, conf.upper = conf.upper) + } else { + res <- list(rms = rms) + } + + return(res) + } diff --git a/man/RMS.Rd b/man/RMS.Rd index 715f84d10e82bf233cf2e6016668006a965067ea..91aa9b093b577fb189cf244db574238b2f0c119c 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -1,79 +1,66 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RMS.R \name{RMS} -\alias{.RMS} \alias{RMS} -\title{Computes Root Mean Square Error} +\title{Compute root mean square error} \usage{ -RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, - limits = NULL, siglev = 0.95, conf = TRUE) - -.RMS(exp, obs, siglev = 0.95, conf = TRUE) +RMS(exp, obs, time_dim = "sdate", memb_dim = "member", comp_dim = NULL, + limits = NULL, conf = TRUE, conf.lev = 0.95, ncores = NULL) } \arguments{ -\item{var_exp}{Matrix of experimental data.} - -\item{var_obs}{Matrix of observational data, same dimensions as var_exp -except along posloop dimension, where the length can be nobs instead of -nexp.} +\item{exp}{A named numeric array of experimental data, with at least two +dimensions 'time_dim' and 'memb_dim'.} -\item{posloop}{Dimension nobs and nexp.} +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along memb_dim.} -\item{posRMS}{Dimension along which RMSE are to be computed (the dimension -of the start dates).} +\item{time_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} -\item{compROW}{Data taken into account only if (compROW)th row is complete.\cr -Default = NULL.} +\item{memb_dim}{A character string indicating the name of member (nobs/nexp) +dimension. The default value is 'member'.} -\item{limits}{Complete between limits[1] & limits[2]. Default = NULL.} +\item{comp_dim}{A character string indicating the name of dimension along which +the data is taken into account only if it is complete. The default value +is NULL.} -\item{siglev}{Confidence level of the computed confidence interval. 0.95 -by default.} +\item{limits}{A vector of two integers indicating the range along comp_dim to +be completed. The default value is c(1, length(comp_dim dimension)).} -\item{conf}{Whether to compute confidence interval or not. TRUE by default.} +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} -\item{exp}{N by M matrix of N forecasts from M ensemble members.} +\item{conf.lev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} -\item{obs}{Vector of the corresponding observations of length N.} +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -RMS: Array with dimensions:\cr -c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all - other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the lower limit of the 95\% confidence - interval (only present if \code{conf = TRUE}), the RMSE, and the upper - limit of the 95\% confidence interval (only present if - \code{conf = TRUE}).\cr\cr -.RMS: +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +number of observation (i.e., memb_dim in obs).\cr \item{$rms}{ -The root mean square error, + The root mean square error. +} +\item{$conf.lower}{ + The lower confidence interval. Only present if \code{conf = TRUE}. } -\item{$conf_low}{ - Corresponding to the lower limit of the \code{siglev}\% confidence interval - (only present if \code{conf = TRUE}) for the rms. -} -\item{$conf_high}{ - Corresponding to the upper limit of the \code{siglev}\% confidence interval - (only present if \code{conf = TRUE}) for the rms. +\item{$conf.upper}{ + The upper confidence interval. Only present if \code{conf = TRUE}. } } \description{ -Computes the root mean square error for an array of forecasts, var_exp and -an array of observations, var_obs, which should have the same dimensions -except along the posloop dimension where the lengths can be different, with -the number of experiments/models for var_exp (nexp) and the number of -obserational datasets for var_obs (nobs).\cr -The RMSE is computed along the posRMS dimension which should correspond to -the startdate dimension.\cr -If compROW is given, the RMSE is computed only if rows along the compROW -dimension are complete between limits[1] and limits[2], i.e. there are no -NAs between limits[1] and limits[2]. This option can be activated if the -user wishes to account only for the forecasts for which observations are -available at all leadtimes.\cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval relies on a chi2 distribution.\cr\cr -.RMS provides the same functionality but taking a matrix of ensemble -members as input (exp). +Compute the root mean square error for an array of forecasts and an array of +observations. The RMSEs are computed along time_dim, the dimension which +corresponds to the startdate dimension. If comp_dim is given, the RMSEs are +computed only if data along the comp_dim dimension are complete between +limits[1] and limits[2], i.e. there are no NAs between limits[1] and +limits[2]. This option can be activated if the user wishes to account only +for the forecasts for which the corresponding observations are available at +all leadtimes.\cr +The confidence interval is computed by the chi2 distribution.\cr } \examples{ # Load sample data as in Load() example: @@ -87,29 +74,12 @@ smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) dim_to_mean <- 2 # Mean along members # Discard start-dates for which some leadtimes are missing -required_complete_row <- 3 leadtimes_per_startdate <- 60 -rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, +rms <- RMS(smooth_ano_exp, + smooth_ano_obs, + comp_dim = 'ftime', limits = c(ceiling((runmean_months + 1) / 2), leadtimes_per_startdate - floor(runmean_months / 2))) - \donttest{ -PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", - monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(0), - fileout = 'tos_rms.eps') - } -# The following example uses veriApply combined with .RMS instead of RMS - \dontrun{ -require(easyVerification) -RMS2 <- s2dverification:::.RMS -rms2 <- veriApply("RMS2", - smooth_ano_exp, - # see ?veriApply for how to use the 'parallel' option - Mean1Dim(smooth_ano_obs, dim_to_mean), - tdim = 3, ensdim = 2) - } } \author{ @@ -117,6 +87,7 @@ History:\cr 0.1 - 2011-05 (V. Guemas, \email{vguemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens2@ic3.cat}) - Formatting to R CRAN\cr 1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-RMS.R b/tests/testthat/test-RMS.R new file mode 100644 index 0000000000000000000000000000000000000000..a0aaff65124bf2898241b2a1d16de98c898c93fc --- /dev/null +++ b/tests/testthat/test-RMS.R @@ -0,0 +1,150 @@ +context("s2dverification::RMS tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(120), dim = c(member = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) + + set.seed(2) + obs1 <- array(rnorm(80), dim = c(member = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) + set.seed(2) + na <- floor(runif(10, min = 1, max = 80)) + obs1[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + RMS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + RMS(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + RMS(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and memb_dim.") + ) + expect_error( + RMS(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + RMS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + RMS(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RMS(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RMS(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + RMS(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RMS(exp1, obs1, comp_dim = c('sdate', 'ftime')), + "Parameter 'comp_dim' must be a character string." + ) + expect_error( + RMS(exp1, obs1, comp_dim = 'a'), + "Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RMS(exp1, obs1, limits = c(1,3)), + "Paramter 'comp_dim' cannot be NULL if 'limits' is assigned." + ) + expect_error( + RMS(exp1, obs1, comp_dim = 'ftime', limits = c(1)), + paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") + ) + expect_error( + RMS(exp1, obs1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." + ) + expect_error( + RMS(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + RMS(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + RMS(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'." + ) + expect_error( + RMS(exp = array(1:5, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:2, dim = c(a = 1, sdate = 1, member = 2))), + "The length of time_dim must be at least 2 to compute RMS." + ) + + + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(RMS(exp1, obs1)$rms), + c(n_exp = 3, n_obs = 2, ftime = 2, lon = 1, lat = 4) + ) + expect_equal( + head(RMS(exp1, obs1)$rms), + c(1.2815677, 2.0832803, 1.1894637, 1.3000403, 1.4053807, 0.8157563), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(RMS(exp1, obs1)$conf.lower))), + 4 + ) + expect_equal( + max(RMS(exp1, obs1)$conf.lower, na.rm = T), + 1.399509, + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(RMS(exp1, obs1, comp_dim = 'ftime')$rms))), + 0 + ) + expect_equal( + length(which(is.na(RMS(exp1, obs1, comp_dim = 'ftime')$conf.upper))), + 8 + ) + expect_equal( + length(which(is.na(RMS(exp1, obs1, comp_dim = 'lat')$conf.lower))), + 36 + ) + expect_equal( + length(which(is.na(RMS(exp1, obs1, comp_dim = 'lat', limits = c(1, 2))$conf.lower))), + 21 + ) + expect_equal( + summary(RMS(exp1, obs1, conf.lev = 0.99)$conf.upper)[1], + c(Min. = 1.406), + tolerance = 0.0001 + ) + expect_equal( + length(RMS(exp1, obs1, conf = FALSE)), + 1 + ) + + + +}) + +############################################## +