diff --git a/R/Corr.R b/R/Corr.R index 419002305586a9920628244c6445ee7f7ef20774..59bd1ec949d6b161502ad13d4765f47cde65beaa 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,72 +1,66 @@ -#'Computes the correlation coefficient between an array of forecasts and their corresponding observations +#'Compute the correlation coefficient between an array of forecast and their corresponding observation #' -#'Calculates the correlation coefficient (Pearson, Kendall or Spearman) for -#'an array of forecasts and observations. The input should be an array with -#'dimensions c(no. of datasets, no. of start dates, no. of forecast times, -#'no. of lons, no. of lats.), where the longitude and latitude dimensions are -#'optional. The correlations are computed along the poscor dimension which -#'should correspond to the startdate dimension. If compROW is given, the -#'correlations are computed only if rows along the compROW dimension are -#'complete between limits[1] and limits[2], i.e. there are no NAs between -#'limits[1] and limits[2]. This option can be activated if the user wishes to -#'account only for the forecasts for which observations are available at all -#'leadtimes. \cr -#'Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -#'The confidence interval is computed by a Fisher transformation.\cr -#'The significance level relies on a one-sided student-T distribution.\cr -#'We can modifiy the treshold of the test modifying siglev (default value=0.95).\cr\cr -#'.Corr calculates the correlation between the ensemble mean and the -#'observations, using an N by M matrix (exp) of forecasts and a vector of -#'observations (obs) as input. +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. The correlations are +#'computed along time_dim, the startdate dimension. If comp_dim is given, +#'the correlations are computed only if data along the comp_dim dimension are +#'complete between limits[1] and limits[2], i.e., there is no NA between +#'limits[1] and limits[2]. This option can be activated if the user wants to +#'account only for the forecasts which the corresponding observations are +#'available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr #' -#'@param var_exp Array of experimental data. -#'@param var_obs Array of observational data, same dimensions as var_exp -#' except along posloop dimension, where the length can be nobs instead of nexp. -#'@param posloop Dimension nobs and nexp. -#'@param poscor Dimension along which correlation are to be computed (the -#' dimension of the start dates). -#'@param compROW Data taken into account only if (compROW)th row is complete. -#' Default = NULL. -#'@param limits Complete between limits[1] & limits[2]. Default = NULL. -#'@param siglev Significance level. Default = 0.95. -#'@param method Type of correlation: 'pearson', 'spearman' or 'kendall'. -#' Default='pearson' -#'@param conf Whether to compute confidence intervals (default = 'TRUE') or -#' not (FALSE). -#'@param pval Whether to compute statistical significance p-value (default = 'TRUE') -#' or not (FALSE). -#'@param exp N by M matrix of N forecasts from M ensemble members. -#'@param obs Vector of the corresponding observations of length N. +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'time_dim' and 'memb_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along memb_dim. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of member (nobs/nexp) +#' dimension. The default value is 'member'. +#'@param comp_dim A character string indicating the name of dimension along which +#' the data is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' #'@return -#'Corr: Array with dimensions :\cr -#'c(# of datasets along posloop in var_exp, # of datasets along posloop in -#'var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -#'The third dimension, of length 4 maximum, contains to the lower limit of -#'the 95\% confidence interval, the correlation, the upper limit of the 95\% -#'confidence interval and the 95\% significance level given by a one-sided -#'T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension -#'will be of length 3. If the confidence intervals are disabled via -#'\code{conf = FALSE}, this dimension will be of length 2. If both are -#'disabled, this will be of length 2. \cr\cr -#'.Corr: -#' \itemize{ -#' \item{$corr}{The correlation statistic.} -#' \item{$p_val}{Corresponds to the p values for the \code{siglev}\% -#' (only present if \code{pval = TRUE}) for the correlation.} -#' \item{$conf_low}{Corresponds to the upper limit of the \code{siglev}\% -#' (only present if \code{conf = TRUE}) for the correlation.} -#' \item{$conf_high}{Corresponds to the lower limit of the \code{siglev}\% -#' (only present if \code{conf = TRUE}) for the correlation.} -#' } +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +#'number of observation (i.e., memb_dim in obs).\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} #' #'@keywords datagen #'@author History:\cr -#' 0.1 - 2011-04 (V. Guemas, \email{vguemas@@bsc.es}) - Original code\cr -#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@bsc.es}) - Formatting to R CRAN\cr -#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@@bsc.es}) - Adding siglev argument\cr +#'0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr +#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr +#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding conf.lev argument\cr #'1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@@bsc.es}) - Adding method argument\cr -#'1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@@bsc.es}) - Adapted to veriApply() +#'1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -78,200 +72,222 @@ #'# Smooth along lead-times #'smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) #'smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -#'dim_to_mean <- 2 # Mean along members -#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times #'leadtimes_per_startdate <- 60 -#'corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' compROW = required_complete_row, +#'corr <- Corr(smooth_ano_exp, +#' smooth_ano_obs, +#' comp_dim = 'ftime', #Discard start dates which contain any NA ftime #' limits = c(ceiling((runmean_months + 1) / 2), #' leadtimes_per_startdate - floor(runmean_months / 2))) -#' \donttest{ -#'PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", -#' monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), -#' fileout = 'tos_cor.eps') -#' } #' -#'# The following example uses veriApply combined with .Corr instead of Corr -#' \dontrun{ -#'require(easyVerification) -#'Corr2 <- s2dverification:::.Corr -#'corr2 <- veriApply("Corr2", -#' smooth_ano_exp, -#' # see ?veriApply for how to use the 'parallel' option -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' tdim = 3, ensdim = 2) -#' } #'@rdname Corr +#'@import multiApply #'@importFrom stats cor pt qnorm #'@export -Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (!is.null(compROW)) { - if (is.null(limits)) { - limits <- c(1, dim(var_obs)[compROW]) +Corr <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + comp_dim = NULL, limits = NULL, + method = 'pearson', pval = TRUE, conf = TRUE, + conf.lev = 0.95, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and memb_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA } - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) } } - if (dimsvar[poscor] < 3 ) { - stop("At least 3 values required to compute correlation") + ## method + if (!(method %in% c("kendall", "spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") } - if (method != "kendall" && method != "spearman" && method != "pearson") { - stop("Wrong correlation method") + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") } - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - var_exp <- Enlarge(var_exp, 10) - var_obs <- Enlarge(var_obs, 10) - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- poscor - posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] - var_exp <- aperm(var_exp, posaperm) - var_obs <- aperm(var_obs, posaperm) - dimsaperm <- dim(var_exp) - # - - # Check the siglev arguments: - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - if (siglev > 1 || siglev < 0) { - stop("siglev need to be higher than O and lower than 1") + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") } - # - # Loop to compute correlation for each grid point - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dim_val <- 2 - dim_pval <- 4 - nvals <- 1 + 2*conf + pval - if (!conf) { - dim_val <- 1 - dim_pval <- 2 - } else { - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") } - CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, - j10] - tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, - j10] - if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { - toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) - CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto - #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) - if (pval || conf) { - if (method == "kendall" | method == "spearman") { - eno <- Eno(rank(tmp2), 1) - } else if (method == "pearson") { - eno <- Eno(tmp2, 1) - } - } - if (pval) { - #t <- qt(0.95, eno - 2) - t <- qt(siglev, eno - 2) - CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, - j10] <- sqrt((t * t) / ((t * t) + eno - 2)) - } - if (conf) { - CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( - eno - 3)) - CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( - eno - 3)) - } - } - } - } - } - } - } - } - } - } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") } - # - dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) - # - # Output - # ~~~~~~~~ - # - CORR -} -#'@rdname Corr -#'@importFrom stats cor pt qnorm -#'@export -.Corr <- function(exp, obs, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) { - # Check 'method' - if (!(method %in% c("kendall", "spearman", "pearson"))) { - stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") - # Check 'siglev' - if (siglev > 1 || siglev < 0) { - stop("Parameter 'siglev' must be higher than O and lower than 1.") + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- s2dverification:::.aperm2(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + if (is.null(limits)) { + limits <- c(1, dim(obs)[comp_dim]) } + pos <- which(names(dim(obs)) == comp_dim) + outrows <- is.na(Mean1Dim(obs, pos, narm = FALSE, limits)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA } - p_val <- NULL - conflow <- NULL - confhigh <- NULL - ens_mean <- rowMeans(exp) - CORR <- cor(obs, ens_mean, use = "pairwise.complete.obs", method = method) - if (pval || conf) { - if (method == "kendall" || method == "spearman") { - eno <- Eno(rank(obs), 1) + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, memb_dim), + c(time_dim, memb_dim)), + fun = .Corr, + time_dim = time_dim, method = method, + pval = pval, conf = conf, conf.lev = conf.lev, + ncores = ncores) + return(res) +} + +.Corr <- function(exp, obs, time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, conf.lev = 0.95) { + + # exp: [sdate, member_exp] + # obs: [sdate, member_obs] + n_exp <- as.numeric(dim(exp)[2]) + n_obs <- as.numeric(dim(obs)[2]) + + CORR <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + eno_expand <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + p.val <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + + # ens_mean + for (i in 1:n_obs) { + + CORR[, i] <- sapply(1:n_exp, + function(x) { + if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +cor(exp[, x], obs[, i], + use = "pairwise.complete.obs", + method = method) +} else { + CORR[, i] <- NA +} +}) + } + +# if (pval) { +# for (i in 1:n_obs) { +# p.val[, i] <- try(sapply(1:n_exp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval | conf) { + if (method == "kendall" | method == "spearman") { + tmp <- apply(obs, 2, rank) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) } else if (method == "pearson") { - eno <- Eno(obs, 1) + eno <- Eno(obs, time_dim) + } + for (i in 1:n_exp) { + eno_expand[i, ] <- eno } } - if (pval && (method == "pearson")) { - t <-sqrt(CORR * CORR * (eno - 2) / (1 - (CORR ^ 2))) - p_val <- pt(t, eno - 2, lower.tail = FALSE) - } - if (conf && (method == "pearson")) { - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low - conf_int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)), - tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno - 3))) - conf_int <- conf_int[!is.na(CORR)] - conflow <- conf_int[1] - confhigh <- conf_int[2] +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval) { + t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) } - # Output - invisible(result <- list(corr = CORR, p_val = p_val, conf_low = conflow, conf_high = confhigh)) +################################### + if (conf) { + conf.lower <- (1 - conf.lev) / 2 + conf.upper <- 1 - conf.lower + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + } + + if (pval & conf) { + res <- list(corr = CORR, p.val = p.val, + conf.lower = conflow, conf.upper = confhigh) + } else if (pval & !conf) { + res <- list(corr = CORR, p.val = p.val) + } else if (!pval & conf) { + res <- list(corr = CORR, + conf.lower = conflow, conf.upper = confhigh) + } else { + res <- list(corr = CORR) + } + + return(res) + } diff --git a/man/Corr.Rd b/man/Corr.Rd index e311813de042355a4499b46d3dd6d9d6fb11c1f9..72edd2cad2e0f74c2930b4c83b2379431ef83883 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,89 +1,77 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Corr.R \name{Corr} -\alias{.Corr} \alias{Corr} -\title{Computes the correlation coefficient between an array of forecasts and their corresponding observations} +\title{Compute the correlation coefficient between an array of forecast and their corresponding observation} \usage{ -Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE) - -.Corr(exp, obs, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE) +Corr(exp, obs, time_dim = "sdate", memb_dim = "member", comp_dim = NULL, + limits = NULL, method = "pearson", pval = TRUE, conf = TRUE, + conf.lev = 0.95, ncores = NULL) } \arguments{ -\item{var_exp}{Array of experimental data.} - -\item{var_obs}{Array of observational data, same dimensions as var_exp -except along posloop dimension, where the length can be nobs instead of nexp.} +\item{exp}{A named numeric array of experimental data, with at least two +dimensions 'time_dim' and 'memb_dim'.} -\item{posloop}{Dimension nobs and nexp.} +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along memb_dim.} -\item{poscor}{Dimension along which correlation are to be computed (the -dimension of the start dates).} +\item{time_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} -\item{compROW}{Data taken into account only if (compROW)th row is complete. -Default = NULL.} +\item{memb_dim}{A character string indicating the name of member (nobs/nexp) +dimension. The default value is 'member'.} -\item{limits}{Complete between limits[1] & limits[2]. Default = NULL.} +\item{comp_dim}{A character string indicating the name of dimension along which +the data is taken into account only if it is complete. The default value +is NULL.} -\item{siglev}{Significance level. Default = 0.95.} +\item{limits}{A vector of two integers indicating the range along comp_dim to +be completed. The default is c(1, length(comp_dim dimension)).} -\item{method}{Type of correlation: 'pearson', 'spearman' or 'kendall'. -Default='pearson'} +\item{method}{A character string indicating the type of correlation: +'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.} -\item{conf}{Whether to compute confidence intervals (default = 'TRUE') or -not (FALSE).} +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: Corr = 0. The default value is TRUE.} -\item{pval}{Whether to compute statistical significance p-value (default = 'TRUE') -or not (FALSE).} +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} -\item{exp}{N by M matrix of N forecasts from M ensemble members.} +\item{conf.lev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} -\item{obs}{Vector of the corresponding observations of length N.} +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -Corr: Array with dimensions :\cr -c(# of datasets along posloop in var_exp, # of datasets along posloop in -var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of -the 95\% confidence interval, the correlation, the upper limit of the 95\% -confidence interval and the 95\% significance level given by a one-sided -T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension -will be of length 3. If the confidence intervals are disabled via -\code{conf = FALSE}, this dimension will be of length 2. If both are -disabled, this will be of length 2. \cr\cr -.Corr: - \itemize{ - \item{$corr}{The correlation statistic.} - \item{$p_val}{Corresponds to the p values for the \code{siglev}\% - (only present if \code{pval = TRUE}) for the correlation.} - \item{$conf_low}{Corresponds to the upper limit of the \code{siglev}\% - (only present if \code{conf = TRUE}) for the correlation.} - \item{$conf_high}{Corresponds to the lower limit of the \code{siglev}\% - (only present if \code{conf = TRUE}) for the correlation.} - } +A list containing the numeric arrays with dimension:\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +number of observation (i.e., memb_dim in obs).\cr +\item{$corr}{ + The correlation coefficient. +} +\item{$p.val}{ + The p-value. Only present if \code{pval = TRUE}. +} +\item{$conf.lower}{ + The lower confidence interval. Only present if \code{conf = TRUE}. +} +\item{$conf.upper}{ + The upper confidence interval. Only present if \code{conf = TRUE}. +} } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) for -an array of forecasts and observations. The input should be an array with -dimensions c(no. of datasets, no. of start dates, no. of forecast times, -no. of lons, no. of lats.), where the longitude and latitude dimensions are -optional. The correlations are computed along the poscor dimension which -should correspond to the startdate dimension. If compROW is given, the -correlations are computed only if rows along the compROW dimension are -complete between limits[1] and limits[2], i.e. there are no NAs between -limits[1] and limits[2]. This option can be activated if the user wishes to -account only for the forecasts for which observations are available at all -leadtimes. \cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval is computed by a Fisher transformation.\cr -The significance level relies on a one-sided student-T distribution.\cr -We can modifiy the treshold of the test modifying siglev (default value=0.95).\cr\cr -.Corr calculates the correlation between the ensemble mean and the -observations, using an N by M matrix (exp) of forecasts and a vector of -observations (obs) as input. +Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +an array of forecast and an array of observation. The correlations are +computed along time_dim, the startdate dimension. If comp_dim is given, +the correlations are computed only if data along the comp_dim dimension are +complete between limits[1] and limits[2], i.e., there is no NA between +limits[1] and limits[2]. This option can be activated if the user wants to +account only for the forecasts which the corresponding observations are +available at all leadtimes.\cr +The confidence interval is computed by the Fisher transformation and the +significance level relies on an one-sided student-T distribution.\cr } \examples{ # Load sample data as in Load() example: @@ -96,39 +84,22 @@ dim_to_smooth <- 4 # Smooth along lead-times smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -required_complete_row <- 3 # Discard start dates which contain any NA lead-times leadtimes_per_startdate <- 60 -corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, +corr <- Corr(smooth_ano_exp, + smooth_ano_obs, + comp_dim = 'ftime', #Discard start dates which contain any NA ftime limits = c(ceiling((runmean_months + 1) / 2), leadtimes_per_startdate - floor(runmean_months / 2))) - \donttest{ -PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", - monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_cor.eps') - } -# The following example uses veriApply combined with .Corr instead of Corr - \dontrun{ -require(easyVerification) -Corr2 <- s2dverification:::.Corr -corr2 <- veriApply("Corr2", - smooth_ano_exp, - # see ?veriApply for how to use the 'parallel' option - Mean1Dim(smooth_ano_obs, dim_to_mean), - tdim = 3, ensdim = 2) - } } \author{ History:\cr - 0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr +0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr -1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding siglev argument\cr +1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding conf.lev argument\cr 1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@bsc.es}) - Adding method argument\cr 1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R new file mode 100644 index 0000000000000000000000000000000000000000..490f5e72ec624a938b9ee74920debdf648f4c8ca --- /dev/null +++ b/tests/testthat/test-Corr.R @@ -0,0 +1,165 @@ +context("s2dverification::Corr tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(240), dim = c(dataset = 1, member = 2, sdate = 5, + ftime = 3, lat = 2, lon = 4)) + + set.seed(2) + obs1 <- array(rnorm(120), dim = c(dataset = 1, member = 1, sdate = 5, + ftime = 3, lat = 2, lon = 4)) + set.seed(2) + na <- floor(runif(10, min = 1, max = 120)) + obs1[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + Corr(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Corr(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Corr(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and memb_dim.") + ) + expect_error( + Corr(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + Corr(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + Corr(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, comp_dim = c('sdate', 'ftime')), + "Parameter 'comp_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, comp_dim = 'a'), + "Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, limits = c(1,3)), + "Paramter 'comp_dim' cannot be NULL if 'limits' is assigned." + ) + expect_error( + Corr(exp1, obs1, comp_dim = 'ftime', limits = c(1)), + paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") + ) + expect_error( + Corr(exp1, obs1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." + ) + expect_error( + Corr(exp1, obs1, method = 1), + "Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'." + ) + expect_error( + Corr(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + Corr(exp1, obs1, pval = 'TRUE'), + "Parameter 'pval' must be one logical value." + ) + expect_error( + Corr(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + Corr(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'." + ) + expect_error( + Corr(exp = array(1:10, dim = c(sdate = 2, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "The length of time_dim must be at least 3 to compute correlation." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Corr(exp1, obs1)$corr), + c(n_exp = 2, n_obs = 1, dataset = 1, ftime = 3, lat = 2, lon = 4) + ) + expect_equal( + head(Corr(exp1, obs1)$corr), + c(0.11503859, -0.46959987, -0.64113021, 0.09776572, -0.32393603, 0.27565829), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1)$p.val))), + 2 + ) + expect_equal( + max(Corr(exp1, obs1)$conf.lower, na.rm = T), + 0.6332941, + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime')$corr))), + 6 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime', limits = c(2, 3))$corr))), + 2 + ) + expect_equal( + summary(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper)[1], + c(Min. = 0.2748), + tolerance = 0.0001 + ) + expect_equal( + length(Corr(exp1, obs1, conf = FALSE, pval = FALSE)), + 1 + ) + expect_equal( + length(Corr(exp1, obs1, conf = FALSE)), + 2 + ) + expect_equal( + length(Corr(exp1, obs1, pval = FALSE)), + 3 + ) + expect_equal( + head(Corr(exp1, obs1, method = 'spearman')$corr), + c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2) + ) + expect_equal( + range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p.val, na.rm = T), + c(0.0, 0.5), + tolerance = 0.001 + ) + +}) + +############################################## +