From cb0a3a3a1a0a4402a9d0a765695c0c9e5a39c13a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 17 Jan 2020 18:10:02 +0100 Subject: [PATCH 1/5] Modify Corr.R with Apply feature --- DESCRIPTION | 3 +- NAMESPACE | 1 - R/Corr.R | 460 +++++++++++++++++++------------------ man/Corr.Rd | 141 +++++------- tests/testthat/test-Corr.R | 165 +++++++++++++ 5 files changed, 456 insertions(+), 314 deletions(-) create mode 100644 tests/testthat/test-Corr.R diff --git a/DESCRIPTION b/DESCRIPTION index 0045265c..6780c83d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,7 +50,8 @@ Imports: ncdf4, parallel, plyr, - SpecsVerification (>= 0.5.0) + SpecsVerification (>= 0.5.0), + multiApply (>= 2.0.0) Suggests: easyVerification, testthat diff --git a/NAMESPACE b/NAMESPACE index 4e855789..67268b6f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(.BrierScore) -export(.Corr) export(.RMS) export(.RMSSS) export(.RatioRMS) diff --git a/R/Corr.R b/R/Corr.R index 41900230..a675ce32 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,72 +1,66 @@ -#'Computes the correlation coefficient between an array of forecasts and their corresponding observations +#'Compute the correlation coefficient between an array of forecast and their corresponding observation #' -#'Calculates the correlation coefficient (Pearson, Kendall or Spearman) for -#'an array of forecasts and observations. The input should be an array with -#'dimensions c(no. of datasets, no. of start dates, no. of forecast times, -#'no. of lons, no. of lats.), where the longitude and latitude dimensions are -#'optional. The correlations are computed along the poscor dimension which -#'should correspond to the startdate dimension. If compROW is given, the -#'correlations are computed only if rows along the compROW dimension are -#'complete between limits[1] and limits[2], i.e. there are no NAs between -#'limits[1] and limits[2]. This option can be activated if the user wishes to -#'account only for the forecasts for which observations are available at all +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. +#'The correlations are computed along sdate_dim, the dimension corresponding +#'to the startdate dimension. If comp_dim is given, the correlations are +#'computed only if data along the comp_dim dimension are complete between +#'limits[1] and limits[2], i.e., there is no NA between limits[1] and +#'limits[2]. This option can be activated if the user wishes to account only +#'for the forecasts which the corresponding observations are available at all #'leadtimes. \cr -#'Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -#'The confidence interval is computed by a Fisher transformation.\cr -#'The significance level relies on a one-sided student-T distribution.\cr -#'We can modifiy the treshold of the test modifying siglev (default value=0.95).\cr\cr -#'.Corr calculates the correlation between the ensemble mean and the -#'observations, using an N by M matrix (exp) of forecasts and a vector of -#'observations (obs) as input. +#'The confidence interval is computed by the Fisher transformation.\cr +#'The significance level relies on an one-sided student-T distribution.\cr #' -#'@param var_exp Array of experimental data. -#'@param var_obs Array of observational data, same dimensions as var_exp -#' except along posloop dimension, where the length can be nobs instead of nexp. -#'@param posloop Dimension nobs and nexp. 
-#'@param poscor Dimension along which correlation are to be computed (the -#' dimension of the start dates). -#'@param compROW Data taken into account only if (compROW)th row is complete. -#' Default = NULL. -#'@param limits Complete between limits[1] & limits[2]. Default = NULL. -#'@param siglev Significance level. Default = 0.95. -#'@param method Type of correlation: 'pearson', 'spearman' or 'kendall'. -#' Default='pearson' -#'@param conf Whether to compute confidence intervals (default = 'TRUE') or -#' not (FALSE). -#'@param pval Whether to compute statistical significance p-value (default = 'TRUE') -#' or not (FALSE). -#'@param exp N by M matrix of N forecasts from M ensemble members. -#'@param obs Vector of the corresponding observations of length N. +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'sdate_dim' and 'memb_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along memb_dim. +#'@param memb_dim A character string indicating the name of member (nobs/nexp) +#' dimension. The default value is 'member'. +#'@param sdate_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param comp_dim A character string indicating the name of dimension along which +#' the data is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param siglev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is 1. #' #'@return -#'Corr: Array with dimensions :\cr -#'c(# of datasets along posloop in var_exp, # of datasets along posloop in -#'var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -#'The third dimension, of length 4 maximum, contains to the lower limit of -#'the 95\% confidence interval, the correlation, the upper limit of the 95\% -#'confidence interval and the 95\% significance level given by a one-sided -#'T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension -#'will be of length 3. If the confidence intervals are disabled via -#'\code{conf = FALSE}, this dimension will be of length 2. If both are -#'disabled, this will be of length 2. 
\cr\cr -#'.Corr: -#' \itemize{ -#' \item{$corr}{The correlation statistic.} -#' \item{$p_val}{Corresponds to the p values for the \code{siglev}\% -#' (only present if \code{pval = TRUE}) for the correlation.} -#' \item{$conf_low}{Corresponds to the upper limit of the \code{siglev}\% -#' (only present if \code{conf = TRUE}) for the correlation.} -#' \item{$conf_high}{Corresponds to the lower limit of the \code{siglev}\% -#' (only present if \code{conf = TRUE}) for the correlation.} -#' } +#'A list containing the numeric arrays with dimension:\cr +#'(# of datasets along memb_dim in exp, # of datasets along memb_dim in obs, +#'all other dimensions of exp & obs except sdate_dim).\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p_val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} #' #'@keywords datagen #'@author History:\cr -#' 0.1 - 2011-04 (V. Guemas, \email{vguemas@@bsc.es}) - Original code\cr -#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@bsc.es}) - Formatting to R CRAN\cr -#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@@bsc.es}) - Adding siglev argument\cr +#'0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr +#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr +#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding siglev argument\cr #'1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@@bsc.es}) - Adding method argument\cr -#'1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@@bsc.es}) - Adapted to veriApply() +#'1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +#'3.0 - 2020-01 (A. 
Ho, \email{an.ho@bsc.es}) - Adapt to multiApply #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -78,200 +72,212 @@ #'# Smooth along lead-times #'smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) #'smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -#'dim_to_mean <- 2 # Mean along members -#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times #'leadtimes_per_startdate <- 60 -#'corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' compROW = required_complete_row, +#'corr <- Corr(smooth_ano_exp, +#' smooth_ano_obs, +#' comp_dim = 'ftime', #Discard start dates which contain any NA ftime #' limits = c(ceiling((runmean_months + 1) / 2), #' leadtimes_per_startdate - floor(runmean_months / 2))) -#' \donttest{ -#'PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", -#' monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), -#' fileout = 'tos_cor.eps') -#' } #' -#'# The following example uses veriApply combined with .Corr instead of Corr -#' \dontrun{ -#'require(easyVerification) -#'Corr2 <- s2dverification:::.Corr -#'corr2 <- veriApply("Corr2", -#' smooth_ano_exp, -#' # see ?veriApply for how to use the 'parallel' option -#' Mean1Dim(smooth_ano_obs, dim_to_mean), -#' tdim = 3, ensdim = 2) -#' } #'@rdname Corr #'@importFrom stats cor pt qnorm #'@export -Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (!is.null(compROW)) { - if (is.null(limits)) { - limits <- c(1, dim(var_obs)[compROW]) +Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', + comp_dim = NULL, limits = NULL, + siglev = 0.95, method = 'pearson', + conf = TRUE, pval = TRUE, ncores = 1) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing sdate_dim and memb_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) > 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(exp)) | !sdate_dim %in% names(dim(obs))) { + stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") + } + ##comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 
'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA } - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") + ##limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'com_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) } } - if (dimsvar[poscor] < 3 ) { - stop("At least 3 values required to compute correlation") + ## siglev + if (!is.numeric(siglev) | siglev < 0 | siglev > 1 | length(siglev) > 1) { + stop("Parameter 'siglev' must be a numeric number between 0 and 1.") } - if (method != "kendall" && method != "spearman" && method != "pearson") { - stop("Wrong correlation method") + ## method + if (!(method %in% c("kendall", "spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")} + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") } - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - var_exp <- Enlarge(var_exp, 10) - var_obs <- Enlarge(var_obs, 10) - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- poscor - posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] - var_exp <- aperm(var_exp, posaperm) - var_obs <- aperm(var_obs, posaperm) - dimsaperm <- dim(var_exp) - # - - # Check the siglev arguments: - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - if (siglev > 1 || siglev < 0) { - stop("siglev need to be higher than O and lower than 1") + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") } - # - # Loop to compute correlation for each grid point - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dim_val <- 2 - dim_pval <- 4 - nvals <- 1 + 2*conf + pval - if (!conf) { - dim_val <- 1 - dim_pval <- 2 - } else { - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'memb_dim'.")) } - CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 
1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, - j10] - tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, - j10] - if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { - toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) - CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto - #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) - if (pval || conf) { - if (method == "kendall" | method == "spearman") { - eno <- Eno(rank(tmp2), 1) - } else if (method == "pearson") { - eno <- Eno(tmp2, 1) - } - } - if (pval) { - #t <- qt(0.95, eno - 2) - t <- qt(siglev, eno - 2) - CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, - j10] <- sqrt((t * t) / ((t * t) + eno - 2)) - } - if (conf) { - CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( - eno - 3)) - CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( - eno - 3)) - } - } - } - } - } - } - } - } - } - } + if (dim(exp)[sdate_dim] < 3) { + stop("The length of sdate_dim must be at least 3 to compute correlation.") + } + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + if (is.null(limits)) { + limits <- c(1, dim(obs)[comp_dim]) } + pos <- which(names(dim(obs)) == comp_dim) + outrows <- is.na(Mean1Dim(obs, pos, narm = FALSE, limits)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA } - # - dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) - # - # Output - # ~~~~~~~~ - # - CORR + + res <- Apply(list(exp, obs), + target_dims = list(c(sdate_dim, memb_dim), + c(sdate_dim, memb_dim)), + fun = .Corr, + siglev = siglev, method = method, + conf = conf, pval = pval, ncores = ncores) + return(res) } -#'@rdname Corr -#'@importFrom stats cor pt qnorm -#'@export -.Corr <- function(exp, obs, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) { +.Corr <- function(exp, obs, siglev = siglev, method = method, + conf = conf, pval = pval) { - # Check 'method' - if (!(method %in% c("kendall", "spearman", "pearson"))) { - stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") - # Check 'siglev' - if (siglev > 1 || siglev < 0) { - stop("Parameter 'siglev' must be higher than O and lower than 1.") - } + # exp: [sdate, member_exp] + # obs: [sdate, member_obs] + n_exp <- as.numeric(dim(exp)[2]) + n_obs <- as.numeric(dim(obs)[2]) + + CORR <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + eno_expand <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + p_val <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + + # ens_mean + for (i in 1:n_obs) { + + CORR[, i] <- sapply(1:n_exp, + function(x) { + if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +cor(exp[, x], obs[, i], + use = "pairwise.complete.obs", + method = method) +} else { + CORR[, i] <- NA +} +}) } - p_val <- NULL - conflow <- NULL - confhigh <- NULL - ens_mean <- rowMeans(exp) - CORR <- cor(obs, ens_mean, use = "pairwise.complete.obs", method = method) - if (pval || conf) { - if (method == "kendall" || method == "spearman") { - eno <- Eno(rank(obs), 1) +# if (pval) { +# for (i in 1:n_obs) { +# p_val[, i] <- try(sapply(1:n_exp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = 
method)$p.value)/2}), silent = TRUE) +# if (class(p_val[, i]) == 'character') { +# p_val[, i] <- NA +# } +# } +# } + + if (pval | conf) { + if (method == "kendall" | method == "spearman") { + tmp <- apply(obs, 2, rank) + eno <- Eno(tmp, 1) +# eno <- Eno(tmp, sdate_dim) #replace the upper line with this when using new Eno() } else if (method == "pearson") { eno <- Eno(obs, 1) +# eno <- Eno(obs, sdate_dim) #replace the upper line with this when using new Eno() + } + + for (i in 1:n_exp) { + eno_expand[i, ] <- eno } } - if (pval && (method == "pearson")) { - t <-sqrt(CORR * CORR * (eno - 2) / (1 - (CORR ^ 2))) - p_val <- pt(t, eno - 2, lower.tail = FALSE) +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval) { + t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p_val <- pt(t, eno_expand - 2, lower.tail = FALSE) } - if (conf && (method == "pearson")) { +################################### + if (conf) { conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - conf_int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)), - tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno - 3))) - conf_int <- conf_int[!is.na(CORR)] - conflow <- conf_int[1] - confhigh <- conf_int[2] - } - # Output - invisible(result <- list(corr = CORR, p_val = p_val, conf_low = conflow, conf_high = confhigh)) + conflow <- tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno_expand - 3)) + } + + if (pval & conf) { + res <- list(corr = CORR, p_val = p_val, + conf_low = conflow, conf_high = confhigh) + } else if (pval & !conf) { + res <- list(corr = CORR, p_val = p_val) + } else if (!pval & conf) { + res <- list(corr = CORR, + conf_low = conflow, conf_high = confhigh) + } else { + res <- list(corr = CORR) + } + + return(res) + } diff --git a/man/Corr.Rd b/man/Corr.Rd index e311813d..2bc58c28 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,89 +1,77 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Corr.R \name{Corr} -\alias{.Corr} \alias{Corr} -\title{Computes the correlation coefficient between an array of forecasts and their corresponding observations} +\title{Compute the correlation coefficient between an array of forecast and their corresponding observation} \usage{ -Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, +Corr(exp, obs, memb_dim = "member", sdate_dim = "sdate", comp_dim = NULL, limits = NULL, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE) - -.Corr(exp, obs, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE) + pval = TRUE, ncores = 1) } \arguments{ -\item{var_exp}{Array of experimental data.} - -\item{var_obs}{Array of observational data, same dimensions as var_exp -except along posloop dimension, where the length can be nobs instead of nexp.} +\item{exp}{A named numeric array of experimental data, with at least two +dimensions 'sdate_dim' and 'memb_dim'.} -\item{posloop}{Dimension nobs and nexp.} +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along memb_dim.} -\item{poscor}{Dimension along which correlation are to be computed (the -dimension of the start dates).} +\item{memb_dim}{A character string indicating the name of member (nobs/nexp) +dimension. The default value is 'member'.} -\item{compROW}{Data taken into account only if (compROW)th row is complete. 
-Default = NULL.} +\item{sdate_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} -\item{limits}{Complete between limits[1] & limits[2]. Default = NULL.} +\item{comp_dim}{A character string indicating the name of dimension along which +the data is taken into account only if it is complete. The default value +is NULL.} -\item{siglev}{Significance level. Default = 0.95.} +\item{limits}{A vector of two integers indicating the range along comp_dim to +be completed. The default is c(1, length(comp_dim dimension)).} -\item{method}{Type of correlation: 'pearson', 'spearman' or 'kendall'. -Default='pearson'} +\item{siglev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} -\item{conf}{Whether to compute confidence intervals (default = 'TRUE') or -not (FALSE).} +\item{method}{A character string indicating the type of correlation: +'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.} -\item{pval}{Whether to compute statistical significance p-value (default = 'TRUE') -or not (FALSE).} +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} -\item{exp}{N by M matrix of N forecasts from M ensemble members.} +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: Corr = 0. The default value is TRUE.} -\item{obs}{Vector of the corresponding observations of length N.} +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is 1.} } \value{ -Corr: Array with dimensions :\cr -c(# of datasets along posloop in var_exp, # of datasets along posloop in -var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of -the 95\% confidence interval, the correlation, the upper limit of the 95\% -confidence interval and the 95\% significance level given by a one-sided -T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension -will be of length 3. If the confidence intervals are disabled via -\code{conf = FALSE}, this dimension will be of length 2. If both are -disabled, this will be of length 2. \cr\cr -.Corr: - \itemize{ - \item{$corr}{The correlation statistic.} - \item{$p_val}{Corresponds to the p values for the \code{siglev}\% - (only present if \code{pval = TRUE}) for the correlation.} - \item{$conf_low}{Corresponds to the upper limit of the \code{siglev}\% - (only present if \code{conf = TRUE}) for the correlation.} - \item{$conf_high}{Corresponds to the lower limit of the \code{siglev}\% - (only present if \code{conf = TRUE}) for the correlation.} - } +A list containing the numeric arrays with dimension:\cr +(# of datasets along memb_dim in exp, # of datasets along memb_dim in obs, +all other dimensions of exp & obs except sdate_dim).\cr +\item{$corr}{ + The correlation coefficient. +} +\item{$p_val}{ + The p-value. Only present if \code{pval = TRUE}. +} +\item{$conf.lower}{ + The lower confidence interval. Only present if \code{conf = TRUE}. +} +\item{$conf.upper}{ + The upper confidence interval. Only present if \code{conf = TRUE}. +} } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) for -an array of forecasts and observations. The input should be an array with -dimensions c(no. of datasets, no. of start dates, no. of forecast times, -no. of lons, no. 
of lats.), where the longitude and latitude dimensions are -optional. The correlations are computed along the poscor dimension which -should correspond to the startdate dimension. If compROW is given, the -correlations are computed only if rows along the compROW dimension are -complete between limits[1] and limits[2], i.e. there are no NAs between -limits[1] and limits[2]. This option can be activated if the user wishes to -account only for the forecasts for which observations are available at all +Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +an array of forecast and an array of observation. +The correlations are computed along sdate_dim, the dimension corresponding +to the startdate dimension. If comp_dim is given, the correlations are +computed only if data along the comp_dim dimension are complete between +limits[1] and limits[2], i.e., there is no NA between limits[1] and +limits[2]. This option can be activated if the user wishes to account only +for the forecasts which the corresponding observations are available at all leadtimes. \cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval is computed by a Fisher transformation.\cr -The significance level relies on a one-sided student-T distribution.\cr -We can modifiy the treshold of the test modifying siglev (default value=0.95).\cr\cr -.Corr calculates the correlation between the ensemble mean and the -observations, using an N by M matrix (exp) of forecasts and a vector of -observations (obs) as input. +The confidence interval is computed by the Fisher transformation.\cr +The significance level relies on an one-sided student-T distribution.\cr } \examples{ # Load sample data as in Load() example: @@ -96,39 +84,22 @@ dim_to_smooth <- 4 # Smooth along lead-times smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -required_complete_row <- 3 # Discard start dates which contain any NA lead-times leadtimes_per_startdate <- 60 -corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, +corr <- Corr(smooth_ano_exp, + smooth_ano_obs, + comp_dim = 'ftime', #Discard start dates which contain any NA ftime limits = c(ceiling((runmean_months + 1) / 2), leadtimes_per_startdate - floor(runmean_months / 2))) - \donttest{ -PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", - monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_cor.eps') - } -# The following example uses veriApply combined with .Corr instead of Corr - \dontrun{ -require(easyVerification) -Corr2 <- s2dverification:::.Corr -corr2 <- veriApply("Corr2", - smooth_ano_exp, - # see ?veriApply for how to use the 'parallel' option - Mean1Dim(smooth_ano_obs, dim_to_mean), - tdim = 3, ensdim = 2) - } } \author{ History:\cr - 0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr +0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr 1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding siglev argument\cr 1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@bsc.es}) - Adding method argument\cr 1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() +3.0 - 2020-01 (A. 
Ho, \email{an.ho@bsc.es}) - Adapt to multiApply } \keyword{datagen} diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R new file mode 100644 index 00000000..ab487afa --- /dev/null +++ b/tests/testthat/test-Corr.R @@ -0,0 +1,165 @@ +context("s2dverification::Corr tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(240), dim = c(dataset = 1, member = 2, sdate = 5, + ftime = 3, lat = 2, lon = 4)) + + set.seed(2) + obs1 <- array(rnorm(120), dim = c(dataset = 1, member = 1, sdate = 5, + ftime = 3, lat = 2, lon = 4)) + set.seed(2) + na <- floor(runif(10, min = 1, max = 120)) + obs1[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + Corr(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Corr(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Corr(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing sdate_dim and memb_dim.") + ) + expect_error( + Corr(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + Corr(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + Corr(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, sdate_dim = c('sdate', 'a')), + "Parameter 'sdate_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, sdate_dim = 'a'), + "Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, comp_dim = c('sdate', 'ftime')), + "Parameter 'comp_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, comp_dim = 'a'), + "Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp1, obs1, limits = c(1,3)), + "Paramter 'com_dim' cannot be NULL if 'limits' is assigned." + ) + expect_error( + Corr(exp1, obs1, comp_dim = 'ftime', limits = c(1)), + paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") + ) + expect_error( + Corr(exp1, obs1, siglev = -1), + "Parameter 'siglev' must be a numeric number between 0 and 1." + ) + expect_error( + Corr(exp1, obs1, method = 1), + "Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'." + ) + expect_error( + Corr(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + Corr(exp1, obs1, pval = 'TRUE'), + "Parameter 'pval' must be one logical value." + ) + expect_error( + Corr(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + Corr(exp = array(1:10, dim = c(sdate = 1, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "Parameter 'exp' and 'obs' must have same length of all dimension expect 'memb_dim'." + ) + expect_error( + Corr(exp = array(1:10, dim = c(sdate = 2, member = 5, a = 1)), + obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), + "The length of sdate_dim must be at least 3 to compute correlation." + ) + +}) + +############################################## +test_that("2. 
Output checks: dat1", { + + expect_equal( + dim(Corr(exp1, obs1)$corr), + c(n_exp = 2, n_obs = 1, dataset = 1, ftime = 3, lat = 2, lon = 4) + ) + expect_equal( + head(Corr(exp1, obs1)$corr), + c(0.11503859, -0.46959987, -0.64113021, 0.09776572, -0.32393603, 0.27565829), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1)$p_val))), + 2 + ) + expect_equal( + max(Corr(exp1, obs1)$conf_low, na.rm = T), + 0.6332941, + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime')$corr))), + 6 + ) + expect_equal( + length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime', limits = c(2, 3))$corr))), + 2 + ) + expect_equal( + summary(Corr(exp1, obs1, siglev = 0.99)$conf_high)[1], + c(Min. = 0.2748), + tolerance = 0.0001 + ) + expect_equal( + length(Corr(exp1, obs1, conf = FALSE, pval = FALSE)), + 1 + ) + expect_equal( + length(Corr(exp1, obs1, conf = FALSE)), + 2 + ) + expect_equal( + length(Corr(exp1, obs1, pval = FALSE)), + 3 + ) + expect_equal( + head(Corr(exp1, obs1, method = 'spearman')$corr), + c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2) + ) + expect_equal( + range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p_val, na.rm = T), + c(0.0, 0.5), + tolerance = 0.001 + ) + +}) + +############################################## + -- GitLab From ce18173fdf37fba32ccabc7b54785b4798938870 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 17 Jan 2020 18:22:58 +0100 Subject: [PATCH 2/5] Add @import multiApply --- NAMESPACE | 1 + R/Corr.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 67268b6f..24746631 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -89,6 +89,7 @@ import(graphics) import(mapproj) import(maps) import(methods) +import(multiApply) import(ncdf4) import(parallel) import(plyr) diff --git a/R/Corr.R b/R/Corr.R index a675ce32..00ac7eab 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -80,6 +80,7 @@ #' leadtimes_per_startdate - floor(runmean_months / 2))) #' #'@rdname Corr +#'@import multiApply #'@importFrom stats cor pt qnorm #'@export Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', -- GitLab From e35986638a167e1fb5085ce5babfb8af3277a822 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 20 Jan 2020 18:57:30 +0100 Subject: [PATCH 3/5] Small fix --- R/Corr.R | 8 ++++---- man/Corr.Rd | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 00ac7eab..55d9ec25 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -46,10 +46,10 @@ #'\item{$p_val}{ #' The p-value. Only present if \code{pval = TRUE}. #'} -#'\item{$conf.lower}{ +#'\item{$conf:low}{ #' The lower confidence interval. Only present if \code{conf = TRUE}. #'} -#'\item{$conf.upper}{ +#'\item{$conf_high}{ #' The upper confidence interval. Only present if \code{conf = TRUE}. 
#'} #' @@ -86,7 +86,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', comp_dim = NULL, limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE, ncores = 1) { + conf = TRUE, pval = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -134,7 +134,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', ##limits if (!is.null(limits)) { if (is.null(comp_dim)) { - stop("Paramter 'com_dim' cannot be NULL if 'limits' is assigned.") + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") } if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { diff --git a/man/Corr.Rd b/man/Corr.Rd index 2bc58c28..e63742f2 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -6,7 +6,7 @@ \usage{ Corr(exp, obs, memb_dim = "member", sdate_dim = "sdate", comp_dim = NULL, limits = NULL, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE, ncores = 1) + pval = TRUE, ncores = NULL) } \arguments{ \item{exp}{A named numeric array of experimental data, with at least two @@ -53,10 +53,10 @@ all other dimensions of exp & obs except sdate_dim).\cr \item{$p_val}{ The p-value. Only present if \code{pval = TRUE}. } -\item{$conf.lower}{ +\item{$conf:low}{ The lower confidence interval. Only present if \code{conf = TRUE}. } -\item{$conf.upper}{ +\item{$conf_high}{ The upper confidence interval. Only present if \code{conf = TRUE}. } } -- GitLab From 334db984abd2a25c3ac9fcceb2634450d5e8f281 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 20 Jan 2020 19:02:06 +0100 Subject: [PATCH 4/5] Small fix --- R/Corr.R | 2 +- man/Corr.Rd | 2 +- tests/testthat/test-Corr.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 55d9ec25..dd914dbb 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -34,7 +34,7 @@ #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: Corr = 0. The default value is TRUE. #'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is 1. +#' computation. The default value is NULL. #' #'@return #'A list containing the numeric arrays with dimension:\cr diff --git a/man/Corr.Rd b/man/Corr.Rd index e63742f2..a6d79b9f 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -41,7 +41,7 @@ intervals or not. The default value is TRUE.} of the test Ho: Corr = 0. The default value is TRUE.} \item{ncores}{An integer indicating the number of cores to use for parallel -computation. The default value is 1.} +computation. The default value is NULL.} } \value{ A list containing the numeric arrays with dimension:\cr diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R index ab487afa..557e5f0e 100644 --- a/tests/testthat/test-Corr.R +++ b/tests/testthat/test-Corr.R @@ -63,7 +63,7 @@ test_that("1. Input checks", { ) expect_error( Corr(exp1, obs1, limits = c(1,3)), - "Paramter 'com_dim' cannot be NULL if 'limits' is assigned." + "Paramter 'comp_dim' cannot be NULL if 'limits' is assigned." 
) expect_error( Corr(exp1, obs1, comp_dim = 'ftime', limits = c(1)), -- GitLab From 781cf6730025871976eaec8e353cce16d1d13fd5 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 29 Jan 2020 19:55:45 +0100 Subject: [PATCH 5/5] Use new Eno() in Corr() --- R/Corr.R | 151 ++++++++++++++++++++----------------- man/Corr.Rd | 58 +++++++------- tests/testthat/test-Corr.R | 24 +++--- 3 files changed, 121 insertions(+), 112 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index dd914dbb..59bd1ec9 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,55 +1,55 @@ #'Compute the correlation coefficient between an array of forecast and their corresponding observation #' #'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for -#'an array of forecast and an array of observation. -#'The correlations are computed along sdate_dim, the dimension corresponding -#'to the startdate dimension. If comp_dim is given, the correlations are -#'computed only if data along the comp_dim dimension are complete between -#'limits[1] and limits[2], i.e., there is no NA between limits[1] and -#'limits[2]. This option can be activated if the user wishes to account only -#'for the forecasts which the corresponding observations are available at all -#'leadtimes. \cr -#'The confidence interval is computed by the Fisher transformation.\cr -#'The significance level relies on an one-sided student-T distribution.\cr +#'an array of forecast and an array of observation. The correlations are +#'computed along time_dim, the startdate dimension. If comp_dim is given, +#'the correlations are computed only if data along the comp_dim dimension are +#'complete between limits[1] and limits[2], i.e., there is no NA between +#'limits[1] and limits[2]. This option can be activated if the user wants to +#'account only for the forecasts which the corresponding observations are +#'available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr #' #'@param exp A named numeric array of experimental data, with at least two -#' dimensions 'sdate_dim' and 'memb_dim'. +#' dimensions 'time_dim' and 'memb_dim'. #'@param obs A named numeric array of observational data, same dimensions as #' parameter 'exp' except along memb_dim. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of member (nobs/nexp) #' dimension. The default value is 'member'. -#'@param sdate_dim A character string indicating the name of dimension along -#' which the correlations are computed. The default value is 'sdate'. #'@param comp_dim A character string indicating the name of dimension along which #' the data is taken into account only if it is complete. The default value #' is NULL. #'@param limits A vector of two integers indicating the range along comp_dim to #' be completed. The default is c(1, length(comp_dim dimension)). -#'@param siglev A numeric indicating the confidence level for the -#' regression computation. The default value is 0.95. #'@param method A character string indicating the type of correlation: #' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. -#'@param conf A logical value indicating whether to retrieve the confidence -#' intervals or not. The default value is TRUE. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: Corr = 0. 
The default value is TRUE. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return #'A list containing the numeric arrays with dimension:\cr -#'(# of datasets along memb_dim in exp, # of datasets along memb_dim in obs, -#'all other dimensions of exp & obs except sdate_dim).\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +#'number of observation (i.e., memb_dim in obs).\cr #'\item{$corr}{ #' The correlation coefficient. #'} -#'\item{$p_val}{ +#'\item{$p.val}{ #' The p-value. Only present if \code{pval = TRUE}. #'} -#'\item{$conf:low}{ +#'\item{$conf.lower}{ #' The lower confidence interval. Only present if \code{conf = TRUE}. #'} -#'\item{$conf_high}{ +#'\item{$conf.upper}{ #' The upper confidence interval. Only present if \code{conf = TRUE}. #'} #' @@ -57,10 +57,10 @@ #'@author History:\cr #'0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr -#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding siglev argument\cr +#'1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding conf.lev argument\cr #'1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@@bsc.es}) - Adding method argument\cr #'1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() -#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt to multiApply +#'3.0 - 2020-01 (A. 
Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -83,10 +83,10 @@ #'@import multiApply #'@importFrom stats cor pt qnorm #'@export -Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', +Corr <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', comp_dim = NULL, limits = NULL, - siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE, ncores = NULL) { + method = 'pearson', pval = TRUE, conf = TRUE, + conf.lev = 0.95, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -98,7 +98,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', } if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing sdate_dim and memb_dim.")) + "containing time_dim and memb_dim.")) } if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { @@ -108,6 +108,13 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have same dimension name") } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") @@ -115,14 +122,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") } - ## sdate_dim - if (!is.character(sdate_dim) | length(sdate_dim) > 1) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (!sdate_dim %in% names(dim(exp)) | !sdate_dim %in% names(dim(obs))) { - stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") - } - ##comp_dim + ## comp_dim if (!is.null(comp_dim)) { if (!is.character(comp_dim) | length(comp_dim) > 1) { stop("Parameter 'comp_dim' must be a character string.") @@ -131,7 +131,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") } } - ##limits + ## limits if (!is.null(limits)) { if (is.null(comp_dim)) { stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") @@ -142,21 +142,22 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', "integers smaller than the length of paramter 'comp_dim'.")) } } - ## siglev - if (!is.numeric(siglev) | siglev < 0 | siglev > 1 | length(siglev) > 1) { - stop("Parameter 'siglev' must be a numeric number between 0 and 1.") - } ## method if (!(method %in% c("kendall", "spearman", "pearson"))) { - stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")} - ## conf - if (!is.logical(conf) | length(conf) > 1) { - stop("Parameter 'conf' must be one logical value.") + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") } ## pval if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev 
> 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | @@ -173,10 +174,19 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', stop(paste0("Parameter 'exp' and 'obs' must have same length of ", "all dimension expect 'memb_dim'.")) } - if (dim(exp)[sdate_dim] < 3) { - stop("The length of sdate_dim must be at least 3 to compute correlation.") + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- s2dverification:::.aperm2(obs, order_obs) + + ############################### # Calculate Corr @@ -192,16 +202,17 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', } res <- Apply(list(exp, obs), - target_dims = list(c(sdate_dim, memb_dim), - c(sdate_dim, memb_dim)), + target_dims = list(c(time_dim, memb_dim), + c(time_dim, memb_dim)), fun = .Corr, - siglev = siglev, method = method, - conf = conf, pval = pval, ncores = ncores) + time_dim = time_dim, method = method, + pval = pval, conf = conf, conf.lev = conf.lev, + ncores = ncores) return(res) } -.Corr <- function(exp, obs, siglev = siglev, method = method, - conf = conf, pval = pval) { +.Corr <- function(exp, obs, time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, conf.lev = 0.95) { # exp: [sdate, member_exp] # obs: [sdate, member_obs] @@ -210,7 +221,7 @@ Corr <- function(exp, obs, memb_dim = 'member', sdate_dim = 'sdate', CORR <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) eno_expand <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) - p_val <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) + p.val <- array(dim = c(n_exp = n_exp, n_obs = n_obs)) # ens_mean for (i in 1:n_obs) { @@ -229,12 +240,12 @@ cor(exp[, x], obs[, i], # if (pval) { # for (i in 1:n_obs) { -# p_val[, i] <- try(sapply(1:n_exp, +# p.val[, i] <- try(sapply(1:n_exp, # function(x) {(cor.test(exp[, x], obs[, i], # use = "pairwise.complete.obs", # method = method)$p.value)/2}), silent = TRUE) -# if (class(p_val[, i]) == 'character') { -# p_val[, i] <- NA +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA # } # } # } @@ -242,13 +253,11 @@ cor(exp[, x], obs[, i], if (pval | conf) { if (method == "kendall" | method == "spearman") { tmp <- apply(obs, 2, rank) - eno <- Eno(tmp, 1) -# eno <- Eno(tmp, sdate_dim) #replace the upper line with this when using new Eno() + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) } else if (method == "pearson") { - eno <- Eno(obs, 1) -# eno <- Eno(obs, sdate_dim) #replace the upper line with this when using new Eno() + eno <- Eno(obs, time_dim) } - for (i in 1:n_exp) { eno_expand[i, ] <- eno } @@ -257,24 +266,24 @@ cor(exp[, x], obs[, i], #This doesn't return error but it's diff from cor.test() when method is spearman and kendall if (pval) { t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) - p_val <- pt(t, eno_expand - 2, lower.tail = FALSE) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) } ################################### if (conf) { - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low - conflow <- tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno_expand - 3)) - confhigh <- tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno_expand - 3)) + conf.lower <- (1 - conf.lev) / 2 + conf.upper <- 1 - conf.lower + 
conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) } if (pval & conf) { - res <- list(corr = CORR, p_val = p_val, - conf_low = conflow, conf_high = confhigh) + res <- list(corr = CORR, p.val = p.val, + conf.lower = conflow, conf.upper = confhigh) } else if (pval & !conf) { - res <- list(corr = CORR, p_val = p_val) + res <- list(corr = CORR, p.val = p.val) } else if (!pval & conf) { res <- list(corr = CORR, - conf_low = conflow, conf_high = confhigh) + conf.lower = conflow, conf.upper = confhigh) } else { res <- list(corr = CORR) } diff --git a/man/Corr.Rd b/man/Corr.Rd index a6d79b9f..72edd2ca 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -4,23 +4,23 @@ \alias{Corr} \title{Compute the correlation coefficient between an array of forecast and their corresponding observation} \usage{ -Corr(exp, obs, memb_dim = "member", sdate_dim = "sdate", comp_dim = NULL, - limits = NULL, siglev = 0.95, method = "pearson", conf = TRUE, - pval = TRUE, ncores = NULL) +Corr(exp, obs, time_dim = "sdate", memb_dim = "member", comp_dim = NULL, + limits = NULL, method = "pearson", pval = TRUE, conf = TRUE, + conf.lev = 0.95, ncores = NULL) } \arguments{ \item{exp}{A named numeric array of experimental data, with at least two -dimensions 'sdate_dim' and 'memb_dim'.} +dimensions 'time_dim' and 'memb_dim'.} \item{obs}{A named numeric array of observational data, same dimensions as parameter 'exp' except along memb_dim.} +\item{time_dim}{A character string indicating the name of dimension along +which the correlations are computed. The default value is 'sdate'.} + \item{memb_dim}{A character string indicating the name of member (nobs/nexp) dimension. The default value is 'member'.} -\item{sdate_dim}{A character string indicating the name of dimension along -which the correlations are computed. The default value is 'sdate'.} - \item{comp_dim}{A character string indicating the name of dimension along which the data is taken into account only if it is complete. The default value is NULL.} @@ -28,50 +28,50 @@ is NULL.} \item{limits}{A vector of two integers indicating the range along comp_dim to be completed. The default is c(1, length(comp_dim dimension)).} -\item{siglev}{A numeric indicating the confidence level for the -regression computation. The default value is 0.95.} - \item{method}{A character string indicating the type of correlation: 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.} +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho: Corr = 0. The default value is TRUE.} + \item{conf}{A logical value indicating whether to retrieve the confidence intervals or not. The default value is TRUE.} -\item{pval}{A logical value indicating whether to compute or not the p-value -of the test Ho: Corr = 0. The default value is TRUE.} +\item{conf.lev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. 
The default value is NULL.} } \value{ A list containing the numeric arrays with dimension:\cr -(# of datasets along memb_dim in exp, # of datasets along memb_dim in obs, -all other dimensions of exp & obs except sdate_dim).\cr + c(nexp, nobs, all other dimensions of exp except time_dim).\cr +nexp is the number of experiment (i.e., memb_dim in exp), and nobs is the +number of observation (i.e., memb_dim in obs).\cr \item{$corr}{ The correlation coefficient. } -\item{$p_val}{ +\item{$p.val}{ The p-value. Only present if \code{pval = TRUE}. } -\item{$conf:low}{ +\item{$conf.lower}{ The lower confidence interval. Only present if \code{conf = TRUE}. } -\item{$conf_high}{ +\item{$conf.upper}{ The upper confidence interval. Only present if \code{conf = TRUE}. } } \description{ Calculate the correlation coefficient (Pearson, Kendall or Spearman) for -an array of forecast and an array of observation. -The correlations are computed along sdate_dim, the dimension corresponding -to the startdate dimension. If comp_dim is given, the correlations are -computed only if data along the comp_dim dimension are complete between -limits[1] and limits[2], i.e., there is no NA between limits[1] and -limits[2]. This option can be activated if the user wishes to account only -for the forecasts which the corresponding observations are available at all -leadtimes. \cr -The confidence interval is computed by the Fisher transformation.\cr -The significance level relies on an one-sided student-T distribution.\cr +an array of forecast and an array of observation. The correlations are +computed along time_dim, the startdate dimension. If comp_dim is given, +the correlations are computed only if data along the comp_dim dimension are +complete between limits[1] and limits[2], i.e., there is no NA between +limits[1] and limits[2]. This option can be activated if the user wants to +account only for the forecasts which the corresponding observations are +available at all leadtimes.\cr +The confidence interval is computed by the Fisher transformation and the +significance level relies on an one-sided student-T distribution.\cr } \examples{ # Load sample data as in Load() example: @@ -96,10 +96,10 @@ corr <- Corr(smooth_ano_exp, History:\cr 0.1 - 2011-04 (V. Guemas, \email{vguemas@bsc.es}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN\cr -1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding siglev argument\cr +1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz@bsc.es}) - Adding conf.lev argument\cr 1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron@bsc.es}) - Adding method argument\cr 1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapted to veriApply() -3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt to multiApply +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R index 557e5f0e..490f5e72 100644 --- a/tests/testthat/test-Corr.R +++ b/tests/testthat/test-Corr.R @@ -27,7 +27,7 @@ test_that("1. Input checks", { expect_error( Corr(c(1:10), c(2:4)), paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing sdate_dim and memb_dim.") + "containing time_dim and memb_dim.") ) expect_error( Corr(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), @@ -46,12 +46,12 @@ test_that("1. Input checks", { "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." 
) expect_error( - Corr(exp1, obs1, sdate_dim = c('sdate', 'a')), - "Parameter 'sdate_dim' must be a character string." + Corr(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." ) expect_error( - Corr(exp1, obs1, sdate_dim = 'a'), - "Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension." + Corr(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." ) expect_error( Corr(exp1, obs1, comp_dim = c('sdate', 'ftime')), @@ -71,8 +71,8 @@ test_that("1. Input checks", { "integers smaller than the length of paramter 'comp_dim'.") ) expect_error( - Corr(exp1, obs1, siglev = -1), - "Parameter 'siglev' must be a numeric number between 0 and 1." + Corr(exp1, obs1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." ) expect_error( Corr(exp1, obs1, method = 1), @@ -98,7 +98,7 @@ test_that("1. Input checks", { expect_error( Corr(exp = array(1:10, dim = c(sdate = 2, member = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 2, member = 2))), - "The length of sdate_dim must be at least 3 to compute correlation." + "The length of time_dim must be at least 3 to compute correlation." ) }) @@ -116,11 +116,11 @@ test_that("2. Output checks: dat1", { tolerance = 0.001 ) expect_equal( - length(which(is.na(Corr(exp1, obs1)$p_val))), + length(which(is.na(Corr(exp1, obs1)$p.val))), 2 ) expect_equal( - max(Corr(exp1, obs1)$conf_low, na.rm = T), + max(Corr(exp1, obs1)$conf.lower, na.rm = T), 0.6332941, tolerance = 0.001 ) @@ -133,7 +133,7 @@ test_that("2. Output checks: dat1", { 2 ) expect_equal( - summary(Corr(exp1, obs1, siglev = 0.99)$conf_high)[1], + summary(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper)[1], c(Min. = 0.2748), tolerance = 0.0001 ) @@ -154,7 +154,7 @@ test_that("2. Output checks: dat1", { c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2) ) expect_equal( - range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p_val, na.rm = T), + range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p.val, na.rm = T), c(0.0, 0.5), tolerance = 0.001 ) -- GitLab
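
For readers skimming the series, here is a minimal, self-contained usage sketch of the Apply-based `Corr()` interface as it stands after patch 5/5 ("Use new Eno() in Corr()"). It reuses the synthetic array shapes defined in the new `tests/testthat/test-Corr.R` and the final argument names (`time_dim`, `memb_dim`, `conf.lev`); it is an illustration under the assumption that the package is installed with all five commits applied, not part of the patch itself.

```r
# Sketch only: assumes s2dverification built with patches 1-5 of this series.
library(s2dverification)

# Named arrays mirroring the test fixtures (dimension names are required).
set.seed(1)
exp <- array(rnorm(240), dim = c(dataset = 1, member = 2, sdate = 5,
                                 ftime = 3, lat = 2, lon = 4))
set.seed(2)
obs <- array(rnorm(120), dim = c(dataset = 1, member = 1, sdate = 5,
                                 ftime = 3, lat = 2, lon = 4))

# Correlations along 'sdate', pairing every exp member with every obs member.
# 'comp_dim' discards start dates that are incomplete along 'ftime'.
res <- Corr(exp, obs, time_dim = 'sdate', memb_dim = 'member',
            comp_dim = 'ftime', method = 'pearson',
            pval = TRUE, conf = TRUE, conf.lev = 0.95)

str(res$corr)        # c(n_exp = 2, n_obs = 1, dataset, ftime, lat, lon)
str(res$p.val)       # present because pval = TRUE
str(res$conf.lower)  # present (with $conf.upper) because conf = TRUE
```

The key design change shown here is that dimensions are selected by name rather than by position (the old `posloop`/`poscor` integers), so the same call works regardless of how the input array is ordered, and the member-mean step previously done with `Mean1Dim()` is now handled inside `.Corr()` across all exp/obs member pairs.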