From f372a7695abfbeeae0215b840422fba2ae87d79f Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 12:01:30 +0200 Subject: [PATCH 01/41] veriApply compatible Corr --- R/Corr.R | 144 ++++++++++++++----------------------------------------- 1 file changed, 37 insertions(+), 107 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index e23895b7..cc43bf2d 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,125 +1,55 @@ -Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', +Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { - limits <- c(1, dim(var_obs)[compROW]) - } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA - } - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[poscor] < 3 ) { - stop("At least 3 values required to compute correlation") - } + if (method != "kendall" && method != "spearman" && method != "pearson") { stop("Wrong correlation method") - } - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - var_exp <- Enlarge(var_exp, 10) - var_obs <- Enlarge(var_obs, 10) - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- poscor - posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] - var_exp <- aperm(var_exp, posaperm) - var_obs <- aperm(var_obs, posaperm) - dimsaperm <- dim(var_exp) - # + # Check the siglev arguments: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (siglev > 1 || siglev < 0) { stop("siglev need to be higher than O and lower than 1") } - # - # Loop to compute correlation for each grid point - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dim_val <- 2 - dim_pval <- 4 - nvals <- 1 + 2*conf + pval - if (!conf) { - dim_val <- 1 - dim_pval <- 2 - } else { + } + conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - } - CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, - j10] - tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, - j10] - if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { - toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) - CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto - #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) - if (pval || conf) { - if (method == "kendall" | method == "spearman") { - eno <- Eno(rank(tmp2), 1) - } else if (method == "pearson") { - eno <- Eno(tmp2, 1) + + ens.mean <- rowMeans(ens) + CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) + + if (pval || conf) { + if (method == "kendall" | method 
== "spearman") { + eno <- Eno(rank(obs), 1) + } else if (method == "pearson") { + eno <- Eno(obs, 1) } } - if (pval) { - #t <- qt(0.95, eno - 2) - t <- qt(siglev, eno - 2) - CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, - j10] <- sqrt((t * t) / ((t * t) + eno - 2)) - } - if (conf) { - CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( - eno - 3)) - CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( - eno - 3)) - } + if (pval) { + + t <- CORR*sqrt((eno-2)/(1-(CORR^2))) + p <- 1 - pt(t, eno-2) + p.val <- p + names(p.val) <- "p.val" + } else { + p.val <- c() + names(p.val) <- c() } - } - } - } - } - } - } - } - } - } - } - # - dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) - # + if (conf) { + conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( + eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( + eno - 3))) + conf.int <- conf.int[!is.na(CORR)] + names(conf.int) <- c("conf_low","conf_high") + } else { + conf.int <- c() + names(conf.int) <- c() + } + # Output # ~~~~~~~~ # - CORR + results <- c(CORR,conf.int, p.val) + names(results) <- c("Corr", names(conf.int), names(p.val)) + return(results) } -- GitLab From 73dbae9a20fc7beef8ff3925d9ee4c46bd3e421a Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 12:11:03 +0200 Subject: [PATCH 02/41] veriApply compatible Corr --- man/Corr.Rd | 75 ++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index 1c5641e0..5ba5ede6 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,82 +1,51 @@ \name{Corr} \alias{Corr} \title{ -Computes Correlation Skill Measure (Temporal Correlation Along Start Dates) +Computes the correlation coefficient between the ensemble mean and observations. } \description{ -Matrix var_exp & var_obs should have the same dimensions except along posloop dimension where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr -Corr computes the correlation skill of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs correlation skill measures for each other grid point of the matrix (each latitude/longitude/level/leadtime). \cr -The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], that mean with no NA between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval is computed by a Fisher transformation.\cr -The significance level relies on a one-sided student-T distribution.\cr -We can modifiy the treshold of the test modifying siglev (default value=0.95). + +Calculates the correlation coefficient (Pearson, Kendall or Spearman) between the ensemble mean and the observations along the initialization times. The confidence interval and p-value from a one-tailed t-test are provided, where the number of degrees of freedom is calculated using the effective numberof observations (see Eno). 
\cr
+
+
 }
 \usage{
-Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL,
-     limits = NULL, siglev = 0.95, method = 'pearson',
-     conf = TRUE, pval = TRUE)
+Corr(ens, obs, siglev = 0.95, method = 'pearson',
+     conf = TRUE, pval = TRUE)
 }
 \arguments{
-  \item{var_exp}{
-Matrix of experimental data.
+  \item{ens}{
+N by M matrix of N forecasts from M ensemble members.
  }
-  \item{var_obs}{
-Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp.
-  }
-  \item{posloop}{
-Dimension nobs and nexp.
-  }
-  \item{poscor}{
-Dimension along which correlation are to be computed (the dimension of the start dates).
-  }
-  \item{compROW}{
-Data taken into account only if (compROW)th row is complete.\cr
-Default = NULL.
-  }
-  \item{limits}{
-Complete between limits[1] & limits[2]. Default = NULL.
+  \item{obs}{
+Vector of the corresponding observations of length N.
  }
+
  \item{siglev}{
-Significance level according. Default = 0.95.
+Significance level. Default = 0.95.
  }
  \item{method}{
Type of correlation: 'pearson', 'spearman' or 'kendall'. Default='pearson'
  }
  \item{conf}{
-Whether to compute confidence intervals (TRUE; default) or not (FALSE).
+Whether to compute confidence intervals (default = 'TRUE') or not (FALSE).
  }
  \item{pval}{
-Whether to compute statistical significance p-value (TRUE; default) or not (FALSE).
+Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE).
  }
 }
 \value{
-Matrix with dimensions :\cr
-c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr
-The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2.
+A list containing the following components :\cr +statistic - the value of the test statistic \cr +conf_low - the lower limit of the confidence interval \cr +conf_high - the upper limit of the confidence interval \cr +p.val - the p value } \examples{ -# Load sample data as in Load() example: -example(Load) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -runmean_months <- 12 -dim_to_smooth <- 4 # Smooth along lead-times -smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) -smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -required_complete_row <- 3 # Discard start dates which contain any NA lead-times -leadtimes_per_startdate <- 60 -corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, - limits = c(ceiling((runmean_months + 1) / 2), - leadtimes_per_startdate - floor(runmean_months / 2))) -PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", - monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_cor.eps') +obs <- rnorm(100) +ens <- matrix(rnorm(500), c(100,5)) +Corr(ens,obs) } \author{ History:\cr -- GitLab From 0dd30f37b5a50f19d74af4e690d06b114ab8a32d Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 14:36:17 +0200 Subject: [PATCH 03/41] veriApply compatible BrierScore --- R/BrierScore.R | 86 ++++++++++++++++++++++++++++------------------- man/BrierScore.Rd | 15 +++++---- 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index c64434ed..ff4fc2c1 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -1,74 +1,92 @@ -BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { - if (max(pred) > 1 | min(pred) < 0) { +BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { + if (max(ens) > 1 | min(ens) < 0) { stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") } else if (max(obs) != 1 & min(obs) != 0) { .message("Binary events must be either 0 or 1. Are you certain this is a binary event? 
") } else { nbins <- length(thresholds) - 1 # Number of bins - n <- length(pred) - bins <- as.list(paste("bin", 1:nbins,sep = "")) + n <- dim(ens)[1] # Number of observations + n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members + bins <- array(as.list(paste("bin", 1:nbins,sep = "")), c(nbins,dim(ens)[2])) for (i in 1:nbins) { if (i == nbins) { - bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) + bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x <= thresholds[i + 1]))) } else { - bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) + bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x < thresholds[i + 1]))) } } - - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + + + fkbar <- okbar <- nk <- array(0, dim = c(nbins,dim(ens)[2])) + for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { + nk[i,k] <- length(bins[[i,k]][[1]]) + fkbar[i,k] <- sum(ens[,k][bins[[i,k]][[1]]])/nk[i,k] + okbar[i,k] <- sum(obs[bins[[i,k]][[1]]]) / nk[i,k] } - + } + + fkbar[fkbar == Inf] <- 0 + okbar[is.nan(okbar)] <- 0 obar <- sum(obs) / length(obs) - relsum <- ressum <- term1 <- term2 <- 0 - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i]) { - term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- rep(0,dim(ens)[2]) + + for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { + + if (nk[i,k] > 0) { + relsum[k] <- relsum[k] + nk[i,k] * (fkbar[i,k] - okbar[i,k])^2 + ressum[k] <- ressum[k] + nk[i,k] * (okbar[i,k] - obar)^2 + + for (j in 1:nk[i,k]) { + term1[k] <- term1[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k])^2 + term2[k] <- term2[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k]) * (obs[bins[[i,k]][[1]][j]] - okbar[i,k]) + } } } } + rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) - bs <- sum((pred - obs)^2) / n + bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) + bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution bs_check_gres <- rel - gres + unc # BS using GRES bss_gres <- (gres - rel) / unc # BSS using GRES - + # # Estimating the bias-corrected components of the BS # - term3 <- array(0, nbins) + term3 <- array(0, dim = c(nbins,dim(ens)[2])) for (i in 1:nbins) { - term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + for (k in 1:dim(ens)[2]) { + term3[i,k] <- (nk[i,k] / (nk[i,k] - 1)) * okbar[i,k] * (1 - okbar[i,k]) + } } - term_a <- sum(term3, na.rm = T) / n + term_a <- apply(term3, MARGIN = 2, FUN = function(x) sum(x, na.rm = T) / n) term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b - if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { - rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) - gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) - rel_bias_corrected <- rel_bias_corrected2 - gres_bias_corrected <- gres_bias_corrected2 + rel_bias_corrected2 <- gres_bias_corrected2 <- rep(0, 
dim(ens)[2])
+  for (j in 1:dim(ens)[2]) {
+    if (rel_bias_corrected[j] < 0 || gres_bias_corrected[j] < 0) {
+      rel_bias_corrected2[j] <- max(rel_bias_corrected[j], rel_bias_corrected[j] - gres_bias_corrected[j], 0)
+      gres_bias_corrected2[j] <- max(gres_bias_corrected[j], gres_bias_corrected[j] - rel_bias_corrected[j], 0)
+      rel_bias_corrected[j] <- rel_bias_corrected2[j]
+      gres_bias_corrected[j] <- gres_bias_corrected2[j]
+    }
   }
   unc_bias_corrected <- unc + term_b
   bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected
-
+
   #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) {
   #  cat("No error found \ n")
   #  cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n")
   #}
-
-  invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs))
+
+  invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, ens = ens, obs = obs))
  }
}
diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd
index 43843523..e6639f82 100644
--- a/man/BrierScore.Rd
+++ b/man/BrierScore.Rd
@@ -1,7 +1,7 @@
 \name{BrierScore}
 \alias{BrierScore}
 \title{
-Compute Brier Score And Its Decomposition And Brier Skill Score
+Compute Brier Score and its decomposition and the Brier Skill Score
 }
 \description{
 Returns the values of the BS and its standard decomposition as well as the addition of the two within-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). The BSS takes the climatology as the reference forecast.
@@ -10,15 +10,16 @@
 Stephenson et al. (2008). Two extra components in the Brier score decomposition. Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr
 }
 \usage{
-BrierScore(obs, pred, thresholds = seq(0, 1, 0.1))
+BrierScore(ens, obs, thresholds = seq(0, 1, 0.1))
 }
-\arguments{
+\arguments{
+  \item{ens}{
+Vector of probabilistic predictions with values in the range [0,1]
+  }
   \item{obs}{
 Vector of binary observations (1 or 0)
   }
-  \item{pred}{
-Vector of probablistic predictions with values in the range [0,1]
-  }
+
   \item{thresholds}{
 Values used to bin the forecasts. By default the bins are {[0,0.1), [0.1, 0.2), ...
[0.9, 1]} } @@ -47,7 +48,7 @@ $obs: probability forecasts of the event\cr \examples{ a <- runif(10) b <- round(a) -x <- BrierScore(b, a) +x <- BrierScore(a,b) x$bs - x$bs_check_res x$bs - x$bs_check_gres x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected -- GitLab From 46abcfe171621d30638ba06535373044fe646a3f Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 14:41:04 +0200 Subject: [PATCH 04/41] veriApply compatible score functions --- R/RMS.R | 107 ++++++++-------------------------------------- R/RatioRMS.R | 93 ++++++++++------------------------------ R/RatioSDRMS.R | 100 ++++++++++++------------------------------- R/Trend.R | 90 +++++++------------------------------- man/RMS.Rd | 62 +++++++-------------------- man/RatioRMS.Rd | 71 ++++++++---------------------- man/RatioSDRMS.Rd | 44 +++++++------------ man/Trend.Rd | 37 +++++++--------- 8 files changed, 147 insertions(+), 457 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index 1a63b48f..c8ba7c12 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -1,102 +1,31 @@ -RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, - limits = NULL, siglev = 0.95, conf = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { - limits <- c(1, dim(var_obs)[compROW]) - } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA - } +RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { + - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & posRMS to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) # # RMS & its confidence interval computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # if (conf) { - nvals <- 3 - dim_rms <- 2 + conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - } else { - nvals <- 1 - dim_rms <- 1 } - enlrms <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif <- array(dim = dimsaperm[-1]) - dif[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - enlrms[jexp, jobs, dim_rms, , , , , , , , ] <- Mean1Dim(dif ** 2, 1, - narm = TRUE) ** 0.5 + + dif <- rowMeans(ens) - obs + enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 if (conf) { eno <- Eno(dif, 1) - for (j3 in 1:dimsaperm[3]){ - for (j4 in 1:dimsaperm[4]){ - for (j5 in 1:dimsaperm[5]){ - for (j6 in 1:dimsaperm[6]){ - for (j7 in 1:dimsaperm[7]){ - for (j8 in 1:dimsaperm[8]){ - for (j9 in 1:dimsaperm[9]){ - for (j10 in 1:dimsaperm[10]){ - ndat <- length(sort(dif[, j3, j4, j5, j6, j7, j8, j9, - j10])) - enlrms[jexp, jobs, 
1, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_high, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - enlrms[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_low, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - } - } - } - } - } - } - } - } - } - } - } + + ndat <- length(sort(dif)) + conf.int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5, + (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5) + names(conf.int) <- c("conf_low","conf_high") + } else { + conf.int <- c() + names(conf.int) <- c() } - dim(enlrms) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlrms + results <- c(enlrms,conf.int) + names(results) <- c("rms",names(conf.int)) + return(results) + } diff --git a/R/RatioRMS.R b/R/RatioRMS.R index a97ec02f..3f505613 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -1,86 +1,37 @@ -RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { - # - # Enlarge var_exps & var_obs to 10 dim + move posRMS to 1st pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp1) - for (iind in 1:length(dimsvar)) { - if (dim(var_exp2)[iind] != dimsvar[iind] | - dim(var_obs)[iind] != dimsvar[iind]) { - stop("all input vars should have the same dimensions") - } - } - enlvarexp1 <- Enlarge(var_exp1, 10) - enlvarexp2 <- Enlarge(var_exp2, 10) - enlvarobs <- Enlarge(var_obs, 10) - posaperm <- 1:10 - posaperm[1] <- posRMS - posaperm[posRMS] <- 1 - permvarexp1 <- aperm(enlvarexp1, posaperm) - permvarexp2 <- aperm(enlvarexp2, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp1) +RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { + # # RMS ratio and its pvalue computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - enlratiorms <- array(dim = c(nvals, dimsaperm[2:10])) - dif1 <- permvarexp1 - permvarobs - dif2 <- permvarexp2 - permvarobs - rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 - rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + + + dif1 <- rowMeans(ens, na.rm = TRUE) - obs + dif2 <- rowMeans(ens.ref, na.rm = TRUE) - obs + rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 + rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( abs(rms2), na.rm = TRUE) / 1000 - enlratiorms[1, , , , , , , , , ] <- (rms1 / rms2) + enlratiorms <- (rms1 / rms2) if (pval) { eno1 <- Eno(dif1, 1) eno2 <- Eno(dif2, 1) F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) F[which(F < 1)] <- 1 / F[which(F < 1)] - for (j2 in 1:dimsaperm[2]) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - l1 <- eno1[j2, j3, j4, j5, j6, j7, j8, j9, j10] - l2 <- eno2[j2, j3, j4, j5, j6, j7, j8, j9, j10] - if (!is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { - enlratiorms[2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- (1 - pf(F[j2, j3, j4, j5, j6, j7, j8, j9, j10], - l1 - 1, l2 - 1)) * 2 - } else { - enlratiorms[1, j2, j3, j4, j5, j6, j7, j8, j9, j10] <- NA - } - } - } - } - } - } - } - } - } + + + if (!is.na(eno1) && 
!is.na(eno2) && eno1 > 2 && eno2 > 2) {
+      p.val <- (1 - pf(F,
+               eno1 - 1, eno2 - 1)) * 2
+      names(p.val) <- "p.val"
+    }
+    else {
+      p.val <- c()
+      names(p.val) <- c()
    }
  }
+  results <- c(enlratiorms, p.val)
+  names(results) <- c("ratiorms",names(p.val))
+  return(results)
-  enlratiorms <- aperm(enlratiorms, posaperm)
-  if (pval) {
-    dimsvar[posRMS] <- 2
-  } else {
-    dimsvar[posRMS] <- 1
-  }
-  dim(enlratiorms) <- dimsvar
-  #
-  # Output
-  # ~~~~~~~~
-  #
-  enlratiorms
 }
diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R
index d76a832b..7ec8ff67 100644
--- a/R/RatioSDRMS.R
+++ b/R/RatioSDRMS.R
-RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) {
-  #
-  # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary
-  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  #
-  dimexp <- dim(var_exp)
-  dimobs <- dim(var_obs)
-  if (length(dimexp) < 4 | length(dimobs) < 4) {
-    stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)")
-  }
-  for (jn in 3:max(length(dimexp), length(dimobs))) {
-    if (dimexp[jn] != dimobs[jn]) {
-      stop("Wrong input dimensions")
-    }
-  }
-  var_exp <- Enlarge(var_exp, 7)
-  var_obs <- Enlarge(var_obs, 7)
+RatioSDRMS <- function(ens, obs, pval = TRUE) {
   #
   # Ratio RMSE / SD and its significance level
   # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #
-  ensmeanexp <- Mean1Dim(var_exp, 2)
-  ensmeanobs <- Mean1Dim(var_obs, 2)
-  dimrms <- c(dimexp[1], dimobs[1], dimexp[4:length(dimexp)])
+  ensmean <- rowMeans(ens)
+
+  std <- sd(ensmean)
+  enosd <- Eno(ensmean, 1)
+
+  dif <- ensmean - obs
+  rms <- mean(dif ** 2, na.rm = TRUE) ** 0.5
+
+  enorms <- Eno(dif, 1)
+  enlratiormssd <- std / rms
   if (pval) {
-    nvals <- 2
-  } else {
-    nvals <- 1
-  }
-  dimratiormssd <- c(dimexp[1], dimobs[1], nvals, dimexp[4:length(dimexp)])
-  if (length(dimrms) < 6) {
-    dimrms <- c(dimrms, array(1, dim = (6 - length(dimrms))))
-  }
-  if (length(dimratiormssd) < 7) {
-    dimenlratiormssd <- c(dimratiormssd,
-                          array(1, dim = (7 - length(dimratiormssd))))
-  } else {
-    dimenlratiormssd <- dimratiormssd
-  }
-  dif <- var_exp - InsertDim(ensmeanexp, 2, dimexp[2])
-  std <- apply(array(dif, dim = c(dimexp[1], dimexp[2] * dimexp[3],
-               dimrms[3:6])), c(1, 3, 4, 5, 6), sd, na.rm = TRUE)
-  enosd <- apply(Eno(dif, 3), c(1, 3, 4, 5, 6), sum, na.rm = TRUE)
-  rms <- array(dim = dimrms)
-  enlratiormssd <- array(dim = dimenlratiormssd)
-  for (jexp in 1:dimexp[1]) {
-    for (jobs in 1:dimobs[1]) {
-      dif <- ensmeanexp[jexp, , , , , ] - ensmeanobs[jobs, , , , , ]
-      rms[jexp,jobs, , , , ] <- Mean1Dim(dif ** 2, 1, narm = TRUE) ** 0.5
-      enorms <- array(Eno(dif, 1), dim = dimrms[3:6])
-      enlratiormssd[jexp, jobs, 1, , , , ] <- std[jexp, , , , ] / rms[jexp,
-                                              jobs, , , , ]
-      if (pval) {
-        for (jltime in 1:dimrms[3]) {
-          for (jlev in 1:dimrms[4]) {
-            for (jlat in 1:dimrms[5]) {
-              for (jlon in 1:dimrms[6]) {
-                l1 <- enosd[jexp, jltime, jlev, jlat, jlon]
-                l2 <- enorms[jltime, jlev, jlat, jlon]
-                F <- (enosd[jexp, jltime, jlev, jlat, jlon] * (std[jexp, jltime,
-                     jlev, jlat, jlon]) ** 2 / (enosd[jexp, jltime, jlev, jlat,
-                     jlon] - 1)) / (enorms[jltime, jlev, jlat, jlon] * (rms[jexp,
-                     jobs, jltime, jlev, jlat, jlon]) ** 2 / (enorms[jltime,
-                     jlev, jlat, jlon] - 1))
+
+  l1 <- enosd
+  l2 <- enorms
+
+  F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1))
   if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) {
-                  enlratiormssd[jexp, jobs, 2, jltime, jlev, jlat,
-                                jlon] <- 1 - pf(F, l1 - 1, l2 - 1)
-                } else {
-                  enlratiormssd[jexp, jobs, 1, jltime, jlev, jlat,
jlon] <- NA - } - } - } - } - } - } - } + p.val <- 1 - pf(F, l1 - 1, l2 - 1) + names(p.val) <- "p.val" + } } - dim(enlratiormssd) <- dimratiormssd + else { + p.val <- c() + names(p.val) <- c() } + # # Output # ~~~~~~~~ # - enlratiormssd + + results <- c(enlratiormssd, p.val) + names(results) <- c("ratio", names(p.val)) + return(results) } diff --git a/R/Trend.R b/R/Trend.R index 79984160..f427f283 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -1,80 +1,22 @@ -Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { - # - # Enlarge the size of var to 10 and move posTR to first position - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var) - if (is.null(dimsvar)) { - dimsvar <- length(var) - } - enlvar <- Enlarge(var, 10) - outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) - if (conf) { - nvals <- 4 - poscoef2 <- 2 - poscoef1 <- 4 - } else { - nvals <- 2 - poscoef2 <- 1 - poscoef1 <- 2 - } - outdim[posTR] <- nvals - posaperm <- 1:10 - posaperm[posTR] <- 1 - posaperm[1] <- posTR - enlvar <- aperm(enlvar, posaperm) - dimsaperm <- outdim[posaperm] - # - # Loop on all dimensions to compute trends - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enltrend <- array(dim = dimsaperm) - enldetrend <- array(dim = dim(enlvar)) - for (j2 in 1:dimsaperm[2]) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp <- enlvar[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - if (any(!is.na(tmp))) { - mon <- seq(tmp) * interval - lm.out <- lm(tmp ~ mon, na.action = na.omit) - enltrend[poscoef2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[2] - enltrend[poscoef1, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[1] - if (conf) { - enltrend[c(1, 3), j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out, level = siglev)[2, 1:2] - } - enldetrend[is.na(tmp) == FALSE, j2, j3, j4, j5, j6, j7, j8, - j9, j10] <- tmp[is.na(tmp) == FALSE] - lm.out$fitted.values - } - } - } - } - } - } +Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { + + ensmean <- rowMeans(ens, na.rm = TRUE) + + if (any(!is.na(ensmean))) { + mon <- seq(ensmean) * interval + lm.out <- lm(ensmean ~ mon, na.action = na.omit) + trend <- c(lm.out$coefficients[2], lm.out$coefficients[1]) + if (conf) { + conf.int <- confint(lm.out, level = siglev)[2, 1:2] } - } - } - } - # - # Back to the original dimensions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enldetrend <- aperm(enldetrend, posaperm) - dim(enldetrend) <- dimsvar - enltrend <- aperm(enltrend, posaperm) - dimsvar[posTR] <- nvals - dim(enltrend) <- dimsvar + detrend <- ensmean[is.na(ensmean) == FALSE] - lm.out$fitted.values + } + + + # # Outputs # ~~~~~~~~~ # - invisible(list(trend = enltrend, detrended = enldetrend)) + invisible(list(trend = trend, conf.int = conf.int, detrended = detrend)) } diff --git a/man/RMS.Rd b/man/RMS.Rd index 118400f4..8d276c57 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -1,36 +1,23 @@ \name{RMS} \alias{RMS} \title{ -Computes Root Mean Square Error Skill Measure +Computes the root mean square error of the ensemble mean } \description{ -Matrix var_exp & var_obs should have the same dimensions except along posloop dimension where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs 
(nobs).\cr -RMS computes the Root Mean Square Error skill of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSE skill measures for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr -The RMSE are computed along the posRMS dimension which should correspond to the startdate dimension.\cr -If compROW is given, the RMSE are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], that mean with no NA between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval relies on a chi2 distribution. +Computes the RMSE of an ensemble mean forecast against a vector of observations.\cr +The confidence interval of the RMSE is obtained with a chi2 distribution. } \usage{ -RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) +RMS(ens, obs, siglev = 0.95, conf = TRUE) } \arguments{ - \item{var_exp}{ -Matrix of experimental data. + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. } - \item{var_obs}{ -Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. - } - \item{posloop}{ -Dimension nobs and nexp. - } - \item{posRMS}{ -Dimension along which RMSE are to be computed (the dimension of the start dates). - } - \item{compROW}{ -Data taken into account only if (compROW)th row is complete.\cr -Default = NULL. + \item{obs}{ +Vector of the corresponding observations of length N. } + \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } @@ -42,32 +29,15 @@ Whether to compute confidence interval or not. TRUE by default. } } \value{ -Matrix with dimensions:\cr - c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). 
+A list containing the following components :\cr
+rms - the root mean square error \cr
+conf_low - the lower limit of the confidence interval \cr
+conf_high - the upper limit of the confidence interval \cr
 }
 \examples{
-# Load sample data as in Load() example:
-example(Load)
-clim <- Clim(sampleData$mod, sampleData$obs)
-ano_exp <- Ano(sampleData$mod, clim$clim_exp)
-ano_obs <- Ano(sampleData$obs, clim$clim_obs)
-runmean_months <- 12
-dim_to_smooth <- 4  # Smooth along lead-times
-smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth)
-smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth)
-dim_to_mean <- 2  # Mean along members
-required_complete_row <- 3  # Discard start-dates for which some leadtimes are missing
-leadtimes_per_startdate <- 60
-rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean),
-           Mean1Dim(smooth_ano_obs, dim_to_mean),
-           compROW = required_complete_row,
-           limits = c(ceiling((runmean_months + 1) / 2),
-           leadtimes_per_startdate - floor(runmean_months / 2)))
-PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K",
-            monini = 11, limits = NULL, listexp = c('CMIP5 IC3'),
-            listobs = c('ERSST'), biglab = FALSE, hlines = c(0),
-            fileout = 'tos_rms.eps')
+obs <- rnorm(100)
+ens <- matrix(rnorm(500), c(100,5))
+RMS(ens, obs)
 }
 \author{
 History:\cr
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index ee116561..9dfcb19e 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
 \name{RatioRMS}
 \alias{RatioRMS}
 \title{
-Computes The Ratio Between The RMSE Scores of 2 Experiments.
+Computes the ratio between the RMSE of 2 experiments.
 }
 \description{
-Matrix var_exp1 / var_exp2 / var_obs should have the same dimensions.\cr
-The ratio RMSE(var_exp1, var_obs) / RMSE(var_exp2, var_obs) is output.\cr
+Calculates the ratio of the RMSE for two forecasts of the same observations.\cr
+The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr
 The p-value is provided by a two-sided Fisher test.
 }
 \usage{
-RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE)
+RatioRMS(ens, ens.ref, obs, pval = TRUE)
 }
 \arguments{
-  \item{var_exp1}{
+  \item{ens}{
 Matrix of experimental data 1.
  }
-  \item{var_exp2}{
-Matrix of experimental data 2, same dimensions as var_exp1.
+  \item{ens.ref}{
+Matrix of experimental data 2.
  }
-  \item{var_obs}{
-Matrix of observational data, same dimensions as var_exp1.
-  }
-  \item{posRMS}{
-Dimension along which the RMSE are to be computed = the position of the start dates.
+  \item{obs}{
+Vector of observations.
  }
+
  \item{pval}{
 Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default.
  }
 }
 \value{
-Matrix with the same dimensions as var_exp1/var_exp2/var_obs except along posRMS where the dimension has length 2 if \code{pval = TRUE}, or 1 otherwise.\cr
+A list containing the following components :\cr
+ratiorms - the ratio of the rms of the two ensembles \cr
+p.val - the p value \cr
 The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.
} \examples{ -# See examples on Load() to understand the first lines in this example - \dontrun{ -data_path <- system.file('sample_data', package = 's2dverification') -expA <- list(name = 'experiment', path = file.path(data_path, - 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', - '$VAR_NAME$_$START_DATE$.nc')) -obsX <- list(name = 'observation', path = file.path(data_path, - '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', - '$VAR_NAME$_$YEAR$$MONTH$.nc')) - -# Now we are ready to use Load(). -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', list(expA), list(obsX), startDates, - output = 'lonlat', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'lonlat', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } -leadtimes_dimension <- 4 -initial_month <- 11 -mean_start_month <- 12 -mean_stop_month <- 2 -sampleData$mod <- Season(sampleData$mod, leadtimes_dimension, initial_month, - mean_start_month, mean_stop_month) -sampleData$obs <- Season(sampleData$obs, leadtimes_dimension, initial_month, - mean_start_month, mean_stop_month) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ], - ano_exp[ , 3, , , , ][, 1, , ], - Mean1Dim(ano_obs, 2)[1, , 1, , ], 1) -PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, - toptitle = 'Ratio RMSE') -} + ens <- matrix(rnorm(500), c(100,5)) + ens.ref <- matrix(rnorm(500), c(100,5)) + obs <- rnorm(100) + Ratio <- RatioRMS(ens,ens.ref,obs) \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 3dcfcd96..28d01b49 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -1,50 +1,36 @@ \name{RatioSDRMS} \alias{RatioSDRMS} \title{ -Computes The Ratio Between the Ensemble Spread and the RMSE of the Ensemble Mean +Computes the ratio between the ensemble spread and the RMSE of the ensemble mean } \description{ -Matrices var_exp & var_obs should have dimensions between\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime)\cr -and\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr -The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr -The p-value is provided by a one-sided Fischer test. +Calculates the ratio of spread and the RMSE from a matrix of N ensemble members for M forecasts and the corresponding vector of observations of length M. \cr + +The p-value is provided by a one-sided Fisher test. } \usage{ -RatioSDRMS(var_exp, var_obs, pval = TRUE) +RatioSDRMS(ens, obs, pval = TRUE) } \arguments{ - \item{var_exp}{ -Model data:\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon) +\item{ens}{ +N by M matrix of N forecasts from M ensemble members. } - \item{var_obs}{ -Observational data:\cr - c(nobs, nmemb, nsdates, nltime) up to\cr - c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon) + \item{obs}{ +Vector of the corresponding observations of length N. 
} \item{pval}{ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. } } \value{ -Matrix with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to - c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). -The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. +A vector containing the following components :\cr +ratio - the ratio of the spread and RMSE \cr +p.val - the p.value (if pval ='TRUE') \cr } \examples{ -# Load sample data as in Load() example: -example(Load) -rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) -rsdrms2 <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) -rsdrms2[, , 2, ] <- rsdrms[, , 1, ] -rsdrms2[, , 4, ] <- rsdrms[, , 2, ] -PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", - monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, - fileout = 'tos_rsdrms.eps') +obs <- rnorm(100) +ens <- matrix(rnorm(500), c(100,5)) +RatioSDRMS(ens,obs) } \author{ History:\cr diff --git a/man/Trend.Rd b/man/Trend.Rd index 959e32a9..8a644415 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,23 +1,21 @@ \name{Trend} \alias{Trend} \title{ -Computes Trends +Computes the trend of the ensemble mean. } \description{ -Computes the trend along the posTR dimension of the matrix var by least square fitting, and the associated an error interval.\cr -Provide also the detrended data.\cr +Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr +Trend() also provides the time series of the detrended ensemble mean forecasts.\cr The confidence interval relies on a student-T distribution. } \usage{ -Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) +Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) } \arguments{ - \item{var}{ -Matrix of any number of dimensions up to 10. - } - \item{posTR}{ -Position along which to compute the trend. + \item{ens}{ +M by N matrix of M forecasts from N ensemble members. } + \item{interval}{ Number of months/years between 2 points along posTR dimension.\cr Default = 1.\cr @@ -32,25 +30,20 @@ Whether to compute the confidence levels or not. TRUE by default. } \value{ \item{$trend}{ -Same dimensions as var except along the posTR dimension which is replaced by a dimension of length 2 or 4, corresponding to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}, trends, the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}), and intercept of the trend for each point of the matrix along all the other dimensions. +The intercept and slope coefficients for the least squares fitting of the trend, } +\item{$conf.int}{corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient.} \item{$detrended}{ Same dimensions as var with linearly detrended var along the posTR dimension. 
} } \examples{ -# Load sample data as in Load() example: -example(Load) -months_between_startdates <- 60 -trend <- Trend(sampleData$obs, 3, months_between_startdates) -PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", - monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = 0, - fileout = 'tos_obs_trend.eps') -PlotAno(trend$detrended, NULL, startDates, - toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', - legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') -} +ens <- matrix(rnorm(500), c(100,5)) +#Add a trend to the first ensemble member +ens[,1] <- ens[,1]+(seq(1,100,1)*5) +ens.trend <- Trend(ens) + + \author{ History:\cr 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -- GitLab From d410ec12074e16543f25de5648880fd108dca4b2 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 16:48:53 +0200 Subject: [PATCH 05/41] veriApply compatible score functions --- R/Corr.R | 19 +++++-------------- R/RatioRMS.R | 12 ++++-------- R/RatioSDRMS.R | 11 ++++------- 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index cc43bf2d..eea1c4b7 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -30,26 +30,17 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', t <- CORR*sqrt((eno-2)/(1-(CORR^2))) p <- 1 - pt(t, eno-2) p.val <- p - names(p.val) <- "p.val" - } else { - p.val <- c() - names(p.val) <- c() - } + } if (conf) { conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) conf.int <- conf.int[!is.na(CORR)] - names(conf.int) <- c("conf_low","conf_high") - } else { - conf.int <- c() - names(conf.int) <- c() - } + + } # Output - # ~~~~~~~~ + # ~~~~~~~~ # - results <- c(CORR,conf.int, p.val) - names(results) <- c("Corr", names(conf.int), names(p.val)) - return(results) + list(Corr = CORR,conf_low =conf.int[1],conf_high=conf.int[2],p.val =p.val) } diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 3f505613..5589f170 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -23,15 +23,11 @@ RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) { p.val <- (1 - pf(F, eno1 - 1, eno2 - 1)) * 2 - names(p.val) <- "p.val" + } - else { - p.val <- c() - names(p.val) <- c() - } + } - results <- c(enlratiorms, p.val) - names(results) <- c("ratiorms",names(p.val)) - return(results) + # Output + list(ratiorms = enlratiorms,p.val =p.val) } diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 7ec8ff67..1edbd700 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -23,19 +23,16 @@ RatioSDRMS <- function(ens, obs, pval = TRUE) { F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1)) if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { p.val <- 1 - pf(F, l1 - 1, l2 - 1) - names(p.val) <- "p.val" + } } - else { - p.val <- c() - names(p.val) <- c() } + # # Output # ~~~~~~~~ # - results <- c(enlratiormssd, p.val) - names(results) <- c("ratio", names(p.val)) - return(results) + list(ratio = enlratiormssd, p.val = p.val) + } -- GitLab From 780a7afe6f48ebf9c3b6821e07a87c22ba56e1f2 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 19 Aug 2016 12:44:17 +0200 Subject: [PATCH 06/41] Docfixes in Trend and RatioRMS. 
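The examples in both pages now call the veriApply-compatible interfaces
directly. A quick sketch of the documented usage (illustrative only; it
assumes the Trend() and RatioRMS() versions introduced earlier in this
series, with s2dverification's Eno() available for the p-value):

set.seed(1)
obs <- rnorm(100)                      # N = 100 verification times
ens <- matrix(rnorm(500), 100, 5)      # N x M matrix, M = 5 ensemble members
ens.ref <- matrix(rnorm(500), 100, 5)  # reference forecast, same shape

tr <- Trend(ens)                   # least-squares trend of the ensemble mean
tr$trend                           # slope and intercept of the fit
tr$conf.int                        # siglev% confidence interval for the slope

rr <- RatioRMS(ens, ens.ref, obs)  # RMSE(ens, obs) / RMSE(ens.ref, obs)
rr$ratiorms
rr$p.val                           # two-sided Fisher test of Ho: ratio = 1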
---
 man/RatioRMS.Rd | 12 ++++++------
 man/Trend.Rd    | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index 9dfcb19e..2e93d992 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -1,7 +1,7 @@
 \name{RatioRMS}
 \alias{RatioRMS}
 \title{
-Computes the ratio between the RMSE of 2 experiments.
+Computes the Ratio Between The RMSE of Two Experiments
 }
 \description{
 Calculates the ratio of the RMSE for two forecasts of the same observations.\cr
@@ -21,7 +21,6 @@ Matrix of experimental data 2.
  \item{obs}{
 Vector of observations.
  }
-
  \item{pval}{
 Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default.
  }
@@ -33,13 +32,14 @@ p.val - the p value \cr
 The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.
 }
 \examples{
-  ens <- matrix(rnorm(500), c(100,5))
-  ens.ref <- matrix(rnorm(500), c(100,5))
+  ens <- matrix(rnorm(500), c(100, 5))
+  ens.ref <- matrix(rnorm(500), c(100, 5))
   obs <- rnorm(100)
-  Ratio <- RatioRMS(ens,ens.ref,obs)
+  Ratio <- RatioRMS(ens, ens.ref, obs)
 \author{
 History:\cr
 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr
-1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN
+1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr
+2.0 - 2016-08 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
 }
 \keyword{datagen}
diff --git a/man/Trend.Rd b/man/Trend.Rd
index 8a644415..0a3a930b 100644
--- a/man/Trend.Rd
+++ b/man/Trend.Rd
@@ -1,7 +1,7 @@
 \name{Trend}
 \alias{Trend}
 \title{
-Computes the trend of the ensemble mean.
+Computes the Trend of the Ensemble Mean
 }
 \description{
 Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr
@@ -14,8 +14,7 @@ Trend(ens, interval = 1, siglev = 0.95, conf = TRUE)
 \arguments{
  \item{ens}{
 M by N matrix of M forecasts from N ensemble members.
-  }
-
+  }
  \item{interval}{
-Number of months/years between 2 points along posTR dimension.\cr
+Number of months/years between 2 points along the forecast time dimension.\cr
 Default = 1.\cr
@@ -32,21 +31,23 @@ Whether to compute the confidence levels or not. TRUE by default.
 \item{$trend}{
 The intercept and slope coefficients for the least squares fitting of the trend,
 }
-\item{$conf.int}{corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient.}
+  \item{$conf.int}{
+Corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient.
+  }
 \item{$detrended}{
-Same dimensions as var with linearly detrended var along the posTR dimension.
+The detrended time series of the ensemble mean.
 }
 }
 \examples{
-ens <- matrix(rnorm(500), c(100,5))
+ens <- matrix(rnorm(500), c(100, 5))
 #Add a trend to the first ensemble member
-ens[,1] <- ens[,1]+(seq(1,100,1)*5)
+ens[,1] <- ens[, 1] + (seq(1, 100, 1) * 5)
 ens.trend <- Trend(ens)
-
-
+}
 \author{
 History:\cr
 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr
-1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN
+1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN\cr
+2.0 - 2016-08 (A.
Hunter, \email{alasdair.hunter at bsc.es}) - Adapt to veriApply() } \keyword{datagen} -- GitLab From 57b38fab07e014426a36a78fcf659c6a5cc2e76f Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 19 Aug 2016 12:47:14 +0200 Subject: [PATCH 07/41] Docfix. --- man/RatioRMS.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 2e93d992..49bc5cbc 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -36,6 +36,7 @@ The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p. ens.ref <- matrix(rnorm(500), c(100, 5)) obs <- rnorm(100) Ratio <- RatioRMS(ens, ens.ref, obs) +} \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -- GitLab From 2ae82489bee61d2e1d42576fdf0e128aaf88fc14 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 5 Sep 2016 09:31:21 +0200 Subject: [PATCH 08/41] update Corr function --- R/Corr.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index eea1c4b7..ea3a9170 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -14,10 +14,13 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - + p <- c() + conflow <- c() + confhigh <- c() + ens.mean <- rowMeans(ens) CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) - + if (pval || conf) { if (method == "kendall" | method == "spearman") { eno <- Eno(rank(obs), 1) @@ -25,22 +28,25 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', eno <- Eno(obs, 1) } } - if (pval) { + if (pval & method == "pearson") { t <- CORR*sqrt((eno-2)/(1-(CORR^2))) p <- 1 - pt(t, eno-2) p.val <- p + } - if (conf) { + if (conf & method == "pearson") { conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) conf.int <- conf.int[!is.na(CORR)] - + conflow =conf.int[1] + confhigh=conf.int[2] } + invisible(result <- list(Corr = CORR, p.val = p, conf_low = conflow, confhigh = confhigh)) # Output # ~~~~~~~~ # - list(Corr = CORR,conf_low =conf.int[1],conf_high=conf.int[2],p.val =p.val) + } -- GitLab From 33ade855f0b33650a2e05fd3f037360fc446455d Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 14 Sep 2016 14:39:30 +0200 Subject: [PATCH 09/41] Minor changes to Corr.R --- R/Corr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Corr.R b/R/Corr.R index ea3a9170..56c0780a 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -44,7 +44,7 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', confhigh=conf.int[2] } - invisible(result <- list(Corr = CORR, p.val = p, conf_low = conflow, confhigh = confhigh)) + invisible(result <- list(corr = CORR, p.val = p, conf_low = conflow, conf_high = confhigh)) # Output # ~~~~~~~~ # -- GitLab From fc9ab4203f2bf4bccce7b66f81a81f85e3032970 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 11:54:04 +0100 Subject: [PATCH 10/41] Add hidden function to Corr for veriApply compatibility --- R/Corr.R | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++- man/Corr.Rd | 79 ++++++++++++++++++++++++-------- 2 files changed, 187 insertions(+), 20 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 56c0780a..40e89684 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,4 +1,130 @@ -Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', +Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, + limits = NULL, siglev = 0.95, method = 'pearson', + conf = TRUE, pval = 
TRUE) { + # + # Remove data along compROW dim if there is at least one NA between limits + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (is.null(compROW) == FALSE) { + if (is.null(limits) == TRUE) { + limits <- c(1, dim(var_obs)[compROW]) + } + outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) + outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) + var_obs[which(outrows)] <- NA + } + # + # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd + # pos + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[poscor] < 3 ) { + stop("At least 3 values required to compute correlation") + } + if (method != "kendall" && method != "spearman" && method != "pearson") { + stop("Wrong correlation method") + } + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + var_exp <- Enlarge(var_exp, 10) + var_obs <- Enlarge(var_obs, 10) + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- poscor + posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] + var_exp <- aperm(var_exp, posaperm) + var_obs <- aperm(var_obs, posaperm) + dimsaperm <- dim(var_exp) + # + + # Check the siglev arguments: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if (siglev > 1 || siglev < 0) { + stop("siglev need to be higher than O and lower than 1") + } + # + # Loop to compute correlation for each grid point + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dim_val <- 2 + dim_pval <- 4 + nvals <- 1 + 2*conf + pval + if (!conf) { + dim_val <- 1 + dim_pval <- 2 + } else { + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low + } + CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, + j10] + tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, + j10] + if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { + toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) + CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto + #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) + if (pval || conf) { + if (method == "kendall" | method == "spearman") { + eno <- Eno(rank(tmp2), 1) + } else if (method == "pearson") { + eno <- Eno(tmp2, 1) + } + } + if (pval) { + #t <- qt(0.95, eno - 2) + t <- qt(siglev, eno - 2) + CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, + j10] <- sqrt((t * t) / ((t * t) + eno - 2)) + } + if (conf) { + CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( + #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( + eno - 3)) + CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, + j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( + #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( + eno - 3)) + } + } + } + } + } + } + } + } + } + } + } + } + # + dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) + # + # Output + # ~~~~~~~~ + # + CORR +} + +.Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { if 
(method != "kendall" && method != "spearman" && method != "pearson") { diff --git a/man/Corr.Rd b/man/Corr.Rd index 5ba5ede6..311d6c78 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,20 +1,48 @@ \name{Corr} \alias{Corr} +\alias{.Corr} \title{ -Computes the correlation coefficient between the ensemble mean and observations. +Computes the correlation coefficient between an array of forecasts and their corresponding observations. } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) between the ensemble mean and the observations along the initialization times. The confidence interval and p-value from a one-tailed t-test are provided, where the number of degrees of freedom is calculated using the effective numberof observations (see Eno). \cr - - +Calculates the correlation coefficient (Pearson, Kendall or Spearman) between forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr +\cr +.Corr is the same function but with a matrix of experiments and a vector of observations as input. } \usage{ -Corr(ens, obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) +Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, + limits = NULL, siglev = 0.95, method = 'pearson', + conf = TRUE, pval = TRUE) \cr + +.Corr(ens, obs, siglev = 0.95, + method = 'pearson', conf = TRUE, pval = TRUE) } \arguments{ + \item{var_exp}{ +Array of experimental data. + } + + \item{var_obs}{ +Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. + } + + \item{posloop}{ +Dimension nobs and nexp. + } + + \item{poscor}{ +Dimension along which correlation are to be computed (the dimension of the start dates). + } + + \item{compROW}{ +Data taken into account only if (compROW)th row is complete.\cr Default = NULL. + } + + \item{limits}{ +Complete between limits[1] & limits[2]. Default = NULL. + } + \item{ens}{ N by M matrix of N forecasts from M ensemble members. } @@ -35,18 +63,31 @@ Whether to compute confidence intervals (default = 'TRUE') or not (FALSE). Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE). 
} } -\value{ -A list containing the following components :\cr -statistic - the value of the test statistic \cr -conf_low - the lower limit of the confidence interval \cr -conf_high - the upper limit of the confidence interval \cr -p.val - the p value -} -\examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -Corr(ens,obs) -} + + \value{ Matrix with dimensions :\cr c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr +The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } + \examples{ # Load sample data as in Load() example: +example(Load) + clim <- Clim(sampleData$mod, sampleData$obs) + ano_exp <- Ano(sampleData$mod, clim$clim_exp) + ano_obs <- Ano(sampleData$obs, clim$clim_obs) + runmean_months <- 12 dim_to_smooth <- 4 + # Smooth along lead-times + smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) + smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) + dim_to_mean <- 2 # Mean along members + required_complete_row <- 3 # Discard start dates which contain any NA lead-times + leadtimes_per_startdate <- 60 + corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), + Mean1Dim(smooth_ano_obs, dim_to_mean), + compROW = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) + PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", + monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), + fileout = 'tos_cor.eps') } + \author{ History:\cr 0.1 - 2011-04 (V. 
Guemas, \email{vguemas at ic3.cat}) - Original code\cr -- GitLab From ff1bc6a42bda36c8d40c2d6f35207f32137cf7e8 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 12:24:17 +0100 Subject: [PATCH 11/41] Dual input for score functions --- R/RMS.R | 105 ++++++++++++++++++++++++++++++++++++++++++++++++- R/RMSSS.R | 90 ++++++++++++++++++++++++++++++++++++++++++ R/RatioRMS.R | 89 ++++++++++++++++++++++++++++++++++++++++- R/RatioSDRMS.R | 88 ++++++++++++++++++++++++++++++++++++++++- R/Trend.R | 83 +++++++++++++++++++++++++++++++++++++- man/RMS.Rd | 5 ++- 6 files changed, 454 insertions(+), 6 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index c8ba7c12..04bf52b2 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -1,4 +1,107 @@ -RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { +RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, + limits = NULL, siglev = 0.95, conf = TRUE) { + # + # Remove data along compROW dim if there is at least one NA between limits + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (is.null(compROW) == FALSE) { + if (is.null(limits) == TRUE) { + limits <- c(1, dim(var_obs)[compROW]) + } + outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) + outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) + var_obs[which(outrows)] <- NA + } + + # + # Enlarge var_exp & var_obs to 10 dim + move posloop & posRMS to 1st & 2nd + # pos + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[posRMS] < 2 ) { + stop("At least 2 values required to compute RMSE") + } + enlvarexp <- Enlarge(var_exp, 10) + enlvarobs <- Enlarge(var_obs, 10) + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- posRMS + posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] + permvarexp <- aperm(enlvarexp, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp) + # + # RMS & its confidence interval computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (conf) { + nvals <- 3 + dim_rms <- 2 + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low + } else { + nvals <- 1 + dim_rms <- 1 + } + enlrms <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + dif <- array(dim = dimsaperm[-1]) + dif[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , + , ] - permvarobs[jobs, , , , , , , , , ] + enlrms[jexp, jobs, dim_rms, , , , , , , , ] <- Mean1Dim(dif ** 2, 1, + narm = TRUE) ** 0.5 + if (conf) { + eno <- Eno(dif, 1) + for (j3 in 1:dimsaperm[3]){ + for (j4 in 1:dimsaperm[4]){ + for (j5 in 1:dimsaperm[5]){ + for (j6 in 1:dimsaperm[6]){ + for (j7 in 1:dimsaperm[7]){ + for (j8 in 1:dimsaperm[8]){ + for (j9 in 1:dimsaperm[9]){ + for (j10 in 1:dimsaperm[10]){ + ndat <- length(sort(dif[, j3, j4, j5, j6, j7, j8, j9, + j10])) + enlrms[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, + j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, + j8, j9, j10] ** 2 / qchisq(conf_high, eno[j3, j4, j5, + j6, j7, j8, j9, j10] - 1)) ** 0.5 + enlrms[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, + j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, + j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, + j8, j9, j10] ** 2 
/ qchisq(conf_low, eno[j3, j4, j5, + j6, j7, j8, j9, j10] - 1)) ** 0.5 + } + } + } + } + } + } + } + } + } + } + } + + dim(enlrms) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) + # + # Output + # ~~~~~~~~ + # + enlrms +} + +.RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { # diff --git a/R/RMSSS.R b/R/RMSSS.R index 585a398b..9a33c8a6 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -33,6 +33,96 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { nvals <- 1 } enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + dif1 <- array(dim = dimsaperm[-1]) + dif2 <- array(dim = dimsaperm[-1]) + dif1[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , + , ] - permvarobs[jobs, , , , , , , , , ] + dif2[, , , , , , , , ] <- permvarobs[jobs, , , , , , , , , ] + rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 + rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( + rms2), na.rm = TRUE) / 1000 + enlRMSSS[jexp, jobs, 1, , , , , , , , ] <- 1 - (rms1 / rms2) + if (pval) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + l1 <- eno1[j3, j4, j5, j6, j7, j8, j9, j10] + l2 <- eno2[j3, j4, j5, j6, j7, j8, j9, j10] + if (is.na(l1) == FALSE & is.na(l2) == FALSE & l1 > 2 & l2 > 2) { + enlRMSSS[jexp, jobs, 2, j3, j4, j5, j6, j7, j8, j9, + j10] <- 1 - pf(F[j3, j4, j5, j6, j7, j8, j9, + j10], l1 - 1, l2 - 1) + } else { + enlRMSSS[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- NA + } + } + } + } + } + } + } + } + } + } + } + } + + dim(enlRMSSS) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) + # + # Output + # ~~~~~~~~ + # + enlRMSSS +} + +.RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { + # + # Enlarge var_exp & var_obs & clim to 10 dim + move posloop & posRMS + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind !=posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[posRMS] < 2 ) { + stop("At least 2 values required to compute RMSE") + } + enlvarexp <- Enlarge(var_exp, 10) + enlvarobs <- Enlarge(var_obs, 10) + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- posRMS + posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] + permvarexp <- aperm(enlvarexp, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp) + # + # RMSSS and its pvalue computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) for (jexp in 1:nexp) { for (jobs in 1:nobs) { diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 5589f170..a5bc5b48 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -1,4 +1,91 @@ -RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { +RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { + # + # Enlarge var_exps & var_obs to 10 dim + move posRMS to 1st pos + # 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp1) + for (iind in 1:length(dimsvar)) { + if (dim(var_exp2)[iind] != dimsvar[iind] | + dim(var_obs)[iind] != dimsvar[iind]) { + stop("all input vars should have the same dimensions") + } + } + enlvarexp1 <- Enlarge(var_exp1, 10) + enlvarexp2 <- Enlarge(var_exp2, 10) + enlvarobs <- Enlarge(var_obs, 10) + posaperm <- 1:10 + posaperm[1] <- posRMS + posaperm[posRMS] <- 1 + permvarexp1 <- aperm(enlvarexp1, posaperm) + permvarexp2 <- aperm(enlvarexp2, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp1) + # + # RMS ratio and its pvalue computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + enlratiorms <- array(dim = c(nvals, dimsaperm[2:10])) + dif1 <- permvarexp1 - permvarobs + dif2 <- permvarexp2 - permvarobs + rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 + rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( + abs(rms2), na.rm = TRUE) / 1000 + enlratiorms[1, , , , , , , , , ] <- (rms1 / rms2) + if (pval) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) + F[which(F < 1)] <- 1 / F[which(F < 1)] + for (j2 in 1:dimsaperm[2]) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + l1 <- eno1[j2, j3, j4, j5, j6, j7, j8, j9, j10] + l2 <- eno2[j2, j3, j4, j5, j6, j7, j8, j9, j10] + if (!is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { + enlratiorms[2, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- (1 - pf(F[j2, j3, j4, j5, j6, j7, j8, j9, j10], + l1 - 1, l2 - 1)) * 2 + } else { + enlratiorms[1, j2, j3, j4, j5, j6, j7, j8, j9, j10] <- NA + } + } + } + } + } + } + } + } + } + } + } + + enlratiorms <- aperm(enlratiorms, posaperm) + if (pval) { + dimsvar[posRMS] <- 2 + } else { + dimsvar[posRMS] <- 1 + } + dim(enlratiorms) <- dimsvar + # + # Output + # ~~~~~~~~ + # + enlratiorms +} + +.RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { # # RMS ratio and its pvalue computation diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 1edbd700..dd30eed6 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -1,4 +1,90 @@ -RatioSDRMS <- function(ens, obs, pval = TRUE) { +RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { + # + # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimexp <- dim(var_exp) + dimobs <- dim(var_obs) + if (length(dimexp) < 4 | length(dimobs) < 4) { + stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)") + } + for (jn in 3:max(length(dimexp), length(dimobs))) { + if (dimexp[jn] != dimobs[jn]) { + stop("Wrong input dimensions") + } + } + var_exp <- Enlarge(var_exp, 7) + var_obs <- Enlarge(var_obs, 7) + + # + # Ratio RMSE / SD and its significance level + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + ensmeanexp <- Mean1Dim(var_exp, 2) + ensmeanobs <- Mean1Dim(var_obs, 2) + dimrms <- c(dimexp[1], dimobs[1], dimexp[4:length(dimexp)]) + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + dimratiormssd <- c(dimexp[1], dimobs[1], nvals, dimexp[4:length(dimexp)]) + if (length(dimrms) < 6) { + dimrms <- c(dimrms, array(1, dim = (6 - 
length(dimrms)))) + } + if (length(dimratiormssd) < 7) { + dimenlratiormssd <- c(dimratiormssd, + array(1, dim = (7 - length(dimratiormssd)))) + } else { + dimenlratiormssd <- dimratiormssd + } + dif <- var_exp - InsertDim(ensmeanexp, 2, dimexp[2]) + std <- apply(array(dif, dim = c(dimexp[1], dimexp[2] * dimexp[3], + dimrms[3:6])), c(1, 3, 4, 5, 6), sd, na.rm = TRUE) + enosd <- apply(Eno(dif, 3), c(1, 3, 4, 5, 6), sum, na.rm = TRUE) + rms <- array(dim = dimrms) + enlratiormssd <- array(dim = dimenlratiormssd) + for (jexp in 1:dimexp[1]) { + for (jobs in 1:dimobs[1]) { + dif <- ensmeanexp[jexp, , , , , ] - ensmeanobs[jobs, , , , , ] + rms[jexp,jobs, , , , ] <- Mean1Dim(dif ** 2, 1, narm = TRUE) ** 0.5 + enorms <- array(Eno(dif, 1), dim = dimrms[3:6]) + enlratiormssd[jexp, jobs, 1, , , , ] <- std[jexp, , , , ] / rms[jexp, + jobs, , , , ] + if (pval) { + for (jltime in 1:dimrms[3]) { + for (jlev in 1:dimrms[4]) { + for (jlat in 1:dimrms[5]) { + for (jlon in 1:dimrms[6]) { + l1 <- enosd[jexp, jltime, jlev, jlat, jlon] + l2 <- enorms[jltime, jlev, jlat, jlon] + F <- (enosd[jexp, jltime, jlev, jlat, jlon] * (std[jexp, jltime, + jlev, jlat, jlon]) ** 2 / (enosd[jexp, jltime, jlev, jlat, + jlon] - 1)) / (enorms[jltime, jlev, jlat, jlon] * (rms[jexp, + jobs, jltime, jlev, jlat, jlon]) ** 2 / (enorms[jltime, + jlev, jlat, jlon] - 1)) + if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { + enlratiormssd[jexp, jobs, 2, jltime, jlev, jlat, + jlon] <- 1 - pf(F, l1 - 1, l2 - 1) + } else { + enlratiormssd[jexp, jobs, 1, jltime, jlev, jlat, jlon] <- NA + } + } + } + } + } + } + } + } + dim(enlratiormssd) <- dimratiormssd + # + # Output + # ~~~~~~~~ + # + enlratiormssd +} + +.RatioSDRMS <- function(ens, obs, pval = TRUE) { # # Ratio RMSE / SD and its significance level diff --git a/R/Trend.R b/R/Trend.R index f427f283..b8ef4d0c 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -1,4 +1,85 @@ -Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { +Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { + # + # Enlarge the size of var to 10 and move posTR to first position + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var) + if (is.null(dimsvar)) { + dimsvar <- length(var) + } + enlvar <- Enlarge(var, 10) + outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) + if (conf) { + nvals <- 4 + poscoef2 <- 2 + poscoef1 <- 4 + } else { + nvals <- 2 + poscoef2 <- 1 + poscoef1 <- 2 + } + outdim[posTR] <- nvals + posaperm <- 1:10 + posaperm[posTR] <- 1 + posaperm[1] <- posTR + enlvar <- aperm(enlvar, posaperm) + dimsaperm <- outdim[posaperm] + # + # Loop on all dimensions to compute trends + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + enltrend <- array(dim = dimsaperm) + enldetrend <- array(dim = dim(enlvar)) + for (j2 in 1:dimsaperm[2]) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + tmp <- enlvar[, j2, j3, j4, j5, j6, j7, j8, j9, j10] + if (any(!is.na(tmp))) { + mon <- seq(tmp) * interval + lm.out <- lm(tmp ~ mon, na.action = na.omit) + enltrend[poscoef2, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- lm.out$coefficients[2] + enltrend[poscoef1, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- lm.out$coefficients[1] + if (conf) { + enltrend[c(1, 3), j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- confint(lm.out, level = siglev)[2, 1:2] + 
} + enldetrend[is.na(tmp) == FALSE, j2, j3, j4, j5, j6, j7, j8, + j9, j10] <- tmp[is.na(tmp) == FALSE] - lm.out$fitted.values + } + } + } + } + } + } + } + } + } + } + # + # Back to the original dimensions + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + enldetrend <- aperm(enldetrend, posaperm) + dim(enldetrend) <- dimsvar + enltrend <- aperm(enltrend, posaperm) + dimsvar[posTR] <- nvals + dim(enltrend) <- dimsvar + # + # Outputs + # ~~~~~~~~~ + # + invisible(list(trend = enltrend, detrended = enldetrend)) +} + +.Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { ensmean <- rowMeans(ens, na.rm = TRUE) diff --git a/man/RMS.Rd b/man/RMS.Rd index 8d276c57..fab80a03 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -1,10 +1,11 @@ \name{RMS} \alias{RMS} +\alias{.RMS} \title{ -Computes the root mean square error of the ensemble mean +Computes the root mean square error for a set of forecasts and observations } \description{ -Computes the RMSE of an ensemble mean forecast against a vector of observations.\cr +Computes the RMSE of an array of forecasts against an array of corresponding observations.\cr The confidence interval of the RMSE is obtained with a chi2 distribution. } \usage{ -- GitLab From d7873e1f7c61a27bf772b5c0ae6b3950eeae2c47 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 14:17:02 +0100 Subject: [PATCH 12/41] Small update of manuals to include hidden functions --- man/RMS.Rd | 61 +++++++++++++++++++++++++++++++++++++---------- man/RMSSS.Rd | 9 ++++++- man/RatioRMS.Rd | 25 ++++++++++++++----- man/RatioSDRMS.Rd | 43 ++++++++++++++++++++++++--------- 4 files changed, 107 insertions(+), 31 deletions(-) diff --git a/man/RMS.Rd b/man/RMS.Rd index fab80a03..09b9d0a8 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -2,23 +2,41 @@ \alias{RMS} \alias{.RMS} \title{ -Computes the root mean square error for a set of forecasts and observations +Computes Root Mean Square Error } \description{ -Computes the RMSE of an array of forecasts against an array of corresponding observations.\cr -The confidence interval of the RMSE is obtained with a chi2 distribution. +Computes the root mean square error for an array of forecasts, var_exp and an array of observations, var_obs, which should have the same dimensions except along the posloop dimension where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr +The RMSE is computed along the posRMS dimension which should correspond to the startdate dimension.\cr +If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr +Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr +The confidence interval relies on a chi2 distribution. } \usage{ -RMS(ens, obs, siglev = 0.95, conf = TRUE) +RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) } \arguments{ - \item{ens}{ + \item{var_exp}{ +Matrix of experimental data. + } + \item{var_obs}{ +Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. + } + \item{ens}{ N by M matrix of N forecasts from M ensemble members. } \item{obs}{ Vector of the corresponding observations of length N. 
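For a quick feel of why these paired "dot" interfaces exist, a minimal sketch of driving one of them from easyVerification's veriApply() with toy data (illustrative only, not part of the patch; it assumes easyVerification is installed, uses the package-internal function via :::, and the argument names tdim/ensdim are as in easyVerification's documentation):

library(s2dverification)
library(easyVerification)   # provides veriApply()
set.seed(1)
# toy hindcast: 10 gridpoints x 30 start dates x 5 members
fcst <- array(rnorm(10 * 30 * 5), dim = c(10, 30, 5))
obs  <- array(rnorm(10 * 30), dim = c(10, 30))
.RMS <- s2dverification:::.RMS   # expose the hidden function under a findable name
scores <- veriApply(".RMS", fcst = fcst, obs = obs, tdim = 2, ensdim = 3)
str(scores)   # one rms / conf_low / conf_high value per gridpoint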
} - + \item{posloop}{ +Dimension nobs and nexp. + } + \item{posRMS}{ +Dimension along which RMSE are to be computed (the dimension of the start dates). + } + \item{compROW}{ +Data taken into account only if (compROW)th row is complete.\cr +Default = NULL. + } \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } @@ -30,15 +48,32 @@ Whether to compute confidence interval or not. TRUE by default. } } \value{ -A list containing the following components :\cr -rms - the root mean square error \cr -conf_low - the lower limit of the confidence interval \cr -conf_high - the upper limit of the confidence interval \cr +Matrix with dimensions:\cr + c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr +The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). } \examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -RMS(ens, obs) +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +runmean_months <- 12 +dim_to_smooth <- 4 # Smooth along lead-times +smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) +smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) +dim_to_mean <- 2 # Mean along members +required_complete_row <- 3 # Discard start-dates for which some leadtimes are missing +leadtimes_per_startdate <- 60 +rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean), + Mean1Dim(smooth_ano_obs, dim_to_mean), + compROW = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) +PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", + monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = c(0), + fileout = 'tos_rms.eps') } \author{ History:\cr diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 3d77230d..2547a9e5 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -1,10 +1,11 @@ \name{RMSSS} \alias{RMSSS} +\alias{.RMSSS} \title{ Computes Root Mean Square Skill Score } \description{ -Arrays var_exp & var_obs should have the same dimensions except along posloop where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr +Computes the root mean square error skill score between an array of forecasts, var_exp and an array of observations, var_obs, which should have the same dimensions except along posloop where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr The RMSSS are computed along the posRMS dimension which should correspond to the startdate dimension.\cr The p-value is optionally provided by a one-sided Fisher test. @@ -18,6 +19,12 @@ Array of experimental data. } \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. 
+ }
+ \item{ens}{
+N by M matrix of N forecasts from M ensemble members.
+ }
+ \item{obs}{
+Vector of N observations.
 }
 \item{posloop}{
Dimension nobs and nexp.
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index 49bc5cbc..7cd5bac1 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -1,5 +1,6 @@
 \name{RatioRMS}
 \alias{RatioRMS}
+\alias{.RatioRMS}
 \title{
 Computes the Ratio Between The RMSE of Two Experiments
 }
@@ -13,10 +14,10 @@ RatioRMS(ens, ens.ref, obs, pval = TRUE)
 }
 \arguments{
   \item{ens}{
-Matrix of experimental data 1.
+Array of experimental data 1.
 }
   \item{ens.ref}{
-Matrix of experimental data 2.
+Array of experimental data 2.
 }
 \item{ens.ref}{
 Vector of observations.
@@ -32,10 +33,22 @@ p.val - the p value \cr
 The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.
 }
 \examples{
-  ens <- matrix(rnorm(500), c(100, 5))
-  ens.ref <- matrix(rnorm(500), c(100, 5))
-  obs <- rnorm(100)
-  Ratio <- RatioRMS(ens, ens.ref, obs)
+  leadtimes_dimension <- 4
+initial_month <- 11
+mean_start_month <- 12
+mean_stop_month <- 2
+sampleData$mod <- Season(sampleData$mod, leadtimes_dimension, initial_month,
+                         mean_start_month, mean_stop_month)
+sampleData$obs <- Season(sampleData$obs, leadtimes_dimension, initial_month,
+                         mean_start_month, mean_stop_month)
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ],
+                 ano_exp[ , 3, , , , ][, 1, , ],
+                 Mean1Dim(ano_obs, 2)[1, , 1, , ], 1)
+PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat,
+            toptitle = 'Ratio RMSE')
 }
 \author{
 History:\cr
diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd
index 28d01b49..227591ee 100644
--- a/man/RatioSDRMS.Rd
+++ b/man/RatioSDRMS.Rd
@@ -1,18 +1,32 @@
 \name{RatioSDRMS}
 \alias{RatioSDRMS}
+\alias{RatioSDRMS}
 \title{
-Computes the ratio between the ensemble spread and the RMSE of the ensemble mean
+Computes the ratio between the ensemble spread and RMSE
 }
 \description{
-Calculates the ratio of spread and the RMSE from a matrix of N ensemble members for M forecasts and the corresponding vector of observations of length M. \cr
-
-The p-value is provided by a one-sided Fisher test.
+Arrays var_exp & var_obs should have dimensions between\cr
+  c(nmod/nexp, nmemb/nparam, nsdates, nltime)\cr
+and\cr
+  c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr
+The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr
+The p-value is provided by a one-sided Fisher test.
 }
 \usage{
 RatioSDRMS(ens, obs, pval = TRUE)
 }
 \arguments{
-\item{ens}{
+  \item{var_exp}{
+Model data:\cr
+ c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to\cr
+ c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)
+  }
+  \item{var_obs}{
+Observational data:\cr
+ c(nobs, nmemb, nsdates, nltime) up to\cr
+ c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon)
+  }
+  \item{ens}{
 N by M matrix of N forecasts from M ensemble members.
 }
 \item{obs}{
@@ -23,14 +37,21 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. 
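A toy call of the hidden spread-to-error interface described above (illustrative only; field names follow the manual entries added later in this series, and the internal function is reached via :::):

library(s2dverification)
set.seed(2)
obs <- rnorm(100)                                # 100 verification times
ens <- matrix(rnorm(500), nrow = 100, ncol = 5)  # 5 ensemble members
out <- s2dverification:::.RatioSDRMS(ens, obs, pval = TRUE)
out$ratio   # ensemble spread / RMSE of the ensemble mean
out$p.val   # one-sided Fisher test of Ho: SD/RMSE = 1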
} } \value{ -A vector containing the following components :\cr -ratio - the ratio of the spread and RMSE \cr -p.val - the p.value (if pval ='TRUE') \cr +Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to + c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). +The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. } \examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -RatioSDRMS(ens,obs) +# Load sample data as in Load() example: +example(Load) +rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) +rsdrms2 <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) +rsdrms2[, , 2, ] <- rsdrms[, , 1, ] +rsdrms2[, , 4, ] <- rsdrms[, , 2, ] +PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", + monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, + fileout = 'tos_rsdrms.eps') } \author{ History:\cr -- GitLab From 0d13def7a9b9f085728bf15270e3991e671b8386 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 16:03:22 +0100 Subject: [PATCH 13/41] Minor bugfixes in documentation --- R/BrierScore.R | 78 +++++++++++++++++++++++++++++++++++++++- R/RMSSS.R | 90 ----------------------------------------------- man/BrierScore.Rd | 21 ++++++----- man/Corr.Rd | 5 +-- man/RMS.Rd | 2 ++ man/RMSSS.Rd | 8 +---- man/RatioRMS.Rd | 46 +++++++++++++++++++++--- man/RatioSDRMS.Rd | 6 ++-- man/Trend.Rd | 11 +++++- 9 files changed, 152 insertions(+), 115 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index ff4fc2c1..50b38730 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -1,4 +1,80 @@ -BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { +BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { + if (max(pred) > 1 | min(pred) < 0) { + stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") + } else if (max(obs) != 1 & min(obs) != 0) { + .message("Binary events must be either 0 or 1. Are you certain this is a binary event? 
") + } else { + nbins <- length(thresholds) - 1 # Number of bins + n <- length(pred) + bins <- as.list(paste("bin", 1:nbins,sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) + } + } + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + obar <- sum(obs) / length(obs) + relsum <- ressum <- term1 <- term2 <- 0 + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + for (j in 1:nk[i]) { + term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } + } + } + rel <- relsum / n + res <- ressum / n + unc <- obar * (1 - obar) + bs <- sum((pred - obs)^2) / n + bs_check_res <- rel - res + unc + bss_res <- (res - rel) / unc + gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution + bs_check_gres <- rel - gres + unc # BS using GRES + bss_gres <- (gres - rel) / unc # BSS using GRES + + # + # Estimating the bias-corrected components of the BS + # + term3 <- array(0, nbins) + for (i in 1:nbins) { + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + } + term_a <- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs)) + } +} + + +.BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { if (max(ens) > 1 | min(ens) < 0) { stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? 
\n") } else if (max(obs) != 1 & min(obs) != 0) { diff --git a/R/RMSSS.R b/R/RMSSS.R index 9a33c8a6..74a2a387 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -87,93 +87,3 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { # enlRMSSS } - -.RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { - # - # Enlarge var_exp & var_obs & clim to 10 dim + move posloop & posRMS - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind !=posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) - # - # RMSSS and its pvalue computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif1 <- array(dim = dimsaperm[-1]) - dif2 <- array(dim = dimsaperm[-1]) - dif1[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - dif2[, , , , , , , , ] <- permvarobs[jobs, , , , , , , , , ] - rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 - rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - enlRMSSS[jexp, jobs, 1, , , , , , , , ] <- 1 - (rms1 / rms2) - if (pval) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - l1 <- eno1[j3, j4, j5, j6, j7, j8, j9, j10] - l2 <- eno2[j3, j4, j5, j6, j7, j8, j9, j10] - if (is.na(l1) == FALSE & is.na(l2) == FALSE & l1 > 2 & l2 > 2) { - enlRMSSS[jexp, jobs, 2, j3, j4, j5, j6, j7, j8, j9, - j10] <- 1 - pf(F[j3, j4, j5, j6, j7, j8, j9, - j10], l1 - 1, l2 - 1) - } else { - enlRMSSS[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- NA - } - } - } - } - } - } - } - } - } - } - } - } - - dim(enlRMSSS) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlRMSSS -} diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index e6639f82..e4cec048 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -1,7 +1,8 @@ \name{BrierScore} \alias{BrierScore} +\alias{.BrierScore} \title{ -Compute Brier Score and its decomposition and the Brier Skill Score +Compute Brier Score And Its Decomposition And Brier Skill Score } \description{ Returns the values of the BS and its standard decomposition as well as the addition of the two winthin-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference forecast. @@ -10,16 +11,20 @@ Stephenson et al. (2008). 
Two extra components in the Brier score decomposition. Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr
 }
 \usage{
-BrierScore(ens,obs, thresholds = seq(0, 1, 0.1))
+BrierScore(obs, pred, thresholds = seq(0, 1, 0.1))
+
+.BrierScore(ens, obs, thresholds = seq(0, 1, 0.1))
 }
-\arguments{
-  \item{ens}{
-Vector of probablistic predictions with values in the range [0,1]
-  }
+\arguments{ 
   \item{obs}{
 Vector of binary observations (1 or 0)
   }
-
+  \item{pred}{
+Vector of probabilistic predictions with values in the range [0,1]
+  }
+  \item{ens}{
+Matrix of predictions with values in the range [0,1] for the .BrierScore function
+  }
   \item{thresholds}{
 Values used to bin the forecasts. By default the bins are {[0,0.1), [0.1, 0.2), ... [0.9, 1]}
   }
@@ -48,7 +53,7 @@ $obs: probability forecasts of the event\cr
 \examples{
 a <- runif(10)
 b <- round(a)
-x <- BrierScore(a,b)
+x <- BrierScore(b, a)
 x$bs - x$bs_check_res
 x$bs - x$bs_check_gres
 x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected
diff --git a/man/Corr.Rd b/man/Corr.Rd
index 311d6c78..96041b46 100644
--- a/man/Corr.Rd
+++ b/man/Corr.Rd
@@ -13,7 +13,7 @@ Calculates the correlation coefficient (Pearson, Kendall or Spearman) between fo
 \usage{
 Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, 
      limits = NULL, siglev = 0.95, method = 'pearson', 
-     conf = TRUE, pval = TRUE) \cr
+     conf = TRUE, pval = TRUE)
 
 .Corr(ens, obs, siglev = 0.95, 
       method = 'pearson', conf = TRUE, pval = TRUE)
@@ -71,7 +71,8 @@ example(Load)
   clim <- Clim(sampleData$mod, sampleData$obs)
   ano_exp <- Ano(sampleData$mod, clim$clim_exp)
   ano_obs <- Ano(sampleData$obs, clim$clim_obs)
-  runmean_months <- 12 dim_to_smooth <- 4
+  runmean_months <- 12
+  dim_to_smooth <- 4
   # Smooth along lead-times   
   smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth)
   smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth)
diff --git a/man/RMS.Rd b/man/RMS.Rd
index 09b9d0a8..946c20f9 100644
--- a/man/RMS.Rd
+++ b/man/RMS.Rd
@@ -13,6 +13,8 @@ The confidence interval relies on a chi2 distribution.
 }
 \usage{
 RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE)
+
+.RMS(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE)
 }
 \arguments{
   \item{var_exp}{
diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd
index 2547a9e5..7e618836 100644
--- a/man/RMSSS.Rd
+++ b/man/RMSSS.Rd
@@ -1,6 +1,5 @@
 \name{RMSSS}
 \alias{RMSSS}
-\alias{.RMSSS}
 \title{
 Computes Root Mean Square Skill Score
 }
@@ -12,6 +11,7 @@ The p-value is optionally provided by a one-sided Fisher test.
 }
 \usage{
 RMSSS(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE)
+
 }
 \arguments{
   \item{var_exp}{
Array of experimental data.
 }
   \item{var_obs}{
Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp.
-  }
-  \item{ens}{
-N by M matrix of N forecasts from M ensemble members.
-  }
-  \item{obs}{
-Vector of N observations.
   }
   \item{posloop}{
 Dimension nobs and nexp.
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index 7cd5bac1..5982b713 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -10,17 +10,31 @@ The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr
 The p-value is provided by a two-sided Fisher test. 
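A toy sketch of the two-experiment comparison just described (illustrative, not part of the patch; random data stand in for two real forecast systems, and the hidden function is reached via :::):

library(s2dverification)
set.seed(3)
obs <- rnorm(100)
ens <- matrix(rnorm(500), nrow = 100, ncol = 5)      # experiment 1
ens.ref <- matrix(rnorm(500), nrow = 100, ncol = 5)  # experiment 2
rr <- s2dverification:::.RatioRMS(ens, ens.ref, obs, pval = TRUE)
rr$ratiorms  # RMSE(ens, obs) / RMSE(ens.ref, obs)
rr$p.val     # two-sided Fisher test of Ho: RMSE1/RMSE2 = 1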
} \usage{ -RatioRMS(ens, ens.ref, obs, pval = TRUE) +RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) + +.RatioRMS(ens, ens.ref, obs, pval = TRUE) } \arguments{ - \item{ens}{ + \item{var_exp1}{ Array of experimental data 1. } - \item{ens.ref}{ + \item{var_exp2}{ Array of experimental data 2. + } + \item{var_obs}{ +Array of observations. + } + \item{ens}{ +Matrix of experimental data 1. } \item{ens.ref}{ +Matrix of experimental data 2. + } + \item{obs}{ Vector of observations. + } + \item{posRMS}{ +Dimension along which the RMSE are to be computed = the position of the start dates. } \item{pval}{ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. @@ -33,7 +47,31 @@ p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. } \examples{ - leadtimes_dimension <- 4 +# See examples on Load() to understand the first lines in this example + \dontrun{ +data_path <- system.file('sample_data', package = 's2dverification') +expA <- list(name = 'experiment', path = file.path(data_path, + 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', + '$VAR_NAME$_$START_DATE$.nc')) +obsX <- list(name = 'observation', path = file.path(data_path, + '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', + '$VAR_NAME$_$YEAR$$MONTH$.nc')) + +# Now we are ready to use Load(). +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- Load('tos', list(expA), list(obsX), startDates, + output = 'lonlat', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } +leadtimes_dimension <- 4 initial_month <- 11 mean_start_month <- 12 mean_stop_month <- 2 diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 227591ee..2f0c5084 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -1,6 +1,6 @@ \name{RatioSDRMS} \alias{RatioSDRMS} -\alias{RatioSDRMS} +\alias{.RatioSDRMS} \title{ Computes the ratio between the ensemble spread and RMSE } @@ -13,7 +13,9 @@ The ratio between the standard deviation of the members around the ensemble mean The p-value is provided by a one-sided Fischer test. } \usage{ -RatioSDRMS(ens, obs, pval = TRUE) +RatioSDRMS(var_exp, var_obs, pval = TRUE) + +.RatioSDRMS(ens, obs, pval = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/Trend.Rd b/man/Trend.Rd index 0a3a930b..dc2d7813 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,5 +1,6 @@ \name{Trend} \alias{Trend} +\alias{.Trend} \title{ Computes the Trend of the Ensemble Mean } @@ -9,9 +10,14 @@ Trend() also provides the time series of the detrended ensemble mean forecasts.\ The confidence interval relies on a student-T distribution. } \usage{ -Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) +Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) + +.Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) } \arguments{ + \item{var}{ +Array of any number of dimensions up to 10. + } \item{ens}{ M by N matrix of M forecasts from N ensemble members. } @@ -26,6 +32,9 @@ Confidence level for the computation of confidence interval. 0.95 by default. \item{conf}{ Whether to compute the confidence levels or not. TRUE by default. } + \item{posTR}{ +Position along which to compute the trend. 
+ } } \value{ \item{$trend}{ -- GitLab From 16eb9ff7ae32c33a4dd8d3f08eb8aa2f2f3642ad Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 25 Nov 2016 14:19:34 +0100 Subject: [PATCH 14/41] Adding descriptions to documentation of hidden functions --- man/BrierScore.Rd | 2 ++ man/Corr.Rd | 24 +++++++++--------------- man/RMS.Rd | 2 ++ man/RatioRMS.Rd | 2 ++ man/RatioSDRMS.Rd | 2 ++ man/Trend.Rd | 2 ++ 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index e4cec048..d31998c7 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -9,6 +9,8 @@ Returns the values of the BS and its standard decomposition as well as the addit Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr Stephenson et al. (2008). Two extra components in the Brier score decomposition. Weather and Forecasting, 23: 752-757.\cr Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr + +.BrierScore provides the same functionality, but taking a matrix of ensemble members (ens) as input. } \usage{ BrierScore(obs, pred, thresholds = seq(0, 1, 0.1)) diff --git a/man/Corr.Rd b/man/Corr.Rd index 96041b46..d826a5e4 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -6,9 +6,9 @@ Computes the correlation coefficient between an array of forecasts and their cor } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) between forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr +Calculates the correlation coefficient (Pearson, Kendall or Spearman) for an array of forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). 
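A minimal usage sketch of the hidden correlation interface this description introduces (illustrative only; toy data stand in for Load() output, and the field names are those set in the earlier Corr commits of this series):

library(s2dverification)
set.seed(4)
obs <- rnorm(50)                      # one observation per start date
ens <- matrix(rnorm(250), nrow = 50)  # 50 start dates x 5 members
cc <- s2dverification:::.Corr(ens, obs, siglev = 0.95, method = 'pearson')
cc$corr       # correlation of the ensemble mean with obs
cc$p.val      # p-value of the one-sided t-test
cc$conf_low   # Fisher-transform confidence bounds
cc$conf_high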
\cr \cr -.Corr is the same function but with a matrix of experiments and a vector of observations as input. +.Corr calculates the correlation between the ensemble mean and the observations, using an N by M matrix (ens) of forecasts and a vector of observations (obs) as input. } \usage{ Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, @@ -26,30 +26,24 @@ Array of experimental data. \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. } - + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of the corresponding observations of length N. + } \item{posloop}{ Dimension nobs and nexp. } - \item{poscor}{ Dimension along which correlation are to be computed (the dimension of the start dates). } - \item{compROW}{ Data taken into account only if (compROW)th row is complete.\cr Default = NULL. } - \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } - - \item{ens}{ -N by M matrix of N forecasts from M ensemble members. - } - \item{obs}{ -Vector of the corresponding observations of length N. - } - \item{siglev}{ Significance level. Default = 0.95. } @@ -64,7 +58,7 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } } - \value{ Matrix with dimensions :\cr c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr + \value{ Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } \examples{ # Load sample data as in Load() example: example(Load) diff --git a/man/RMS.Rd b/man/RMS.Rd index 946c20f9..80ae569c 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -10,6 +10,8 @@ The RMSE is computed along the posRMS dimension which should correspond to the s If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval relies on a chi2 distribution. + +.RMS provides the same functionality but taking a matrix of ensemble members as input (ens). } \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 5982b713..e2d2d5a4 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -8,6 +8,8 @@ Computes the Ratio Between The RMSE of Two Experiments Calculates the ratio of the RMSE for two forecasts of the same observations.\cr The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr The p-value is provided by a two-sided Fischer test. 
+ +.RatioRMS provides the same functionality but taking two matrices of ensemble members (ens and ens.ref) as input. } \usage{ RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 2f0c5084..ef6b55da 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -11,6 +11,8 @@ and\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr The p-value is provided by a one-sided Fischer test. + +.RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (ens). } \usage{ RatioSDRMS(var_exp, var_obs, pval = TRUE) diff --git a/man/Trend.Rd b/man/Trend.Rd index dc2d7813..428d5f70 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -8,6 +8,8 @@ Computes the Trend of the Ensemble Mean Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr Trend() also provides the time series of the detrended ensemble mean forecasts.\cr The confidence interval relies on a student-T distribution. + +.Trend provides the same functionality but taking a matrix ensemble members as input (ens). } \usage{ Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) -- GitLab From 0950613653995c5d7eb721453954909fbd03cb54 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 25 Nov 2016 14:36:41 +0100 Subject: [PATCH 15/41] Minor bugfix in Trend example --- man/Trend.Rd | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/man/Trend.Rd b/man/Trend.Rd index 428d5f70..c2981e0f 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -50,10 +50,17 @@ Same dimensions as var with linearly detrended var along the posTR dimension. 
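A quick illustrative run of the hidden trend interface (not part of the patch; a drift is injected into one member so the fit has something to find, and the output layout follows the man page entries in this series):

library(s2dverification)
set.seed(5)
ens <- matrix(rnorm(500), nrow = 100, ncol = 5)
ens[, 1] <- ens[, 1] + 0.05 * seq_len(100)  # linear drift in member 1
tr <- s2dverification:::.Trend(ens, interval = 1, siglev = 0.95, conf = TRUE)
tr$trend      # fit coefficients and their confidence bounds
tr$detrended  # ensemble-mean series with the linear fit removed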
} } \examples{ -ens <- matrix(rnorm(500), c(100, 5)) -#Add a trend to the first ensemble member -ens[,1] <- ens[, 1] + (seq(1, 100, 1) * 5) -ens.trend <- Trend(ens) +# Load sample data as in Load() example: +example(Load) +months_between_startdates <- 60 +trend <- Trend(sampleData$obs, 3, months_between_startdates) +PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", + monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = 0, + fileout = 'tos_obs_trend.eps') +PlotAno(trend$detrended, NULL, startDates, + toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', + legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') } \author{ History:\cr -- GitLab From bca0686b0d49549027aea7545b203fbabcb89a83 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 28 Nov 2016 09:30:30 +0100 Subject: [PATCH 16/41] Add output values for hidden functions to manual --- man/Corr.Rd | 11 +++++++++-- man/RMS.Rd | 8 ++++++-- man/RatioRMS.Rd | 10 ++++++++-- man/RatioSDRMS.Rd | 5 ++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index d826a5e4..696ce01a 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -58,8 +58,15 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } } - \value{ Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } + \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr +The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. +.Corr: A list with elements 'corr', 'p.val', 'conf_low', 'conf_high' is output, corresponding to the correlation test statistic, the p value and the upper and lower confidence limits, respectively. +.Corr: +$corr: the correlation statistic\cr +$p.val: the p value\cr +$conf_low: the lower confidence limit\cr +$conf_high: the upper confidence limit\cr +} \examples{ # Load sample data as in Load() example: example(Load) clim <- Clim(sampleData$mod, sampleData$obs) diff --git a/man/RMS.Rd b/man/RMS.Rd index 80ae569c..bf972a17 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -52,9 +52,13 @@ Whether to compute confidence interval or not. TRUE by default. 
} } \value{ -Matrix with dimensions:\cr +RMS: Array with dimensions:\cr c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). +The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). \cr +.RMS: +$rms: root mean square error\cr +$conf_low: lower confidence interval\cr +$conf_high: upper confidence interval\cr } \examples{ # Load sample data as in Load() example: diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index e2d2d5a4..4e074d10 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -43,10 +43,16 @@ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. } } \value{ -A list containing the following components :\cr +RatioRMS: A list containing the following components :\cr ratiorms - the ratio of the rms of the two ensembles \cr p.val - the p value \cr -The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. +The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr +.RatioRMS: +$ratiorms: the ratio of the root mean square errors\cr +$p.val: the p value\cr + + + } \examples{ # See examples on Load() to understand the first lines in this example diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index ef6b55da..0adcd5c2 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -41,9 +41,12 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. } } \value{ -Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to +RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. +.RatioSDRMS: +$ratio: ratio of the ensemble spread and RMSE\cr +$p.val: the p value\cr } \examples{ # Load sample data as in Load() example: -- GitLab From 3785381c95ac7fb1c6b71568ad4d3286e0133b6e Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 28 Nov 2016 09:51:17 +0100 Subject: [PATCH 17/41] More small changes to documentation --- man/Corr.Rd | 20 ++++++++++++++------ man/RMS.Rd | 14 ++++++++++---- man/RatioRMS.Rd | 10 ++++++---- man/RatioSDRMS.Rd | 14 +++++++++----- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index 696ce01a..3b2cae75 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -59,13 +59,21 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. 
If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 1.
-.Corr: A list with elements 'corr', 'p.val', 'conf_low' and 'conf_high', corresponding to the correlation statistic, the p value and the lower and upper confidence limits, respectively:
+The third dimension, of length 4 maximum, contains the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 1. \cr
+
 .Corr:
 \item{$corr}{
+The correlation statistic.
+ }
+ \item{$p.val}{
+Corresponds to the p value of the one-sided T-test (only present if \code{pval = TRUE}) for the correlation.
+ }
+ \item{$conf_low}{
+Corresponds to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the correlation.
+ }
+ \item{$conf_high}{
+Corresponds to the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the correlation.
+ }
 }
 \examples{
 # Load sample data as in Load() example:
 example(Load)
diff --git a/man/RMS.Rd b/man/RMS.Rd
index bf972a17..663694d5 100644
--- a/man/RMS.Rd
+++ b/man/RMS.Rd
@@ -9,7 +9,7 @@ Computes the root mean square error for an array of forecasts, var_exp and an ar
 The RMSE is computed along the posRMS dimension which should correspond to the startdate dimension.\cr
 If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr
 Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr
-The confidence interval relies on a chi2 distribution.
+The confidence interval relies on a chi2 distribution. \cr
 
 .RMS provides the same functionality but taking a matrix of ensemble members as input (ens).
 }
@@ -56,9 +56,15 @@ RMS: Array with dimensions:\cr
 c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr
 The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). \cr
 .RMS:
-$rms: root mean square error\cr
-$conf_low: lower confidence interval\cr
-$conf_high: upper confidence interval\cr
+ \item{$rms}{
+The root mean square error.
+ }
+ \item{$conf_low}{
+Corresponding to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms.
+ }
+ \item{$conf_high}{
+Corresponding to the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms.
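
The conf_low and conf_high limits described above follow the Fisher z-transform of the correlation, as implemented in R/Corr.R later in this series. A worked sketch, with n_eff standing in for the effective sample size (Eno) that the package computes from the data:

    # Sketch: siglev% confidence interval for a correlation r.
    r <- 0.6; n_eff <- 30; siglev <- 0.95
    half <- qnorm(1 - (1 - siglev) / 2) / sqrt(n_eff - 3)
    tanh(atanh(r) + c(-half, half))  # c(conf_low, conf_high)
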
+ }
 }
 \examples{
 # Load sample data as in Load() example:
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index 4e074d10..b8a7486d 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -48,10 +48,12 @@ ratiorms - the ratio of the rms of the two ensembles \cr
 p.val - the p value \cr
 The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr
 .RatioRMS:
-$ratiorms: the ratio of the root mean square errors\cr
-$p.val: the p value\cr
-
-
+\item{$ratiorms}{
+The ratio of the root mean square errors.
+ }
+ \item{$p.val}{
+Corresponds to the p value of the ratio of the rmse statistics (only present if \code{pval = TRUE}).
+ }
 }
 
 \examples{
diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd
index 0adcd5c2..e38bd2c7 100644
--- a/man/RatioSDRMS.Rd
+++ b/man/RatioSDRMS.Rd
@@ -10,8 +10,7 @@ Arrays var_exp & var_obs should have dimensions between\cr
 and\cr
 c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr
 The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr
-The p-value is provided by a one-sided Fischer test.
-
+The p-value is provided by a one-sided Fisher test.\cr
 .RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (ens).
 }
 \usage{
@@ -43,10 +42,15 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not.
 }
 \value{
 RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to
 c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon).
-The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1.
+The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1.\cr
 .RatioSDRMS:
+\item{$ratio}{
+The ratio of the ensemble spread and RMSE.
+ }
+ \item{$p.val}{
+Corresponds to the p value of the ratio (only present if \code{pval = TRUE}).
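
The one-sided Fisher test behind these p-values is the pf() call visible in R/RatioSDRMS.R in the next commit. A worked sketch with illustrative numbers; the real code derives the variance ratio and the effective sample sizes l1 and l2 from the data via Eno(), with small-sample corrections omitted here:

    # Sketch: p-value for Ho: SD/RMSE = 1 via a one-sided F test.
    ratio <- 1.4               # spread-to-RMSE ratio (illustrative)
    l1 <- 20; l2 <- 20         # effective sample sizes (illustrative)
    F.stat <- ratio^2          # ratio of variances
    p.val <- 1 - pf(F.stat, l1 - 1, l2 - 1)
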
+ } + } \examples{ # Load sample data as in Load() example: -- GitLab From 559e22b313655e88ff9d34b839cbd2a9c9a70c84 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 5 Dec 2016 14:31:46 +0100 Subject: [PATCH 18/41] Minor bugfixes for BrierScore --- R/BrierScore.R | 68 ++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 50b38730..cce75377 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -82,51 +82,49 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { } else { nbins <- length(thresholds) - 1 # Number of bins n <- dim(ens)[1] # Number of observations + ens.mean <- rowMeans(ens, na.rm = TRUE) n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members - bins <- array(as.list(paste("bin", 1:nbins,sep = "")), c(nbins,dim(ens)[2])) + bins <- as.list(paste("bin", 1:nbins,sep = "")) for (i in 1:nbins) { if (i == nbins) { - bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x <= thresholds[i + 1]))) + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean <= thresholds[i + 1])) } else { - bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x < thresholds[i + 1]))) + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean < thresholds[i + 1])) } } - - - fkbar <- okbar <- nk <- array(0, dim = c(nbins,dim(ens)[2])) - for (k in 1:dim(ens)[2]) { - for (i in 1:nbins) { - nk[i,k] <- length(bins[[i,k]][[1]]) - fkbar[i,k] <- sum(ens[,k][bins[[i,k]][[1]]])/nk[i,k] - okbar[i,k] <- sum(obs[bins[[i,k]][[1]]]) / nk[i,k] + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(ens.mean[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] } - } fkbar[fkbar == Inf] <- 0 okbar[is.nan(okbar)] <- 0 obar <- sum(obs) / length(obs) - relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- rep(0,dim(ens)[2]) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 - for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { - if (nk[i,k] > 0) { - relsum[k] <- relsum[k] + nk[i,k] * (fkbar[i,k] - okbar[i,k])^2 - ressum[k] <- ressum[k] + nk[i,k] * (okbar[i,k] - obar)^2 + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i,k]) { - term1[k] <- term1[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k])^2 - term2[k] <- term2[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k]) * (obs[bins[[i,k]][[1]][j]] - okbar[i,k]) + for (j in 1:nk[i]) { + term1 <- term1 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) } } } } - + rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) - bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - + #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) + bs <- sum((rowMeans(ens, na.rm = T) - obs)^2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution @@ -136,24 +134,19 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { # # Estimating the bias-corrected components of the BS # - term3 <- array(0, dim = c(nbins,dim(ens)[2])) + term3 <- array(0, nbins) for (i in 1:nbins) { - for (k in 1:dim(ens)[2]) { - term3[i,k] <- (nk[i,k] / (nk[i,k] - 1)) * okbar[i,k] * (1 - 
okbar[i,k]) - } + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) } - term_a <- apply(term3, MARGIN = 2, FUN = function(x) sum(x, na.rm = T) / n) + term_a <- sum(term3, na.rm = T) / n term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b - rel_bias_corrected2 <- gres_bias_corrected2 <- rep(0, dim(ens)[2]) - for(j in 1:dim(ens)[2]) { - if (rel_bias_corrected[j] < 0 || gres_bias_corrected[j] < 0) { - rel_bias_corrected2[j] <- max(rel_bias_corrected[j], rel_bias_corrected[j] - gres_bias_corrected[j], 0) - gres_bias_corrected2[j] <- max(gres_bias_corrected[j], gres_bias_corrected[j] - rel_bias_corrected[j], 0) - rel_bias_corrected[j] <- rel_bias_corrected2[j] - gres_bias_corrected[j] <- gres_bias_corrected2[j] - } + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected @@ -163,6 +156,5 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} - invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, ens = ens, obs = obs)) - } + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected)) } -- GitLab From 16ca0822a7ac8db087b435c326d9f0e21def91ad Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 21 Dec 2016 10:48:55 +0100 Subject: [PATCH 19/41] Minor bugfix --- R/RatioSDRMS.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index dd30eed6..543d6e05 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -100,6 +100,7 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { enorms <- Eno(dif,1) enlratiormssd <- std /rms + p.val <- 0 if (pval) { l1 <- enosd @@ -110,7 +111,10 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { p.val <- 1 - pf(F, l1 - 1, l2 - 1) - } + } + else { + p.val <- NA + } } -- GitLab From c7f41dfbdb2bcf6c5145e675c32c3a91de5000e5 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Tue, 17 Jan 2017 14:35:55 +0100 Subject: [PATCH 20/41] Minor changes --- R/Corr.R | 5 +++-- man/ACC.Rd | 17 ++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 40e89684..ce576b84 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -138,8 +138,7 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, } } - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low + p <- c() conflow <- c() confhigh <- c() @@ -162,6 +161,8 @@ Corr <- 
function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, } if (conf & method == "pearson") { + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) diff --git a/man/ACC.Rd b/man/ACC.Rd index ccb69e71..2c92fe3e 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -1,13 +1,12 @@ \name{ACC} \alias{ACC} \title{ -Computes Anomaly Correlation Coefficient (Spatial Correlation) +Computes Anomaly Correlation Coefficient (Centred) } \description{ -Matrix var_exp & var_obs should have dimensions (nexp/nobs, nsdates, nltimes, nlat, nlon) or (nexp/nobs, nsdates, nmember, nltimes, nlat, nlon).\cr -ACC computes the Anomaly Correlation Coefficient for the ensemble mean of each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACC for each startdate and each leadtime.\cr -A domain can be selected by providing the list of longitudes/latitudes (lon/lat) of the grid together with the corner of the domain:\cr - lonlatbox = c(lonmin, lonmax, latmin, latmax) +Calculates the Anomaly Correlation Coefficient for the ensemble mean of each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACsC for each startdate and each leadtime.\cr +The domain of interest can be specified by providing the list of longitudes/latitudes (lon/lat) of the grid together with the corners of the domain:\cr +lonlatbox = c(lonmin, lonmax, latmin, latmax) } \usage{ ACC(var_exp, var_obs, lon = NULL, lat = NULL, lonlatbox = NULL, @@ -15,13 +14,13 @@ ACC(var_exp, var_obs, lon = NULL, lat = NULL, lonlatbox = NULL, } \arguments{ \item{var_exp}{ -Matrix of experimental anomalies with dimensions:\cr +Array of experimental anomalies with dimensions:\cr c(nexp, nsdates, nltimes, nlat, nlon)\cr or\cr c(nexp, nsdates, nmembers, nltimes, nlat, nlon)\cr } \item{var_obs}{ -Matrix of observational anomalies, same dimensions as var_exp except along the first dimension and the second if it corresponds to the member dimension. +Array of observational anomalies, same dimensions as var_exp except along the first dimension and the second if it corresponds to the member dimension. } \item{lon}{ Array of longitudes of the var_exp/var_obs grids, optional. @@ -57,6 +56,7 @@ Mean Anomaly Correlation Coefficient with dimensions:\cr c(nexp, nobs, nleadtimes) } } + \examples{ # See ?Load for explanations on the first part of this example. \dontrun{ @@ -92,6 +92,9 @@ ano_obs <- Ano(sampleData$obs, clim$clim_obs) acc <- ACC(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2)) PlotACC(acc$ACC, startDates) } +\references{ +Joliffe and Stephenson (2012). Forecast Verification: A Practitioner's Guide in Atmospheric Science. Wiley-Blackwell. +} \author{ History:\cr 0.1 - 2013-08 (V. 
Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -- GitLab From 6a37048495c9a29d35d8b95f92d1ed8f36e0c1f4 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 18 Jan 2017 10:58:07 +0100 Subject: [PATCH 21/41] Changes to load manual --- R/PlotEquiMap.R | 1 - man/Load.Rd | 660 ++++++++++++++++++++---------------------------- 2 files changed, 275 insertions(+), 386 deletions(-) diff --git a/R/PlotEquiMap.R b/R/PlotEquiMap.R index dea625df..d8bf07e2 100644 --- a/R/PlotEquiMap.R +++ b/R/PlotEquiMap.R @@ -135,7 +135,6 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, if (!.IsColor(colNA)) { stop("Parameter 'colNA' must be a valid colour identifier.") } - # Check: brks, cols, subsampleg, bar_limits, color_fun, bar_extra_labels, draw_bar_ticks # draw_separators, triangle_ends_scale, label_scale, units, units_scale, # bar_label_digits diff --git a/man/Load.Rd b/man/Load.Rd index dfd61331..67ba6e8d 100644 --- a/man/Load.Rd +++ b/man/Load.Rd @@ -1,285 +1,175 @@ \name{Load} \alias{Load} -\title{Loads Experimental And Observational Data} +\title{Loads Experimental And Observational Data from NetCDF files} \description{ -This function loads monthly or daily data from a set of specified experimental datasets together with data that date-corresponds from a set of specified observational datasets. See parameters 'storefreq', 'sampleperiod', 'exp' and 'obs'.\cr\cr -A set of starting dates is specified through the parameter 'sdates'. Data of each starting date is loaded for each model. -\code{Load()} arranges the data in two arrays with a similar format both with the following dimensions: - \enumerate{ - \item{The number of experimental datasets determined by the user through the argument 'exp' (for the experimental data array) or the number of observational datasets available for validation (for the observational array) determined as well by the user through the argument 'obs'.} - \item{The greatest number of members across all experiments (in the experimental data array) or across all observational datasets (in the observational data array).} - \item{The number of starting dates determined by the user through the 'sdates' argument.} - \item{The greatest number of lead-times.} - \item{The number of latitudes of the selected zone.} - \item{The number of longitudes of the selected zone.} - } -Dimensions 5 and 6 are optional and their presence depends on the type of the specified variable (global mean or 2-dimensional) and on the selected output type (area averaged time series, latitude averaged time series, longitude averaged time series or 2-dimensional time series).\cr -In the case of loading an area average the dimensions of the arrays will be only the first 4.\cr\cr - -Only a specified variable is loaded from each experiment at each starting date. See parameter 'var'.\cr -Afterwards, observational data that matches every starting date and lead-time of every experimental dataset is fetched in the file system (so, if two predictions at two different start dates overlap, some observational values will be loaded and kept in memory more than once).\cr -If no data is found in the file system for an experimental or observational array point it is filled with an NA value.\cr\cr - -If the specified output is 2-dimensional or latitude- or longitude-averaged time series all the data is interpolated into a common grid. 
If the specified output type is area averaged time series the data is averaged on the individual grid of each dataset but can also be averaged after interpolating into a common grid. See parameters 'grid' and 'method'.\cr -Once the two arrays are filled by calling this function, other functions in the s2dverification package that receive as inputs data formatted in this data structure can be executed (e.g: \code{Clim()} to compute climatologies, \code{Ano()} to compute anomalies, ...).\cr\cr - -Load() has many additional parameters to disable values and trim dimensions of selected variable, even masks can be applied to 2-dimensional variables. See parameters 'nmember', 'nmemberobs', 'nleadtime', 'leadtimemin', 'leadtimemax', 'sampleperiod', 'lonmin', 'lonmax', 'latmin', 'latmax', 'maskmod', 'maskobs', 'varmin', 'varmax'.\cr\cr - -The parameters 'exp' and 'obs' can take various forms. The most direct form is a list of lists, where each sub-list has the component 'path' associated to a character string with a pattern of the path to the files of a dataset to be loaded. These patterns can contain wildcards and tags that will be replaced automatically by \code{Load()} with the specified starting dates, member numbers, variable name, etc.\cr -See parameter 'exp' or 'obs' for details.\cr\cr - -Only NetCDF files are supported. OPeNDAP URLs to NetCDF files are also supported.\cr -\code{Load()} can load 2-dimensional or global mean variables in any of the following formats: - \itemize{ - \item{experiments: - \itemize{ - \item{file per ensemble per starting date (YYYY, MM and DD somewhere in the path)} - \item{file per member per starting date (YYYY, MM, DD and MemberNumber somewhere in the path. Ensemble experiments with different numbers of members can be loaded in a single \code{Load()} call.)} - } -(YYYY, MM and DD specify the starting dates of the predictions) - } - \item{observations: - \itemize{ - \item{file per ensemble per month (YYYY and MM somewhere in the path)} - \item{file per member per month (YYYY, MM and MemberNumber somewhere in the path, obs with different numbers of members supported)} - \item{file per dataset (No constraints in the path but the time axes in the file have to be properly defined)} - } -(YYYY and MM correspond to the actual month data in the file) - } - } -In all the formats the data can be stored in a daily or monthly frequency, or a multiple of these (see parameters 'storefreq' and 'sampleperiod').\cr -All the data files must contain the target variable defined over time and potentially over members, latitude and longitude dimensions in any order, time being the record dimension.\cr -In the case of a two-dimensional variable, the variables longitude and latitude must be defined inside the data file too and must have the same names as the dimension for longitudes and latitudes respectively.\cr -The names of these dimensions (and longitude and latitude variables) and the name for the members dimension are expected to be 'longitude', 'latitude' and 'ensemble' respectively. However, these names can be adjusted with the parameter 'dimnames' or can be configured in the configuration file (read below in parameters 'exp', 'obs' or see \code{?ConfigFileOpen} for more information.\cr -All the data files are expected to have numeric values representable with 32 bits. 
Be aware when choosing the fill values or infinite values in the datasets to load.\cr\cr - -The Load() function returns a named list following a structure similar to the used in the package 'downscaleR'.\cr -The components are the following: - \itemize{ - \item{'mod' is the array that contains the experimental data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order.} - \item{'obs' is the array that contains the observational data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order.} - \item{'obs' is the array that contains the observational data.} - \item{'lat' and 'lon' are the latitudes and longitudes of the grid into which the data is interpolated (0 if the loaded variable is a global mean or the output is an area average).\cr -Both have the attribute 'cdo_grid_des' associated with a character string with the name of the common grid of the data, following the CDO naming conventions for grids.\cr -The attribute 'projection' is kept for compatibility with 'downscaleR'.} - \item{'Variable' has the following components: - \itemize{ - \item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.} - \item{'level', with information on the pressure level of the variable. Is kept to NULL by now.} - } -And the following attributes: - \itemize{ - \item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.} - \item{'units', a character string with the units of measure of the variable, as found in the source files.} - \item{'longname', a character string with the long name of the variable, as found in the source files.} - \item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.} - } - } - \item{'Datasets' has the following components: - \itemize{ - \item{'exp', a named list where the names are the identifying character strings of each experiment in 'exp', each associated to a list with the following components: - \itemize{ - \item{'members', a list with the names of the members of the dataset.} - \item{'source', a path or URL to the source of the dataset.} - } - } - \item{'obs', similar to 'exp' but for observational datasets.} - } - } - \item{'Dates', with the follwing components: - \itemize{ - \item{'start', an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date.} - \item{'end', an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date.} - } - } - \item{'InitializationDates', a vector of starting dates as specified in 'sdates', in POSIX format.} - \item{'when', a time stamp of the date the \code{Load()} call to obtain the data was issued.} - \item{'source_files', a vector of character strings with complete paths to all the found files involved in the \code{Load()} call.} - \item{'not_found_files', a vector of character strings with complete paths to not found files involved in the \code{Load()} call.} - } + This function loads monthly or daily data from a user-specified forecast, and then automatically loads the corresponding observations (if available).\cr\cr + + The function can handle spatial data (lon-lat) as well as global averages. 
The function can also calculate area-averages.\cr\cr
+
+ If multiple datasets are loaded with different grids, \code{Load()} will interpolate them onto a common grid.\cr
 }
 \usage{
-Load(var, exp = NULL, obs = NULL, sdates, nmember = NULL,
-    nmemberobs = NULL, nleadtime = NULL, leadtimemin = 1,
-    leadtimemax = NULL, storefreq = 'monthly', sampleperiod = 1,
-    lonmin = 0, lonmax = 360, latmin = -90, latmax = 90,
-    output = 'areave', method = 'conservative', grid = NULL,
-    maskmod = vector("list", 15), maskobs = vector("list", 15),
-    configfile = NULL, varmin = NULL, varmax = NULL,
-    silent = FALSE, nprocs = NULL, dimnames = NULL,
-    remapcells = 2, path_glob_permissive = 'partial')
+ Load(var, exp = NULL, obs = NULL, sdates, nmember = NULL,
+ nmemberobs = NULL, nleadtime = NULL, leadtimemin = 1,
+ leadtimemax = NULL, storefreq = 'monthly', sampleperiod = 1,
+ lonmin = 0, lonmax = 360, latmin = -90, latmax = 90,
+ output = 'areave', method = 'conservative', grid = NULL,
+ maskmod = vector("list", 15), maskobs = vector("list", 15),
+ configfile = NULL, varmin = NULL, varmax = NULL,
+ silent = FALSE, nprocs = NULL, dimnames = NULL,
+ remapcells = 2, path_glob_permissive = 'partial')
 }
 \arguments{
 \item{var}{
-Name of the variable to load.\cr
-If the variable name inside the files to load is not the same as this, adjust properly the parameters 'exp' and 'obs'.\cr
-This parameter is mandatory.\cr
-Ex: 'tas'
+ Name of the variable to load.\cr
+ If the variable name inside the files to load does not match 'var', the parameters 'exp' and 'obs' can be used to specify it.\cr
 }
 \item{exp}{
-This argument can take two formats: a list of lists or a vector of character strings. Each format will trigger a different mechanism of locating the requested datasets.\cr
-The first format is adequate when loading data you'll only load once or occasionally. The second format is targeted to avoid providing repeatedly the information on a certain dataset but is more complex to use.\cr\cr
-IMPORTANT: Place first the experiment with the largest number of members and, if possible, with the largest number of leadtimes. If not possible, the arguments 'nmember' and/or 'nleadtime' should be filled to not miss any member or leadtime.\cr
-If 'exp' is not specified or set to NULL, observational data is loaded for each start-date as far as 'leadtimemax'. If 'leadtimemax' is not provided, \code{Load()} will retrieve data of a period of time as long as the time period between the first specified start date and the current date.\cr
-\cr
-List of lists:\cr
-A list of lists where each sub-list contains information on the location and format of the data files of the dataset to load.\cr
-Each sub-list can have the following components:
+ This argument can either be a list of lists or a vector of character strings.\cr
+ The first format is adequate when loading data you'll only load once or occasionally. The second format is targeted to avoid providing repeatedly the information on a certain dataset but is more complex to use.\cr\cr
+ IMPORTANT: Place first the experiment with the largest number of members and, if possible, with the largest number of leadtimes. If not possible, the arguments 'nmember' and/or 'nleadtime' should be filled to not miss any member or leadtime.\cr
+ If 'exp' is not specified or set to NULL, observational data is loaded for each start-date as far as 'leadtimemax'.
If 'leadtimemax' is not provided, \code{Load()} will retrieve data of a period of time as long as the time period between the first specified start date and the current date.\cr + \cr + List of lists:\cr + A list of lists where each sub-list contains information on the location and format of the data files of the dataset to load.\cr + Each sub-list can have the following components: \itemize{ - \item{ -'name': A character string to identify the dataset. Optional. - } - \item{ -'path': A character string with the pattern of the path to the files of the dataset. This pattern can be built up making use of some special tags that \code{Load()} will replace with the appropriate values to find the dataset files. The allowed tags are $START_DATE$, $YEAR$, $MONTH$, $DAY$, $MEMBER_NUMBER$, $STORE_FREQ$, $VAR_NAME$, $EXP_NAME$ (only for experimental datasets), $OBS_NAME$ (only for observational datasets) and $SUFFIX$\cr -Example: /path/to/$EXP_NAME$/postprocessed/$VAR_NAME$/\cr - $VAR_NAME$_$START_DATE$.nc\cr -If 'path' is not specified and 'name' is specified, the dataset information will be fetched with the same mechanism as when using the vector of character strings (read below). - } - \item{ -'nc_var_name': Character string with the actual variable name to look for inside the dataset files. Optional. Takes, by default, the same value as the parameter 'var'. - } - \item{ -'suffix': Wildcard character string that can be used to build the 'path' of the dataset. It can be accessed with the tag $SUFFIX$. Optional. Takes '' by default. - } - \item{ -'var_min': Important: Character string. Minimum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. - } - \item{ -'var_max': Important: Character string. Maximum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. - } + \item{ + 'name': A character string to identify the dataset. Optional. } -The tag $START_DATES$ will be replaced with all the starting dates specified in 'sdates'. $YEAR$, $MONTH$ and $DAY$ will take a value for each iteration over 'sdates', simply these are the same as $START_DATE$ but split in parts.\cr -$MEMBER_NUMBER$ will be replaced by a character string with each member number, from 1 to the value specified in the parameter 'nmember' (in experimental datasets) or in 'nmemberobs' (in observational datasets). 
It will range from '01' to 'N' or '0N' if N < 10.\cr -$STORE_FREQ$ will take the value specified in the parameter 'storefreq' ('monthly' or 'daily').\cr -$VAR_NAME$ will take the value specified in the parameter 'var'.\cr -$EXP_NAME$ will take the value specified in each component of the parameter 'exp' in the sub-component 'name'.\cr -$OBS_NAME$ will take the value specified in each component of the parameter 'obs' in the sub-component 'obs.\cr -$SUFFIX$ will take the value specified in each component of the parameters 'exp' and 'obs' in the sub-component 'suffix'.\cr - -Example: -\preformatted{ -list( - list( - name = 'experimentA', - path = file.path('/path/to/$DATASET_NAME$/$STORE_FREQ$', - '$VAR_NAME$$SUFFIX$', - '$VAR_NAME$_$START_DATE$.nc'), - nc_var_name = '$VAR_NAME$', - suffix = '_3hourly', - var_min = '-1e19', - var_max = '1e19' - ) -) -} -This will make \code{Load()} look for, for instance, the following paths, if 'sdates' is c('19901101', '19951101', '20001101'):\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_19901101.nc\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_19951101.nc\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_20001101.nc\cr\cr - -Vector of character strings: -To avoid specifying constantly the same information to load the same datasets, a vector with only the names of the datasets to load can be specified.\cr -\code{Load()} will then look for the information in a configuration file whose path must be specified in the parameter 'configfile'.\cr -Check \code{?ConfigFileCreate}, \code{ConfigFileOpen}, \code{ConfigEditEntry} & co. to learn how to create a new configuration file and how to add the information there. - -Example: c('experimentA', 'experimentB') + \item{ + 'path': A character string with the pattern of the path to the files of the dataset. This pattern can be built up making use of some special tags that \code{Load()} will replace with the appropriate values to find the dataset files. The allowed tags are $START_DATE$, $YEAR$, $MONTH$, $DAY$, $MEMBER_NUMBER$, $STORE_FREQ$, $VAR_NAME$, $EXP_NAME$ (only for experimental datasets), $OBS_NAME$ (only for observational datasets) and $SUFFIX$\cr + Example: /path/to/$EXP_NAME$/postprocessed/$VAR_NAME$/\cr + $VAR_NAME$_$START_DATE$.nc\cr + If 'path' is not specified and 'name' is specified, the dataset information will be fetched with the same mechanism as when using the vector of character strings (read below). + } + \item{ + 'nc_var_name': Character string with the actual variable name to look for inside the dataset files. Optional. Takes, by default, the same value as the parameter 'var'. + } + \item{ + 'suffix': Wildcard character string that can be used to build the 'path' of the dataset. It can be accessed with the tag $SUFFIX$. Optional. Takes '' by default. + } + \item{ + 'var_min': Important: Character string. Minimum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. + } + \item{ + 'var_max': Important: Character string. Maximum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. + } + } + The tag $START_DATES$ will be replaced with all the starting dates specified in 'sdates'. 
$YEAR$, $MONTH$ and $DAY$ will take a value for each iteration over 'sdates', simply these are the same as $START_DATE$ but split in parts.\cr
+ $MEMBER_NUMBER$ will be replaced by a character string with each member number, from 1 to the value specified in the parameter 'nmember' (in experimental datasets) or in 'nmemberobs' (in observational datasets). It will range from '01' to 'N' or '0N' if N < 10.\cr
+ $STORE_FREQ$ will take the value specified in the parameter 'storefreq' ('monthly' or 'daily').\cr
+ $VAR_NAME$ will take the value specified in the parameter 'var'.\cr
+ $EXP_NAME$ will take the value specified in each component of the parameter 'exp' in the sub-component 'name'.\cr
+ $OBS_NAME$ will take the value specified in each component of the parameter 'obs' in the sub-component 'name'.\cr
+ $SUFFIX$ will take the value specified in each component of the parameters 'exp' and 'obs' in the sub-component 'suffix'.\cr
+
+ Example:
+ \preformatted{
+ list(
+   list(
+     name = 'experimentA',
+     path = file.path('/path/to/$DATASET_NAME$/$STORE_FREQ$',
+                      '$VAR_NAME$$SUFFIX$',
+                      '$VAR_NAME$_$START_DATE$.nc'),
+     nc_var_name = '$VAR_NAME$',
+     suffix = '_3hourly',
+     var_min = '-1e19',
+     var_max = '1e19'
+   )
+ )
+ }
+ This will make \code{Load()} look for, for instance, the following paths, if 'sdates' is c('19901101', '19951101', '20001101'):\cr
+ /path/to/experimentA/monthly_mean/tas_3hourly/tas_19901101.nc\cr
+ /path/to/experimentA/monthly_mean/tas_3hourly/tas_19951101.nc\cr
+ /path/to/experimentA/monthly_mean/tas_3hourly/tas_20001101.nc\cr\cr
+
+ Vector of character strings:
+ To avoid specifying constantly the same information to load the same datasets, a vector with only the names of the datasets to load can be specified.\cr
+ \code{Load()} will then look for the information in a configuration file whose path must be specified in the parameter 'configfile'.\cr
+ Check \code{?ConfigFileCreate}, \code{ConfigFileOpen}, \code{ConfigEditEntry} & co. to learn how to create a new configuration file and how to add the information there.
+
+ Example: c('experimentA', 'experimentB')
 }
 \item{obs}{
-Argument with the same format as parameter 'exp'. See details on parameter 'exp'.\cr
-If 'obs' is not specified or set to NULL, no observational data is loaded.\cr
+ Argument with the same format as parameter 'exp'. See details on parameter 'exp'.\cr
+ If 'obs' is not specified or set to NULL, no observational data is loaded.\cr
 }
 \item{sdates}{
-Vector of starting dates of the experimental runs to be loaded following the pattern 'YYYYMMDD'.\cr
-This argument is mandatory.\cr
-Ex: c('19601101', '19651101', '19701101')
+ Vector of starting dates of the experimental runs to be loaded following the pattern 'YYYYMMDD'.\cr
+
+
 }
 \item{nmember}{
-Vector with the numbers of members to load from the specified experimental datasets in 'exp'.\cr
-If not specified, the automatically detected number of members of the first experimental dataset is detected and replied to all the experimental datasets.\cr
-If a single value is specified it is replied to all the experimental datasets.\cr
-Data for each member is fetched in the file system.
If not found is filled with NA values.\cr
-An NA value in the 'nmember' list is interpreted as "fetch as many members of each experimental dataset as the number of members of the first experimental dataset".\cr
-Note: It is recommended to specify the number of members of the first experimental dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr
-Ex: c(4, 9)
+ Vector with the numbers of members to load from the specified experimental datasets in 'exp'.\cr
+ If not specified, the number of members of the first experimental dataset is detected automatically and used for all the experimental datasets.\cr
+ If a single value is specified it is used for all the experimental datasets.\cr
+ An NA value in the 'nmember' list is interpreted as "fetch the same number of members for each experimental dataset as is available in the first experimental dataset".\cr
+ Note: It is recommended to specify the number of members of the first experimental dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr
 }
 \item{nmemberobs}{
-Vector with the numbers of members to load from the specified observational datasets in 'obs'.\cr
-If not specified, the automatically detected number of members of the first observational dataset is detected and replied to all the observational datasets.\cr
-If a single value is specified it is replied to all the observational datasets.\cr
-Data for each member is fetched in the file system. If not found is filled with NA values.\cr
-An NA value in the 'nmemberobs' list is interpreted as "fetch as many members of each observational dataset as the number of members of the first observational dataset".\cr
-Note: It is recommended to specify the number of members of the first observational dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr
-Ex: c(1, 5)
- }
- \item{nleadtime}{
-Deprecated. See parameter 'leadtimemax'.\cr
+ The same as nmember but for the reference data.\cr
+ }
+
 \item{leadtimemin}{
-Only lead-times higher or equal to 'leadtimemin' are loaded. Takes by default value 1.
+ Only lead-times higher or equal to 'leadtimemin' are loaded. Takes by default value 1.
 }
 \item{leadtimemax}{
-Only lead-times lower or equal to 'leadtimemax' are loaded. Takes by default the number of lead-times of the first experimental dataset in 'exp'.\cr
-If 'exp' is NULL this argument won't have any effect (see \code{?Load} description).
- }
- \item{storefreq}{
-Frequency at which the data to be loaded is stored in the file system. Can take values 'monthly' or 'daily'.\cr
-By default it takes 'monthly'.\cr
-Note: Data stored in other frequencies with a period which is divisible by a month can be loaded with a proper use of 'storefreq' and 'sampleperiod' parameters. It can also be loaded if the period is divisible by a day and the observational datasets are stored in a file per dataset format or 'obs' is empty.
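
Taken together, the member and lead-time arguments above support calls like the following hedged sketch; the dataset names are placeholders that would have to be defined in a configuration file (see 'configfile'):

    # Sketch only: monthly data, lead-times 2 to 13, 5 members per experiment.
    sdates <- c('19901101', '19951101', '20001101')
    data <- Load(var = 'tas', exp = c('experimentA'), obs = c('observationX'),
                 sdates = sdates, nmember = 5, leadtimemin = 2,
                 leadtimemax = 13, storefreq = 'monthly')
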
- } - \item{sampleperiod}{ -To load only a subset between 'leadtimemin' and 'leadtimemax' with the period of subsampling 'sampleperiod'.\cr -Takes by default value 1 (all lead-times are loaded).\cr -See 'storefreq' for more information. - } - \item{lonmin}{ -If a 2-dimensional variable is loaded, values at longitudes lower than 'lonmin' aren't loaded.\cr -Must take a value in the range [-360, 360] (if negative longitudes are found in the data files these are translated to this range).\cr -It is set to 0 if not specified.\cr -If 'lonmin' > 'lonmax', data across Greenwich is loaded. + Only lead-times lower or equal to 'leadtimemax' are loaded. Takes by default the number of lead-times of the first experimental dataset in 'exp'.\cr + If 'exp' is NULL this argument is ignored. } + \item{storefreq}{ + Frequency at which the data to be loaded is stored in the file system. Can take values 'monthly' or 'daily'.\cr + By default it takes 'monthly'.\cr + } + \item{sampleperiod}{ + To load only a subset between 'leadtimemin' and 'leadtimemax' with the period of subsampling 'sampleperiod'.\cr + Takes by default value 1 (all lead-times are loaded).\cr + See 'storefreq' for more information. + } + \item{lonmin}{ + If a 2-dimensional variable is loaded, values at longitudes lower than 'lonmin' aren't loaded.\cr + If 'lonmin' > 'lonmax', data across the Greenwich meridian are loaded. + } \item{lonmax}{ -If a 2-dimensional variable is loaded, values at longitudes higher than 'lonmax' aren't loaded.\cr -Must take a value in the range [-360, 360] (if negative longitudes are found in the data files these are translated to this range).\cr -It is set to 360 if not specified.\cr -If 'lonmin' > 'lonmax', data across Greenwich is loaded. - } - \item{latmin}{ -If a 2-dimensional variable is loaded, values at latitudes lower than 'latmin' aren't loaded.\cr -Must take a value in the range [-90, 90].\cr -It is set to -90 if not specified. - } + If a 2-dimensional variable is loaded, values at longitudes higher than 'lonmax' aren't loaded.\cr + If 'lonmin' > 'lonmax', data across Greenwich is loaded. + } + \item{latmin}{ + If a 2-dimensional variable is loaded, values at latitudes lower than 'latmin' aren't loaded.\cr + } \item{latmax}{ -If a 2-dimensional variable is loaded, values at latitudes higher than 'latmax' aren't loaded.\cr -Must take a value in the range [-90, 90].\cr -It is set to 90 if not specified. - } - \item{output}{ -This parameter determines the format in which the data is arranged in the output arrays.\cr -Can take values 'areave', 'lon', 'lat', 'lonlat'.\cr + If a 2-dimensional variable is loaded, values at latitudes higher than 'latmax' aren't loaded.\cr + } + \item{output}{ + This parameter determines the format in which the data is arranged in the output arrays.\cr + Can take values 'areave', 'lon', 'lat', 'lonlat'.\cr \itemize{ - \item{'areave': Time series of area-averaged variables over the specified domain.} - \item{'lon': Time series of meridional averages as a function of longitudes.} - \item{'lat': Time series of zonal averages as a function of latitudes.} - \item{'lonlat': Time series of 2d fields.} + \item{'areave': Time series of area-averaged variables over the specified domain.} + \item{'lon': Time series of meridional averages as a function of longitudes.} + \item{'lat': Time series of zonal averages as a function of latitudes.} + \item{'lonlat': Time series of 2d fields.} + } + Takes by default the value 'areave'. 
If the variable specified in 'var' is a global mean, this parameter is forced to 'areave'.\cr + All the loaded data is interpolated onto the grid of the first experimental dataset except if 'areave' is selected. In that case the area averages are computed on each dataset original grid. A common grid different than the first experiment's can be specified through the parameter 'grid'. If 'grid' is specified when selecting 'areave' output type, all the loaded data is interpolated into the specified grid before calculating the area averages. } -Takes by default the value 'areave'. If the variable specified in 'var' is a global mean, this parameter is forced to 'areave'.\cr -All the loaded data is interpolated into the grid of the first experimental dataset except if 'areave' is selected. In that case the area averages are computed on each dataset original grid. A common grid different than the first experiment's can be specified through the parameter 'grid'. If 'grid' is specified when selecting 'areave' output type, all the loaded data is interpolated into the specified grid before calculating the area averages. - } \item{method}{ -This parameter determines the interpolation method to be used when regridding data (see 'output'). Can take values 'bilinear', 'bicubic', 'conservative', 'distance-weighted'.\cr -See \code{remapcells} for advanced adjustments.\cr -Takes by default the value 'conservative'. + This parameter determines the interpolation method to be used when regridding data (see 'output'). Can take values 'bilinear', 'bicubic', 'conservative', 'distance-weighted'.\cr + See \code{remapcells} for advanced adjustments.\cr + Takes by default the value 'conservative'. } \item{grid}{ -A common grid can be specified through the parameter 'grid' when loading 2-dimensional data. Data is then interpolated onto this grid whichever 'output' type is specified. If the selected output type is 'areave' and a 'grid' is specified, the area averages are calculated after interpolating to the specified grid.\cr -If not specified and the selected output type is 'lon', 'lat' or 'lonlat', this parameter takes as default value the grid of the first experimental dataset, which is read automatically from the source files.\cr -The grid must be supported by 'cdo' tools: rNXxNY or tTRgrid.\cr -Ex: 'r96x72'\cr -Advanced: If the output type is 'lon', 'lat' or 'lonlat' and no common grid is specified, the grid of the first experimental or observational dataset is detected and all data is then interpolated onto this grid. If the first experimental or observational dataset's data is found shifted along the longitudes (i.e., there's no value at the longitude 0 but at a longitude close to it), the data is re-interpolated to suppress the shift. This has to be done in order to make sure all the data from all the datasets is properly aligned along longitudes, as there's no option so far in \code{Load} to specify grids starting at longitudes other than 0. This issue doesn't affect when loading in 'areave' mode without a common grid, the data is not re-interpolated in that case. + A common grid can be specified through the parameter 'grid' when loading 2-dimensional data. Data is then interpolated onto this grid whichever 'output' type is specified. 
If the selected output type is 'areave' and a 'grid' is specified, the area averages are calculated after interpolating to the specified grid.\cr + If not specified and the selected output type is 'lon', 'lat' or 'lonlat', this parameter takes as default value the grid of the first experimental dataset, which is read automatically from the source files.\cr + The grid must be supported by 'cdo' tools: rNXxNY or tTRgrid.\cr + Ex: 'r96x72'\cr + Advanced: If the output type is 'lon', 'lat' or 'lonlat' and no common grid is specified, the grid of the first experimental or observational dataset is detected and all data is then interpolated onto this grid. If the first experimental or observational dataset's data is found shifted along the longitudes (i.e., there's no value at the longitude 0 but at a longitude close to it), the data is re-interpolated to suppress the shift. This has to be done in order to make sure all the data from all the datasets is properly aligned along longitudes, as there's no option so far in \code{Load} to specify grids starting at longitudes other than 0. This issue doesn't affect when loading in 'areave' mode without a common grid, the data is not re-interpolated in that case. } \item{maskmod}{ -List of masks to be applied to the data of each experimental dataset respectively, if a 2-dimensional variable is specified in 'var'.\cr -Each mask can be defined in 2 formats:\cr -a) a matrix with dimensions c(longitudes, latitudes).\cr + List of masks to be applied to the data of each experimental dataset respectively, if a 2-dimensional variable is specified in 'var'.\cr + Each mask can be defined in 2 formats:\cr + a) a matrix with dimensions c(longitudes, latitudes).\cr b) a list with the components 'path' and, optionally, 'nc_var_name'.\cr In the format a), the matrix must have the same size as the common grid or with the same size as the grid of the corresponding experimental dataset if 'areave' output type is specified and no common 'grid' is specified.\cr In the format b), the component 'path' must be a character string with the path to a NetCDF mask file, also in the common grid or in the grid of the corresponding dataset if 'areave' output type is specified and no common 'grid' is specified. If the mask file contains only a single variable, there's no need to specify the component 'nc_var_name'. Otherwise it must be a character string with the name of the variable inside the mask file that contains the mask values. This variable must be defined only over 2 dimensions with length greater or equal to 1.\cr @@ -292,122 +182,122 @@ Warning: When loading maps, any masks defined for the observational data will be Warning: list() compulsory even if loading 1 experimental dataset only!\cr Ex: list(array(1, dim = c(num_lons, num_lats))) } - \item{maskobs}{ +\item{maskobs}{ See help on parameter 'maskmod'. - } +} \item{configfile}{ -Path to the s2dverification configuration file from which to retrieve information on location in file system (and other) of datasets.\cr -If not specified, the configuration file used at BSC-ES will be used (it is included in the package).\cr -Check the BSC's configuration file or a template of configuration file in the folder 'inst/config' in the package.\cr -Check further information on the configuration file mechanism in \code{ConfigFileOpen()}. - } - \item{varmin}{ -Loaded experimental and observational data values smaller than 'varmin' will be disabled (replaced by NA values).\cr -By default no deactivation is performed. 
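
The grid and mask machinery described above can be exercised with a sketch like this one; the dataset name and grid sizes are placeholders, and format (a) masks are matrices of 0s and 1s on the working grid, as in the example given for 'maskmod':

    # Sketch only: an all-ones mask for one experiment on a common r96x72 grid.
    num_lons <- 96; num_lats <- 72
    mask <- list(array(1, dim = c(num_lons, num_lats)))
    data <- Load('tos', exp = c('experimentA'), obs = NULL,
                 sdates = '19901101', output = 'lonlat', grid = 'r96x72',
                 maskmod = mask)
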
- }
- \item{varmax}{
-Loaded experimental and observational data values greater than 'varmax' will be disabled (replaced by NA values).\cr
-By default no deactivation is performed.
- }
- \item{silent}{
-Parameter to show (FALSE) or hide (TRUE) information messages.\cr
-Warnings will be displayed even if 'silent' is set to TRUE.\cr
-Takes by default the value 'FALSE'.
- }
- \item{nprocs}{
-Number of parallel processes created to perform the fetch and computation of data.\cr
-These processes will use shared memory in the processor in which Load() is launched.\cr
-By default the number of logical cores in the machine will be detected and as many processes as logical cores there are will be created.\cr
-A value of 1 won't create parallel processes.\cr
-When running in multiple processes, if an error occurs in any of the processes, a crash message appears in the R session of the original process but no detail is given about the error. A value of 1 will display all error messages in the original and only R session.\cr
-Note: the parallel process create other blocking processes each time they need to compute an interpolation via 'cdo'.
- }
- \item{dimnames}{
+ Path to the s2dverification configuration file from which to retrieve information on location in file system (and other) of datasets.\cr
+ If not specified, the configuration file used at BSC-ES will be used (it is included in the package).\cr
+ Check the BSC's configuration file or a template of configuration file in the folder 'inst/config' in the package.\cr
+ Check further information on the configuration file mechanism in \code{ConfigFileOpen()}.
+ }
+\item{varmin}{
+ Loaded experimental and observational data values smaller than 'varmin' will be disabled (replaced by NA values).\cr
+ By default no deactivation is performed.
+}
+\item{varmax}{
+ Loaded experimental and observational data values greater than 'varmax' will be disabled (replaced by NA values).\cr
+ By default no deactivation is performed.
+}
+\item{silent}{
+ Parameter to show (FALSE) or hide (TRUE) information messages.\cr
+ Warnings will be displayed even if 'silent' is set to TRUE.\cr
+ Takes by default the value 'FALSE'.
+}
+\item{nprocs}{
+ Number of parallel processes created to perform the fetch and computation of data.\cr
+ These processes will use shared memory in the processor in which Load() is launched.\cr
+ By default the number of logical cores in the machine will be detected and as many processes as logical cores there are will be created.\cr
+ A value of 1 won't create parallel processes.\cr
+ When running in multiple processes, if an error occurs in any of the processes, a crash message appears in the R session of the original process but no detail is given about the error. A value of 1 will display all error messages in the original and only R session.\cr
+ Note: the parallel processes create other blocking processes each time they need to compute an interpolation via 'cdo'.
+}
+\item{dimnames}{
 Named list where the name of each element is a generic name of the expected dimensions inside the NetCDF files. These generic names are 'lon', 'lat' and 'member'.
'time' is not needed because it's detected automatically.\cr
 The value associated to each name is the actual dimension name in the NetCDF file.\cr
 The variables in the file that contain the longitudes and latitudes of the data (if the data is a 2-dimensional variable) must have the same name as the longitude and latitude dimensions.\cr
 By default, these names are 'longitude', 'latitude' and 'ensemble'. If any of those is defined in the 'dimnames' parameter, it takes priority and overwrites the default value.
 Ex.: list(lon = 'x', lat = 'y')
 In that example, the dimension 'member' will take the default value 'ensemble'.
- }
- \item{remapcells}{
+}
+\item{remapcells}{
 When loading a 2-dimensional variable, spatial subsets can be requested via \code{lonmin}, \code{lonmax}, \code{latmin} and \code{latmax}. When \code{Load()} obtains the subset it is then interpolated if needed with the method specified in \code{method}.\cr
 The result of this interpolation can vary if the values surrounding the spatial subset are not present. To better control this process, the width in number of grid cells of the surrounding area to be taken into account can be specified with \code{remapcells}. A value of 0 will take into account no additional cells but will generate less traffic between the storage and the R processes that load data.\cr
 A value beyond the limits in the data files will be automatically truncated to the actual limit.\cr
 The default value is 2.
- }
- \item{path_glob_permissive}{
+}
+\item{path_glob_permissive}{
 In some cases, when specifying a path pattern (either in the parameters 'exp'/'obs' or in a configuration file) one can specify path patterns that contain shell globbing expressions. Too much freedom in putting globbing expressions in the path patterns can be dangerous and make \code{Load()} find a file in the file system for a start date for a dataset that really does not belong to that dataset. For example, if the file system contains two directories for two different experiments that share a part of their path and the path pattern contains globbing expressions:
- /experiments/model1/expA/monthly_mean/tos/tos_19901101.nc
- /experiments/model2/expA/monthly_mean/tos/tos_19951101.nc
+/experiments/model1/expA/monthly_mean/tos/tos_19901101.nc
+/experiments/model2/expA/monthly_mean/tos/tos_19951101.nc
 And the path pattern is used as in the example right below to load data of only the experiment 'expA' of the model 'model1' for the starting dates '19901101' and '19951101', \code{Load()} will undesirably yield data for both starting dates, even if in fact there is data only for the first one:
- expA <- list(path = '/experiments/*/expA/monthly_mean/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc')
- data <- Load('tos', list(expA), NULL, c('19901101', '19951101'))
+expA <- list(path = '/experiments/*/expA/monthly_mean/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc')
+data <- Load('tos', list(expA), NULL, c('19901101', '19951101'))
 To avoid these situations, the parameter \code{path_glob_permissive} is set by default to \code{'partial'}, which forces \code{Load()} to replace all the globbing expressions of a path pattern of a data set by fixed values taken from the path of the first found file for each data set, up to the folder right before the final files (globbing expressions in the file name will not be replaced, only those in the path to the file). Replacement of globbing expressions in the file name can also be triggered by setting \code{path_glob_permissive} to \code{FALSE} or \code{'no'}.
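A minimal sketch combining the two parameters documented above ('remapcells' and 'path_glob_permissive'); it reuses the hypothetical 'expA' path pattern from the paragraph above and the bounding box from the package examples further below, so it is indicative rather than a runnable recipe:

expA <- list(path = '/experiments/*/expA/monthly_mean/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc')
data <- Load('tos', list(expA), NULL, c('19901101'),
             output = 'lonlat', latmin = 27, latmax = 48,
             lonmin = -12, lonmax = 40,
             remapcells = 4,               # take 4 extra grid cells around the subset into account
             path_glob_permissive = FALSE) # also replace globbing expressions in the file names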
If needed to keep all globbing expressions, \code{path_glob_permissive} can be set to \code{TRUE} or \code{'yes'}. - } } + } \details{ The two output matrices have between 2 and 6 dimensions:\cr - \enumerate{ - \item{Number of experimental/observational datasets.} - \item{Number of members.} - \item{Number of startdates.} - \item{Number of leadtimes.} - \item{Number of latitudes (optional).} - \item{Number of longitudes (optional).} - } +\enumerate{ +\item{Number of experimental/observational datasets.} +\item{Number of members.} +\item{Number of startdates.} +\item{Number of leadtimes.} +\item{Number of latitudes (optional).} +\item{Number of longitudes (optional).} +} but the two matrices have the same number of dimensions and only the first two dimensions can have different lengths depending on the input arguments. - + For a detailed explanation of the process, read the documentation attached to the package or check the comments in the code. } \value{ \code{Load()} returns a named list following a structure similar to the used in the package 'downscaleR'.\cr The components are the following: - \itemize{ - \item{ +\itemize{ +\item{ 'mod' is the array that contains the experimental data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order. The order of the latitudes is always forced to be from 90 to -90 whereas the order of the longitudes is kept as in the original files (if possible). The longitude values provided in \code{lon} lower than 0 are added 360 (but still kept in the original order). In some cases, however, if multiple data sets are loaded in longitude-latitude mode, the longitudes (and also the data arrays in \code{mod} and \code{obs}) are re-ordered afterwards by \code{Load()} to range from 0 to 360; a warning is given in such cases. The longitude and latitude of the center of the grid cell that corresponds to the value [j, i] in 'mod' (along the dimensions latitude and longitude, respectively) can be found in the outputs \code{lon}[i] and \code{lat}[j] - } - \item{'obs' is the array that contains the observational data. The same documentation of parameter 'mod' applies to this parameter.} - \item{'lat' and 'lon' are the latitudes and longitudes of the centers of the cells of the grid the data is interpolated into (0 if the loaded variable is a global mean or the output is an area average).\cr +} +\item{'obs' is the array that contains the observational data. The same documentation of parameter 'mod' applies to this parameter.} +\item{'lat' and 'lon' are the latitudes and longitudes of the centers of the cells of the grid the data is interpolated into (0 if the loaded variable is a global mean or the output is an area average).\cr Both have the attribute 'cdo_grid_des' associated with a character string with the name of the common grid of the data, following the CDO naming conventions for grids.\cr 'lon' has the attributes 'first_lon' and 'last_lon', with the first and last longitude values found in the region defined by 'lonmin' and 'lonmax'. 'lat' has also the equivalent attributes 'first_lat' and 'last_lat'.\cr 'lon' has also the attribute 'data_across_gw' which tells whether the requested region via 'lonmin', 'lonmax', 'latmin', 'latmax' goes across the Greenwich meridian. 
As explained in the documentation of the parameter 'mod', the loaded data array is kept in the same order as in the original files when possible: this means that, in some cases, even if the data goes across the Greenwich, the data array may not go across the Greenwich. The attribute 'array_across_gw' tells whether the array actually goes across the Greenwich. E.g: The longitudes in the data files are defined to be from 0 to 360. The requested longitudes are from -80 to 40. The original order is kept, hence the longitudes in the array will be ordered as follows: 0, ..., 40, 280, ..., 360. In that case, 'data_across_gw' will be TRUE and 'array_across_gw' will be FALSE.\cr
 The attribute 'projection' is kept for compatibility with 'downscaleR'.}
- \item{'Variable' has the following components:
-   \itemize{
-     \item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.}
-     \item{'level', with information on the pressure level of the variable. Is kept to NULL by now.}
-   }
+\item{'Variable' has the following components:
+\itemize{
+\item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.}
+\item{'level', with information on the pressure level of the variable. It is kept as NULL for now.}
+}
 And the following attributes:
- \itemize{
-   \item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.}
-   \item{'units', a character string with the units of measure of the variable, as found in the source files.}
-   \item{'longname', a character string with the long name of the variable, as found in the source files.}
-   \item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.}
- }
- }
+\itemize{
+\item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.}
+\item{'units', a character string with the units of measure of the variable, as found in the source files.}
+\item{'longname', a character string with the long name of the variable, as found in the source files.}
+\item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.}
+}
+}
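The structure described so far is easiest to grasp on a live object; a brief sketch, assuming 'sampleData' holds the output of one of the \code{Load()} calls shown in the examples further below:

attr(sampleData$mod, 'dimensions')     # labels of the dimensions of the data array, in order
attr(sampleData$lon, 'cdo_grid_des')   # name of the common grid, in CDO naming conventions
attr(sampleData$lon, 'data_across_gw') # whether the requested region crosses the Greenwich Meridian
sampleData$Variable$varName            # short name of the loaded variable, e.g. 'tos'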
+\item{'Datasets' has the following components:
+\itemize{
+\item{'exp', a named list where the names are the identifying character strings of each experiment in 'exp', each associated to a list with the following components:
+\itemize{
+\item{'members', a list with the names of the members of the dataset.}
+\item{'source', a path or URL to the source of the dataset.}
+}
+}
+\item{'obs', similar to 'exp' but for observational datasets.}
+}
+}
+\item{'Dates', with the following components:
+\itemize{
+\item{'start', an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date.}
+\item{'end', an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date.}
+}
+}
+\item{'InitializationDates', a vector of starting dates as specified in 'sdates', in POSIX format.}
+\item{'when', a time stamp of the date the \code{Load()} call to obtain the data was issued.}
+\item{'source_files', a vector of character strings with complete paths to all the found files involved in the \code{Load()} call.}
+\item{'not_found_files', a vector of character strings with complete paths to the files involved in the \code{Load()} call that were not found.}
+}
 }
\author{
History:\cr
@@ -493,56 +383,56 @@ History:\cr
 # data already processed in R.
 #
 # Example 1: providing lists in 'exp' and 'obs'.
- \dontrun{
-data_path <- system.file('sample_data', package = 's2dverification')
-expA <- list(name = 'experiment', path = file.path(data_path,
-            'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly',
-            '$VAR_NAME$_$START_DATE$.nc'))
-obsX <- list(name = 'observation', path = file.path(data_path,
-            '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$',
-            '$VAR_NAME$_$YEAR$$MONTH$.nc'))
-
-# Now we are ready to use Load().
-startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', c('experiment'), c('observation'), startDates, - output = 'areave', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40, configfile = configfile) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'areave', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } +\dontrun{ + data_path <- system.file('sample_data', package = 's2dverification') + expA <- list(name = 'experiment', path = file.path(data_path, + 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', + '$VAR_NAME$_$START_DATE$.nc')) + obsX <- list(name = 'observation', path = file.path(data_path, + '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', + '$VAR_NAME$_$YEAR$$MONTH$.nc')) + + # Now we are ready to use Load(). + startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') + sampleData <- Load('tos', list(expA), list(obsX), startDates, + output = 'areave', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + # + # Example 2: providing character strings in 'exp' and 'obs', and providing + # a configuration file. + # The configuration file 'sample.conf' that we will create in the example + # has the proper entries to load these (see ?LoadConfigFile for details on + # writing a configuration file). + # + configfile <- paste0(tempdir(), '/sample.conf') + ConfigFileCreate(configfile, confirm = FALSE) + c <- ConfigFileOpen(configfile) + c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MIN', '-1e19', confirm = FALSE) + c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MAX', '1e19', confirm = FALSE) + data_path <- system.file('sample_data', package = 's2dverification') + exp_data_path <- paste0(data_path, '/model/$EXP_NAME$/') + obs_data_path <- paste0(data_path, '/$OBS_NAME$/') + c <- ConfigAddEntry(c, 'experiments', dataset_name = 'experiment', + var_name = 'tos', main_path = exp_data_path, + file_path = '$STORE_FREQ$_mean/$VAR_NAME$_3hourly/$VAR_NAME$_$START_DATE$.nc') + c <- ConfigAddEntry(c, 'observations', dataset_name = 'observation', + var_name = 'tos', main_path = obs_data_path, + file_path = '$STORE_FREQ$_mean/$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc') + ConfigFileSave(c, configfile, confirm = FALSE) + + # Now we are ready to use Load(). 
+ startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') + sampleData <- Load('tos', c('experiment'), c('observation'), startDates, + output = 'areave', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40, configfile = configfile) +} +\dontshow{ + startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') + sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'areave', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} } \keyword{datagen} -- GitLab From d69ad5de90214e4cb764ac977ea0285419c5fea2 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 25 Jan 2017 08:23:46 +0100 Subject: [PATCH 22/41] More manual changes --- man/Clim.Rd | 2 +- man/Load.Rd | 40 ++++------------------------------------ man/PlotAno.Rd | 4 +--- man/PlotEquiMap.Rd | 2 +- 4 files changed, 7 insertions(+), 41 deletions(-) diff --git a/man/Clim.Rd b/man/Clim.Rd index 03a3982b..a4225653 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -1,6 +1,6 @@ \name{Clim} \alias{Clim} -\title{Computes Per-pair/Kharin/Fuckar Climatologies} +\title{Computes Bias Corrected Climatologies} \description{ This function computes only per-pair climatologies from the experimental and observational matrices output from \code{Load()}. To compute plain climatologies from only experimental or observational data from \code{Load()}, the following code can be used: diff --git a/man/Load.Rd b/man/Load.Rd index 67ba6e8d..d2686499 100644 --- a/man/Load.Rd +++ b/man/Load.Rd @@ -63,33 +63,6 @@ $EXP_NAME$ will take the value specified in each component of the parameter 'exp' in the sub-component 'name'.\cr $OBS_NAME$ will take the value specified in each component of the parameter 'obs' in the sub-component 'obs.\cr $SUFFIX$ will take the value specified in each component of the parameters 'exp' and 'obs' in the sub-component 'suffix'.\cr - - Example: - \preformatted{ - list( - list( - name = 'experimentA', - path = file.path('/path/to/$DATASET_NAME$/$STORE_FREQ$', - '$VAR_NAME$$SUFFIX$', - '$VAR_NAME$_$START_DATE$.nc'), - nc_var_name = '$VAR_NAME$', - suffix = '_3hourly', - var_min = '-1e19', - var_max = '1e19' - ) - ) - } - This will make \code{Load()} look for, for instance, the following paths, if 'sdates' is c('19901101', '19951101', '20001101'):\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_19901101.nc\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_19951101.nc\cr - /path/to/experimentA/monthly_mean/tas_3hourly/tas_20001101.nc\cr\cr - - Vector of character strings: - To avoid specifying constantly the same information to load the same datasets, a vector with only the names of the datasets to load can be specified.\cr - \code{Load()} will then look for the information in a configuration file whose path must be specified in the parameter 'configfile'.\cr - Check \code{?ConfigFileCreate}, \code{ConfigFileOpen}, \code{ConfigEditEntry} & co. to learn how to create a new configuration file and how to add the information there. - - Example: c('experimentA', 'experimentB') } \item{obs}{ Argument with the same format as parameter 'exp'. 
See details on parameter 'exp'.\cr
@@ -104,7 +77,6 @@
 Vector with the numbers of members to load from the specified experimental datasets in 'exp'.\cr
 If not specified, the number of members of the first experimental dataset is detected automatically and used for all the experimental datasets.\cr
 If a single value is specified it is used for all the experimental datasets.\cr
-  An NA value in the 'nmember' list is interpreted as "fetch the same number of members for each experimental dataset as is available in the first experimental dataset".\cr
 Note: It is recommended to specify the number of members of the first experimental dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr
 }
 \item{nmemberobs}{
@@ -130,11 +102,11 @@
 }
 \item{lonmin}{
 If a 2-dimensional variable is loaded, values at longitudes lower than 'lonmin' aren't loaded.\cr
-  If 'lonmin' > 'lonmax', data across the Greenwich meridian are loaded.
+  If 'lonmin' > 'lonmax', data crossing the Greenwich Meridian is loaded.
 }
 \item{lonmax}{
 If a 2-dimensional variable is loaded, values at longitudes higher than 'lonmax' aren't loaded.\cr
-  If 'lonmin' > 'lonmax', data across Greenwich is loaded.
+  If 'lonmin' > 'lonmax', data crossing the Greenwich Meridian is loaded.
 }
 \item{latmin}{
 If a 2-dimensional variable is loaded, values at latitudes lower than 'latmin' aren't loaded.\cr
@@ -206,11 +178,7 @@ See help on parameter 'maskmod'.
 }
 \item{nprocs}{
 Number of parallel processes created to perform the fetch and computation of data.\cr
-  These processes will use shared memory in the processor in which Load() is launched.\cr
-  By default the number of logical cores in the machine will be detected and as many processes as logical cores there are will be created.\cr
-  A value of 1 won't create parallel processes.\cr
 When running in multiple processes, if an error occurs in any of the processes, a crash message appears in the R session of the original process but no detail is given about the error. A value of 1 will display all error messages in the original and only R session.\cr
-  Note: the parallel process create other blocking processes each time they need to compute an interpolation via 'cdo'.
 }
 \item{dimnames}{
 Named list where the name of each element is a generic name of the expected dimensions inside the NetCDF files. These generic names are 'lon', 'lat' and 'member'. 'time' is not needed because it's detected automatically.\cr
@@ -237,7 +205,7 @@ To avoid these situations, the parameter \code{path_glob_permissive} is set by d
 }
}
\details{
-The two output matrices have between 2 and 6 dimensions:\cr
+The two output arrays have between 2 and 6 dimensions:\cr
 \enumerate{
 \item{Number of experimental/observational datasets.}
 \item{Number of members.}
@@ -261,7 +229,7 @@ The components are the following:
 \item{'lat' and 'lon' are the latitudes and longitudes of the centers of the cells of the grid the data is interpolated into (0 if the loaded variable is a global mean or the output is an area average).\cr
 Both have the attribute 'cdo_grid_des' associated with a character string with the name of the common grid of the data, following the CDO naming conventions for grids.\cr
 'lon' has the attributes 'first_lon' and 'last_lon', with the first and last longitude values found in the region defined by 'lonmin' and 'lonmax'.
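As an aside, a hedged illustration of the 'lonmin' > 'lonmax' convention documented above ('expA' stands for a hypothetical dataset definition of the kind shown in the 'exp' examples): requesting the band from 80W to 40E across the Greenwich Meridian:

data <- Load('tos', list(expA), NULL, c('19901101'),
             output = 'lonlat',
             lonmin = 280, lonmax = 40, # lonmin > lonmax: region crosses the Greenwich Meridian
             latmin = 20, latmax = 80)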
'lat' has also the equivalent attributes 'first_lat' and 'last_lat'.\cr
-'lon' has also the attribute 'data_across_gw' which tells whether the requested region via 'lonmin', 'lonmax', 'latmin', 'latmax' goes across the Greenwich meridian. As explained in the documentation of the parameter 'mod', the loaded data array is kept in the same order as in the original files when possible: this means that, in some cases, even if the data goes across the Greenwich, the data array may not go across the Greenwich. The attribute 'array_across_gw' tells whether the array actually goes across the Greenwich. E.g: The longitudes in the data files are defined to be from 0 to 360. The requested longitudes are from -80 to 40. The original order is kept, hence the longitudes in the array will be ordered as follows: 0, ..., 40, 280, ..., 360. In that case, 'data_across_gw' will be TRUE and 'array_across_gw' will be FALSE.\cr
+'lon' has also the attribute 'data_across_gw' which tells whether the requested region via 'lonmin', 'lonmax', 'latmin', 'latmax' goes across the Greenwich Meridian. As explained in the documentation of the parameter 'mod', the loaded data array is kept in the same order as in the original files when possible: this means that, in some cases, even if the data crosses the Greenwich Meridian, the data array may not cross the Greenwich Meridian. The attribute 'array_across_gw' tells whether the array actually goes across the Greenwich Meridian. E.g: The longitudes in the data files are defined to be from 0 to 360. The requested longitudes are from -80 to 40. The original order is kept, hence the longitudes in the array will be ordered as follows: 0, ..., 40, 280, ..., 360. In that case, 'data_across_gw' will be TRUE and 'array_across_gw' will be FALSE.\cr
 The attribute 'projection' is kept for compatibility with 'downscaleR'.}
 \item{'Variable' has the following components:
 \itemize{
diff --git a/man/PlotAno.Rd b/man/PlotAno.Rd
index ec3d6de7..c0185e41 100644
--- a/man/PlotAno.Rd
+++ b/man/PlotAno.Rd
@@ -4,9 +4,7 @@
 Plot Raw Or Smoothed Anomalies
 }
 \description{
-Plots timeseries of raw or smoothed anomalies of any index output from \code{Load()} or \code{Ano()} or or \code{Ano_CrossValid()} or \code{Smoothing()} and organized in matrices with dimensions:\cr
- c(nmod/nexp, nmemb/nparam, nsdates, nltime) for the model data\cr
- c(nobs, nmemb, nsdates, nltime) for the observational data
+Plots timeseries of raw or smoothed anomalies of any variable output from \code{Load()}, \code{Ano()}, \code{Ano_CrossValid()} or \code{Smoothing()}.
 }
 \usage{
 PlotAno(exp_ano, obs_ano = NULL, sdates, toptitle = c("", "", "", "", "", "",
diff --git a/man/PlotEquiMap.Rd b/man/PlotEquiMap.Rd
index 9ba2ca49..cae7ec3f 100644
--- a/man/PlotEquiMap.Rd
+++ b/man/PlotEquiMap.Rd
@@ -4,7 +4,7 @@
 Maps A Two-Dimensional Variable On A Cylindrical Equidistant Projection
 }
 \description{
-Map longitude-latitude array (on a regular rectangular or gaussian grid) on a cylindrical equidistant latitude and longitude world projection with coloured grid cells. Only the region for which data has been provided is displayed. A colour bar (legend) can be plotted and adjusted. It is possible to draw superimposed arrows, dots, symbols, contour lines and boxes. A number of options is provided to adjust the position, size and colour of the components. This plot function is compatible with figure layouts if colour bar is disabled.
+Map longitude-latitude array (on a regular rectangular or gaussian grid) on a cylindrical equidistant latitude and longitude projection with coloured grid cells. Only the region for which data has been provided is displayed. A colour bar (legend) can be plotted and adjusted. It is possible to draw superimposed arrows, dots, symbols, contour lines and boxes. A number of options is provided to adjust the position, size and colour of the components. This plot function is compatible with figure layouts if colour bar is disabled. } \usage{ PlotEquiMap(var, lon, lat, varu = NULL, varv = NULL, -- GitLab From 8b5dc9ed2285cc61b43061ba288e9c81c41a6647 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 26 Jan 2017 17:14:18 +0100 Subject: [PATCH 23/41] Documentation changes and updates (minor) --- man/ACC.Rd | 12 ++++++------ man/Ano_CrossValid.Rd | 2 +- man/BrierScore.Rd | 11 +++++++---- man/Cluster.Rd | 8 ++++---- man/Consist_Trend.Rd | 6 +++--- man/EOF.Rd | 2 +- man/Eno.Rd | 4 ++-- man/EnoNew.Rd | 11 +++++++---- man/Filter.Rd | 6 +++--- man/FitAcfCoef.Rd | 4 ++-- man/FitAutocor.Rd | 4 ++-- man/GenSeries.Rd | 2 +- man/Histo2Hindcast.Rd | 4 ++-- 13 files changed, 41 insertions(+), 35 deletions(-) diff --git a/man/ACC.Rd b/man/ACC.Rd index 2c92fe3e..f24b9d82 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -1,10 +1,10 @@ \name{ACC} \alias{ACC} \title{ -Computes Anomaly Correlation Coefficient (Centred) +Computes Anomaly Correlation Coefficient } \description{ -Calculates the Anomaly Correlation Coefficient for the ensemble mean of each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACsC for each startdate and each leadtime.\cr +Calculates the Anomaly Correlation Coefficient for the ensemble mean of each model and the corresponding references for each startdate and each leadtime.\cr The domain of interest can be specified by providing the list of longitudes/latitudes (lon/lat) of the grid together with the corners of the domain:\cr lonlatbox = c(lonmin, lonmax, latmin, latmax) } @@ -40,19 +40,19 @@ TRUE/FALSE: confidence intervals and significance level provided or not. To guarantee the statistical robustness of the result, make sure that your experiments/oservations/startdates/leadtimes always have the same number of members. } \item{siglev}{ -Desired confidence level of the computed confidence intervals. +The confidence level for the computed confidence intervals. } } \value{ \item{ACC}{ -If conf set as TRUE, Matrix with dimensions:\cr +If \code{conf = TRUE}, array with dimensions:\cr c(nexp, nobs, nsdates, nleadtimes, 4) \cr The fifth dimension of length 4 corresponds to the lower limit of the \code{siglev}\% confidence interval, the ACC, the upper limit of the \code{siglev}\% confidence interval and the \code{siglev}\% significance level.\cr -If conf set as FALSE, Anomaly Correlation Coefficient with dimensions:\cr +If \code{conf = FALSE}, the array of the Anomaly Correlation Coefficient has dimensions:\cr c(nexp, nobs, nsdates, nleadtimes). 
}
 \item{MACC}{
-Mean Anomaly Correlation Coefficient with dimensions:\cr
+The array of the Mean Anomaly Correlation Coefficient with dimensions:\cr
 c(nexp, nobs, nleadtimes)
 }
}
diff --git a/man/Ano_CrossValid.Rd b/man/Ano_CrossValid.Rd
index 0261e863..7e488ca2 100644
--- a/man/Ano_CrossValid.Rd
+++ b/man/Ano_CrossValid.Rd
@@ -2,7 +2,7 @@
 \alias{Ano_CrossValid}
 \title{Computes Anomalies In Cross-Validation Mode}
 \description{
-This function computes anomalies from experimental and observational matrices output from \code{load()} by subtracting the climatologies computed in a cross-validation mode and with a per-pair method.
+Computes the anomalies from the arrays of the experimental and observational data output from \code{Load()} by subtracting the climatologies computed with a cross-validation technique and a per-pair method.
 }
 \usage{
 Ano_CrossValid(var_exp, var_obs, memb = TRUE)
diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd
index d31998c7..65b0a40c 100644
--- a/man/BrierScore.Rd
+++ b/man/BrierScore.Rd
@@ -5,10 +5,8 @@
 Compute Brier Score And Its Decomposition And Brier Skill Score
 }
 \description{
-Returns the values of the BS and its standard decomposition as well as the addition of the two winthin-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference forecast.
-Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr
-Stephenson et al. (2008). Two extra components in the Brier score decomposition. Weather and Forecasting, 23: 752-757.\cr
-Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr
+Computes the Brier score (BS) and the components of its standard decomposition, as well as the two within-bin components described in Stephenson et al. (2008). It also returns the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). The Brier skill score (BSS) is computed with the climatology as the reference forecast.
+
 .BrierScore provides the same functionality, but taking a matrix of ensemble members (ens) as input.
 }
@@ -60,6 +58,11 @@ x$bs - x$bs_check_res
 x$bs - x$bs_check_gres
 x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected
 }
+\references{
+Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr
+Stephenson et al. (2008). Two extra components in the Brier score decomposition. Weather and Forecasting, 23: 752-757.\cr
+Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr
+}
 \author{
 History:\cr
 0.1 - 2012-04 (L. Rodrigues, \email{lrodrigues@ic3.cat}) - Original code\cr
diff --git a/man/Cluster.Rd b/man/Cluster.Rd
index 1f798635..b9b15db8 100644
--- a/man/Cluster.Rd
+++ b/man/Cluster.Rd
@@ -1,7 +1,7 @@
 \name{Cluster}
 \alias{Cluster}
 \title{
-The K-means cluster analysis.
+K-means Clustering.
 }
 \description{
 This function computes cluster centers and their time series of occurrences,
@@ -12,9 +12,7 @@ to time. Specifically, it partitions the array along time axis in K groups or
 clusters in which each space vector/array belongs to (i.e., is a member of)
 the cluster with the nearest center or centroid. This function relies on the NbClust
-package (Charrad et al., 2014 JSS). For more information about the K-means see
-Chapter 15 Cluster Analysis in Wilks, 2011, Statistical Methods in the
-Atmospheric Sciences, 3rd ed., Elsevire, pp 676.
+package (Charrad et al., 2014 JSS).
 }
 \usage{
 Cluster(var, weights, nclusters = NULL, index = 'sdindex', posdates = 1)
@@ -115,6 +113,8 @@ res2 <- Cluster(var = a1, weights = array(1, dim = dim(a1)[2]))
 print(res2$cluster)
 print(res2$centers)
 }
+\references{
+Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevier, pp 676.
 \author{
 History: 1.0 # 2014-10 (N.S. Fuckar, neven.fuckar@bsc.es) # Original code
diff --git a/man/Consist_Trend.Rd b/man/Consist_Trend.Rd
index 66deedf8..6f964bf8 100644
--- a/man/Consist_Trend.Rd
+++ b/man/Consist_Trend.Rd
@@ -4,9 +4,9 @@
 Computes Trends Using Only Model Data For Which Observations Are Available
 }
 \description{
-Computes trends by least square fitting together with the associated error interval for both the observational and model data.\cr
+Computes the trend coefficients for a time series by least square fitting, together with the associated error interval for both the observational and model data.\cr
 Provides also the detrended observational and modeled data.\cr
-The trend is computed along the second dimension, expected to be the start date dimension (the user is supposed to perform an ensemble averaging operation with \code{Mean1Dim()} prior to using \code{Consist_trend()}).
+By default, the trend is computed along the second dimension of the input array, which is expected to be the start date dimension. For arrays containing multiple model members, the user will first have to calculate the ensemble average, using \code{Mean1Dim()} or otherwise (see the example).
 }
 \usage{
 Consist_Trend(var_exp, var_obs, interval = 1)
@@ -29,7 +29,7 @@ Number of months/years between 2 start dates. Default = 1. The trends will be pr
 }
 \value{
 \item{$trend}{
-Trends of model and observational data with dimensions:\cr
+Trend coefficients of model and observational data with dimensions:\cr
 c(nmod/nexp + nobs, 3, nltime) up to\cr
 c(nmod/nexp + nobs, 3, nltime, nlevel, nlat, nlon)\cr
 The length 3 dimension corresponds to the lower limit of the 95\% confidence interval, the slope of the trends and the upper limit of the 95\% confidence interval.
diff --git a/man/EOF.Rd b/man/EOF.Rd
index 6b3fc9a4..7c914e15 100644
--- a/man/EOF.Rd
+++ b/man/EOF.Rd
@@ -5,7 +5,7 @@
 Area-Weighted Empirical Orthogonal Function Analysis Using SVD
 }
 \description{
 Performs an area-weighted EOF analysis using SVD based on a covariance matrix
-by default, based on a correlation matrix if \code{corr} argument is set to
+by default, based on the correlation matrix if \code{corr} argument is set to
 \code{TRUE}.
 }
 \usage{
diff --git a/man/Eno.Rd b/man/Eno.Rd
index bd93f958..8a5e0b5f 100644
--- a/man/Eno.Rd
+++ b/man/Eno.Rd
@@ -4,8 +4,8 @@
 Computes Effective Sample Size With Classical Method
 }
 \description{
-Computes the effective number of independant data along the posdim dimension of a matrix.\cr
-This effective number of independant date may be required to perform statistical/inference tests.\cr
+Computes the effective number of independent values along the posdim dimension of a matrix.\cr
+This effective number of independent observations can be used in statistical/inference tests.\cr
 Based on eno function from Caio Coelho from rclim.txt.
}
 \usage{
diff --git a/man/EnoNew.Rd b/man/EnoNew.Rd
index a1dab668..0cec23b4 100644
--- a/man/EnoNew.Rd
+++ b/man/EnoNew.Rd
@@ -2,14 +2,14 @@
 \alias{EnoNew}
 \title{Computes Effective Sample Size Following Guemas et al, BAMS, 2013b}
 \description{
-This function computes the equivalent number of independent data in the xdata array following the method described in Guemas V., Auger L., Doblas-Reyes F., JAMC, 2013. The method relies on the Trenberth (1984) formula combined with a reduced uncertainty of the estimated autocorrelation function compared to the original approach.}
+This function computes the effective number of independent values in the xdata array following the method described in Guemas V., Auger L., Doblas-Reyes F., JAMC, 2013. \code{EnoNew} provides similar functionality to \code{Eno} but with the added options to remove the linear trend or filter the frequency peaks.}
 \usage{
 EnoNew(xdata, detrend = FALSE, filter = FALSE)
 }
 \arguments{
-  \item{xdata}{Timeseries from which the equivalent number of independent data is requested}
-  \item{detrend}{TRUE applies a linear detrending to xdata prior to the estimation of the equivalent number of independant data.}
-  \item{filter}{TRUE applies a filtering of any frequency peak prior to the estimation of the equivalent number of independant data.}
+  \item{xdata}{A numeric vector.}
+  \item{detrend}{Whether the linear trend should be removed from the data prior to the estimation of the equivalent number of independent values.}
+  \item{filter}{Whether a filtering of the frequency peaks should be applied prior to the estimation of the equivalent number of independent values.}
 }
 \examples{
 # See examples on Load() to understand the first lines in this example
@@ -50,6 +50,9 @@ sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'),
 eno <- EnoNew(sampleData$mod[1, 1, , 1, 2, 3])
 print(eno)
 }
+\references{
+Guemas V, Auger L, Doblas-Reyes FJ, Rust H, Ribes A, 2014, Dependencies in Statistical Hypothesis Tests for Climate Time Series. Bulletin of the American Meteorological Society, 95 (11), 1666-1667.
+}
 \author{
 History:\cr
 0.1 - 2012-06 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr
diff --git a/man/Filter.Rd b/man/Filter.Rd
index 6029692d..bb1bcf44 100644
--- a/man/Filter.Rd
+++ b/man/Filter.Rd
@@ -2,9 +2,9 @@
 \alias{Filter}
 \title{Filter Frequency Peaks From An Array}
 \description{
-This function filters from the xdata array, the signal of frequency freq.\cr
-The filtering is performed by dichotomy, seeking for the frequency around freq and the phase that maximizes the signal to subtract to xdata.\cr
-The maximization of the signal to subtract relies on a minimization of the mean square differences between xdata and a cosine of given frequency and phase.
+This function filters out the selected frequency from a time series.\cr
+The filtering is performed by dichotomy, seeking a frequency around the parameter \code{freq} and the phase that maximizes the signal to subtract from the time series.\cr
+The maximization of the signal to subtract relies on a minimization of the mean square differences between the time series (xdata) and the cosine of the specified frequency and phase.
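Given the signature shown in the hunk above, a minimal self-contained sketch of \code{EnoNew} on a synthetic AR(1) series (the series itself is illustrative and not part of the original examples):

library(s2dverification)
set.seed(1)
xdata <- as.numeric(arima.sim(model = list(ar = 0.7), n = 120)) # autocorrelated series
EnoNew(xdata)                 # effective sample size, well below length(xdata)
EnoNew(xdata, detrend = TRUE) # same estimate after removing the linear trend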
}
 \usage{
 Filter(xdata, freq)
 }
diff --git a/man/FitAcfCoef.Rd b/man/FitAcfCoef.Rd
index 1be3af50..73db6dfc 100644
--- a/man/FitAcfCoef.Rd
+++ b/man/FitAcfCoef.Rd
@@ -3,8 +3,8 @@
 \title{Fits an AR1 AutoCorrelation Function Using the Cardano Formula}
 \description{
 This function finds the minimum point of the fourth order polynomial (a - x)^2 + 0.25(b - x^2)^2 written to fit the two autoregression coefficients a and b.\cr
-Thanks to the Cardano formula, provided a and b in [0 1], the problem is well posed, delta > 0 and there is only one solution to the minimum.\cr\cr
-This function is called in Alpha() to minimize the mean square differences between the theoretical autocorrelation function of an AR1 and the first guess of estimated autocorrelation function estacf, using only the first two lags.}
+A consequence of the Cardano formula is that, provided a and b are in [0 1], the problem is well posed, delta > 0 and there is only one minimum.\cr\cr
+This function is called in Alpha() to minimize the mean square differences between the theoretical autocorrelation function of an AR1 and the first guess of the estimated autocorrelation function estacf, using only the first two lags.}
 \usage{FitAcfCoef(a, b)}
 \arguments{
 \item{a}{Coefficient a : first estimate of the autocorrelation at lag 1}
diff --git a/man/FitAutocor.Rd b/man/FitAutocor.Rd
index 2474453f..d5031fdb 100644
--- a/man/FitAutocor.Rd
+++ b/man/FitAutocor.Rd
@@ -1,12 +1,12 @@
 \name{FitAutocor}
 \alias{FitAutocor}
 \title{Fits an AR1 Autocorrelation Function Using Dichotomy}
-\description{This function fits the theoretical autocorrelation function of an AR1 to the first guess of estimated autocorrelation function estacf containing any number of lags. The fitting relies on a dichotomial minimisation of the mean square differences between both autocorrelation functions. It returns the autocorrelation at lag 1 of the fitted AR1 process.}
+\description{This function fits the theoretical autocorrelation function of an AR1 to the first guess of the estimated autocorrelation function estacf containing any number of lags. The fitting relies on a dichotomial minimisation of the mean square differences between both autocorrelation functions.
It returns the autocorrelation at lag 1 of the fitted AR1 process.} \usage{ FitAutocor(estacf, window = c(-1, 1), prec = 0.01) } \arguments{ - \item{estacf}{First guess of the autocorrelation function} + \item{estacf}{First guess for the autocorrelation function} \item{window}{Interval in which the autocorrelation at lag 1 should be found.} \item{prec}{Precision to which the autocorrelation function at lag 1 is to be estimated.} } diff --git a/man/GenSeries.Rd b/man/GenSeries.Rd index 2162e927..e616d6a1 100644 --- a/man/GenSeries.Rd +++ b/man/GenSeries.Rd @@ -1,7 +1,7 @@ \name{GenSeries} \alias{GenSeries} \title{Generates An AR1 Time Series} -\description{This functions generates AR1 processes containing n data, with alpha as autocorrelation at lag 1, and mean and standard deviation provided by the mean and std arguments.} +\description{This function generates AR1 processes containing n data points, where alpha is the autocorrelation at lag 1, and the mean and standard deviation are specified by the mean and std arguments.} \usage{ GenSeries(n, alpha, mean, std) } diff --git a/man/Histo2Hindcast.Rd b/man/Histo2Hindcast.Rd index cf9e2aa8..3764f661 100644 --- a/man/Histo2Hindcast.Rd +++ b/man/Histo2Hindcast.Rd @@ -11,7 +11,7 @@ Histo2Hindcast(varin, sdatesin, sdatesout, nleadtimesout) } \arguments{ \item{varin}{ -Input model or observational data:\cr +Array of model or observational data with dimensions:\cr c(nmod/nexp/nobs, nmemb/nparam, nsdates, nltimes) up to\cr c(nmod/nexp/nobs, nmemb/nparam, nsdates, nltimes, nlevel, nlat, nlon) } @@ -26,7 +26,7 @@ Number of leadtimes in the output matrix. } } \value{ -A matrix with the same number of dimensions as the input one, the same dimensions 1 and 2 and potentially the same dimensions 5 to 7. Dimensions 3 and 4 are set by the arguments sdatesout and nleadtimesout. +An array with the same number of dimensions as varin, the same dimensions 1 and 2 and potentially the same dimensions 5 to 7. Dimensions 3 and 4 are set by the arguments sdatesout and nleadtimesout. } \examples{ # See examples on Load() to understand the first lines in this example -- GitLab From 0edabb9dbc6a15044dc1527eb36fe2d4018c964c Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 27 Jan 2017 11:45:57 +0100 Subject: [PATCH 24/41] Small documentation changes --- man/IniListDims.Rd | 2 +- man/InsertDim.Rd | 4 ++-- man/LeapYear.Rd | 4 ++-- man/Mean1Dim.Rd | 8 ++++---- man/MeanListDim.Rd | 8 ++++---- man/NAO.Rd | 12 +++++++----- man/PlotACC.Rd | 4 ++-- man/ProbBins.Rd | 4 ++-- man/ProjectField.Rd | 2 +- man/Regression.Rd | 8 ++++---- man/Season.Rd | 8 ++++---- man/SelIndices.Rd | 10 +++++----- man/Smoothing.Rd | 4 ++-- man/Spectrum.Rd | 2 +- man/StatSeasAtlHurr.Rd | 12 +++++++----- man/ToyModel.Rd | 2 +- 16 files changed, 49 insertions(+), 45 deletions(-) diff --git a/man/IniListDims.Rd b/man/IniListDims.Rd index a171dd96..0af0253c 100644 --- a/man/IniListDims.Rd +++ b/man/IniListDims.Rd @@ -4,7 +4,7 @@ Creates A List Of Integer Ranges } \description{ -This function generates a list of arrays where those arrays contain integers from 1 to various numbers. This list of arrays is used in the other functions as a list of indices of the elements of the matrices. +This function generates a list of arrays containing integers greater than or equal to 1. This list of arrays is used in other functions as a list of indices of the elements of the matrices. 
}
 \usage{
 IniListDims(dims, lenlist)
diff --git a/man/InsertDim.Rd b/man/InsertDim.Rd
index d3708b89..6b08a583 100644
--- a/man/InsertDim.Rd
+++ b/man/InsertDim.Rd
@@ -1,10 +1,10 @@
 \name{InsertDim}
 \alias{InsertDim}
 \title{
-Adds A Dimension To A Matrix
+Adds A Dimension To An Array
 }
 \description{
-Add one dimension to the matrix 'var' in position 'posdim' with length 'lendim' and which correspond to 'lendim' repetitions of the 'var' matrix.
+Inserts an extra dimension into an array at position 'posdim' with length 'lendim', which corresponds to 'lendim' repetitions of the 'var' array.
 }
 \usage{
 InsertDim(var, posdim, lendim)
diff --git a/man/LeapYear.Rd b/man/LeapYear.Rd
index b09d2157..faf41456 100644
--- a/man/LeapYear.Rd
+++ b/man/LeapYear.Rd
@@ -2,14 +2,14 @@
 \alias{LeapYear}
 \title{Checks Whether A Year Is Leap Year}
 \description{
-This function tells whether a year is leap year or not.
+This function tells whether a year is a leap year or not.
 }
 \usage{
 LeapYear(year)
 }
 \arguments{
 \item{year}{
-The year to tell whether is leap year or not.
+A numeric value indicating the year in the Gregorian calendar.
 }
 }
 \value{
diff --git a/man/Mean1Dim.Rd b/man/Mean1Dim.Rd
index e1df907c..0d56d083 100644
--- a/man/Mean1Dim.Rd
+++ b/man/Mean1Dim.Rd
@@ -1,10 +1,10 @@
 \name{Mean1Dim}
 \alias{Mean1Dim}
 \title{
-Averages A Matrix Along A Dimension
+Averages An Array Along A Dimension
 }
 \description{
-Averages the matrix var along the posdim dimension between limits [1] and limits [2] if limits argument is provided by the user.
+Averages the array along the user-specified posdim dimension. The user can specify a subset of the dimension to take the mean along.
 }
 \usage{
 Mean1Dim(var, posdim, narm = TRUE, limits = NULL)
@@ -20,11 +20,11 @@ Dimension to average along.
 Ignore NA (TRUE) values or not (FALSE).
 }
 \item{limits}{
-Limits to average between.
+Limits to average between. Default is to take the mean along the entire dimension.
 }
 }
 \value{
-Matrix with one dimension less than the input one containing the average along posdim dimension.
+Array with one dimension less than the input array, containing the average along the posdim dimension.
 }
 \examples{
 a <- array(rnorm(24), dim = c(2, 3, 4))
diff --git a/man/MeanListDim.Rd b/man/MeanListDim.Rd
index 547d30db..f3667d75 100644
--- a/man/MeanListDim.Rd
+++ b/man/MeanListDim.Rd
@@ -1,17 +1,17 @@
 \name{MeanListDim}
 \alias{MeanListDim}
 \title{
-Averages A Matrix Along Various Dimensions
+Averages An Array Along Multiple Dimensions
 }
 \description{
-Averages the matrix var along a set of dimensions given by the argument dims.
+Averages an array along a set of dimensions given by the argument dims.
 }
 \usage{
 MeanListDim(var, dims, narm = TRUE)
 }
 \arguments{
 \item{var}{
-Matrix to average.
+Input array.
 }
 \item{dims}{
 List of dimensions to average along.
@@ -21,7 +21,7 @@
 Ignore NA (TRUE) values or not (FALSE).
 }
 }
 \value{
-Matrix with as many dimensions less than the input matrix as provided by the list dims and containing the average along this list of dimensions.
+The averaged array, with the dimensions specified in \code{dims} removed.
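The array helpers revised in this patch compose naturally; a small sketch using only the signatures shown above (synthetic data, not from the original manual):

library(s2dverification)
a <- array(rnorm(24), dim = c(2, 3, 4))
dim(InsertDim(a, 2, 5))        # c(2, 5, 3, 4): length-5 dimension inserted at position 2
dim(Mean1Dim(a, 3))            # c(2, 3): averaged along the third dimension
MeanListDim(a, dims = c(1, 3)) # length-3 result: dimensions 1 and 3 averaged out
LeapYear(2000)                 # TRUE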
} \examples{ a <- array(rnorm(24), dim = c(2, 3, 4)) diff --git a/man/NAO.Rd b/man/NAO.Rd index 1f3360ae..c0963652 100644 --- a/man/NAO.Rd +++ b/man/NAO.Rd @@ -1,11 +1,11 @@ \name{NAO} \alias{NAO} \title{ -Compute the North Atlantic Oscillation (NAO) Index +Computes the North Atlantic Oscillation (NAO) Index } \description{ Compute the North Atlantic Oscillation (NAO) index based on -the leading EOF of sea level pressure (SLP) anomalies over the +the leading EOF of the sea level pressure (SLP) anomalies over the north Atlantic region (20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and observed anomalies onto the observed EOF pattern (Pobs) or the forecast anomalies onto the EOF pattern of the other years of the forecast @@ -13,9 +13,6 @@ forecast anomalies onto the EOF pattern of the other years of the forecast 1-month lead seasonal forecasts that can be plotted with BoxPlot(). Returns cross-validated PCs of the NAO index for forecast (ano_exp) and observations (ano_obs) based on the leading EOF pattern.\cr -See Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of -multi-model seasonal forecasts of the wintertime North Atlantic Oscillation. -Climate Dynamics, 21, 501-514. DOI: 10.1007/s00382-003-0350-4 } \usage{ NAO(ano_exp = NULL, ano_obs = NULL, lon, lat, ftime_average = 2:4, @@ -115,6 +112,11 @@ nao <- NAO(ano$ano_exp, ano$ano_obs, sampleData$lon, sampleData$lat) PlotBoxWhisker(nao$NAO_exp, nao$NAO_obs, "NAO index, DJF", "NAO index (PC1) TOS", monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") } +\references{ +Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +multi-model seasonal forecasts of the wintertime North Atlantic Oscillation. +Climate Dynamics, 21, 501-514. DOI: 10.1007/s00382-003-0350-4 +} \author{ History:\cr 0.1 - 2013-08 (F. Lienert, \email{flienert at ic3.cat}) - Original code\cr diff --git a/man/PlotACC.Rd b/man/PlotACC.Rd index 21cfd80a..dc4b5c87 100644 --- a/man/PlotACC.Rd +++ b/man/PlotACC.Rd @@ -4,9 +4,9 @@ Plot Plumes/Timeseries Of Anomaly Correlation Coefficients } \description{ -Plots plumes/timeseries of ACC from a matrix with dimensions (output from \code{ACC()}): \cr +Plots plumes/timeseries of ACC from an array with dimensions (output from \code{ACC()}): \cr c(nexp, nobs, nsdates, nltime, 4)\cr -with the fourth dimension of length 4 containing the lower limit of the 95\% confidence interval, the ACC, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. +where the fourth dimension is of length 4 and contains the lower limit of the 95\% confidence interval, the ACC, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. } \usage{ PlotACC(ACC, sdates, toptitle = "", sizetit = 1, ytitle = "", limits = NULL, diff --git a/man/ProbBins.Rd b/man/ProbBins.Rd index 96ec0576..51c2331c 100644 --- a/man/ProbBins.Rd +++ b/man/ProbBins.Rd @@ -1,7 +1,7 @@ \name{ProbBins} \alias{ProbBins} \title{ -Computes probabilistic information of a forecast relative to a threshold or a quantile. +Computes Probabilistic Information of a Forecast Relative to a Threshold or a Quantile. } \description{ Compute probabilistic bins of a set of forecast years ('fcyr') relative to the forecast climatology over the whole period of anomalies, optionally excluding the selected forecast years ('fcyr') or the forecast year for which the probabilistic bins are being computed (see 'compPeriod'). 
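The full argument list of \code{ProbBins} is not reproduced in this patch, so the following sketch is indicative only: the argument names 'fcyr', 'thr' and 'compPeriod' come from the text above, and 'ano_exp' stands for an anomaly array of the kind returned by \code{Ano()}:

# Tercile bins for forecast year 3, with the full period as climatology:
PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), compPeriod = 'Full period')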
@@ -37,7 +37,7 @@ Three options: "Full period"/"Without fcyr"/"cross-validation" (The probabilitie
 }
 }
 \value{
-Matrix with probabilistic information and dimensions:\cr
+Array with probabilistic information and dimensions:\cr
 c(length('thr'+1), nfcyr, nmemb/nparam, nmod/nexp/nobs, nltime, nlat, nlon)\cr
 The values along the first dimension take values 0 or 1 depending on which of the 'thr'+1 categories the forecast/observation at the corresponding grid point, time step, member and starting date belongs to.
 }
diff --git a/man/ProjectField.Rd b/man/ProjectField.Rd
index 6d49bf2b..91d33039 100644
--- a/man/ProjectField.Rd
+++ b/man/ProjectField.Rd
@@ -4,7 +4,7 @@
 Project Anomalies onto Modes of Variability
 }
 \description{
-Project anomalies onto modes to get temporal evolution of the EOF mode\cr
+Project anomalies onto modes of variability to get the temporal evolution of the EOF mode\cr
 selected. Returns principal components (PCs) by area-weighted projection onto EOF pattern (from \code{EOF()}). Able to handle NAs.
 }
diff --git a/man/Regression.Rd b/man/Regression.Rd
index 00242993..da4dca37 100644
--- a/man/Regression.Rd
+++ b/man/Regression.Rd
@@ -1,7 +1,7 @@
 \name{Regression}
 \alias{Regression}
 \title{
-Computes The Regression Of A Matrix On Another Along A Dimension
+Computes The Regression Of An Array On Another Along A Dimension
 }
 \description{
 Computes the regression of the input matrix vary on the input matrix varx along the posREG dimension by least square fitting. Provides the slope of the regression, the associated confidence interval, and the intercept.\cr
 Regression(vary, varx, posREG = 2)
 }
 \arguments{
 \item{vary}{
-Matrix of any number of dimensions up to 10.
+Array of any number of dimensions up to 10.
 }
 \item{varx}{
-Matrix of any number of dimensions up to 10. Same dimensions as vary.
+Array of any number of dimensions up to 10. Same dimensions as vary.
 }
 \item{posREG}{
 Position along which to compute the regression.
 }
 }
 \value{
 \item{$regression}{
-Matrix with same dimensions as varx and vary except along posREG dimension which is replaced by a length 4 dimension, corresponding to the lower limit of the 95\% confidence interval, the slope, the upper limit of the 95\% confidence interval and the intercept.
+Array with same dimensions as varx and vary except along posREG dimension which is replaced by a length 4 dimension, corresponding to the lower limit of the 95\% confidence interval, the slope, the upper limit of the 95\% confidence interval and the intercept.
 }
 \item{$filtered}{
 Same dimensions as vary filtered out from the regression onto varx along the posREG dimension.
diff --git a/man/Season.Rd b/man/Season.Rd
index 0e19b490..aa143e27 100644
--- a/man/Season.Rd
+++ b/man/Season.Rd
@@ -4,20 +4,20 @@
 Computes Seasonal Means
 }
 \description{
-Computes seasonal means on timeseries organized in a matrix of any number of dimensions up to 10 dimensions where the time dimension is one of those 10 dimensions.
+Computes seasonal means on timeseries organized in an array of any number of dimensions up to 10 dimensions where the time dimension is one of those 10 dimensions.
 }
 \usage{
 Season(var, posdim = 4, monini, moninf, monsup)
 }
 \arguments{
 \item{var}{
-Matrix containing the timeseries along one of its dimensions.
+Array containing the timeseries along one of its dimensions.
 }
 \item{posdim}{
 Dimension along which to compute seasonal means = Time dimension
 }
 \item{monini}{
-First month of the time-series: 1 to 12.
+First month of the time series: 1 to 12.
}
 \item{moninf}{
 Month when to start the seasonal means: 1 to 12.
@@ -27,7 +27,7 @@ Month when to stop the seasonal means: 1 to 12.
 }
 }
 \value{
-Matrix with the same dimensions as var except along the posdim dimension which length corresponds to the number of seasons. Partial seasons are not accounted for.
+Array with the same dimensions as var except along the posdim dimension whose length corresponds to the number of seasons. Partial seasons are not accounted for.
 }
 \examples{
 # Load sample data as in Load() example:
diff --git a/man/SelIndices.Rd b/man/SelIndices.Rd
index 11c5fdf0..b6b1795f 100644
--- a/man/SelIndices.Rd
+++ b/man/SelIndices.Rd
@@ -4,24 +4,24 @@
 Slices A Matrix Along A Dimension
 }
 \description{
-This function allows to select a subensemble from a matrix of any dimensions, providing the dimension along which the user aims at cutting the input matrix and between which indices.
+This function selects a subset of ensemble members from an array containing any number of dimensions.
 }
 \usage{
 SelIndices(var, posdim, limits)
 }
 \arguments{
 \item{var}{
-A matrix of any dimensions.
+An array with any number of dimensions.
 }
 \item{posdim}{
-The dimension along which a submatrix should be selected.
+The dimension along which the ensemble subset should be selected.
 }
 \item{limits}{
-The lower and upper indice of the selection along the posdim dimension.
+The lower and upper limits for the selection of ensemble members along the posdim dimension.
 }
 }
 \value{
-The sliced matrix.
+The subsetted array.
 }
 \examples{
 a <- array(rnorm(24), dim = c(2, 3, 4, 1))
diff --git a/man/Smoothing.Rd b/man/Smoothing.Rd
index 77ecee40..841fb052 100644
--- a/man/Smoothing.Rd
+++ b/man/Smoothing.Rd
@@ -1,10 +1,10 @@
 \name{Smoothing}
 \alias{Smoothing}
 \title{
-Smoothes A Matrix Along A Dimension
+Smoothes An Array Along A Dimension
 }
 \description{
-Smoothes a matrix of any number of dimensions along one of its dimensions
+Smoothes an array of any number of dimensions along one of its dimensions.
 }
 \usage{
 Smoothing(var, runmeanlen = 12, numdimt = 4)
diff --git a/man/Spectrum.Rd b/man/Spectrum.Rd
index f99567ad..eda82779 100644
--- a/man/Spectrum.Rd
+++ b/man/Spectrum.Rd
@@ -2,7 +2,7 @@
 \alias{Spectrum}
 \title{Estimates Frequency Spectrum}
 \description{
-This function estimates the frequency spectrum of the xdata array together with its 95\% and 99\% significance level. The output is provided as a matrix with dimensions c(number of frequencies, 4). The column contains the frequency values, the power, the 95\% significance level and the 99\% one.\cr
+This function estimates the frequency spectrum of the xdata array together with its 95\% and 99\% significance level. The output is provided as an array with dimensions c(number of frequencies, 4). The columns contain the frequency values, the power, the 95\% significance level and the 99\% one.\cr
 The spectrum estimation relies on an R built-in function and the significance levels are estimated by a Monte-Carlo method.
 }
 \usage{Spectrum(xdata)}
diff --git a/man/StatSeasAtlHurr.Rd b/man/StatSeasAtlHurr.Rd
index 5ee2fc2e..3acb0edb 100644
--- a/man/StatSeasAtlHurr.Rd
+++ b/man/StatSeasAtlHurr.Rd
@@ -5,11 +5,6 @@
 Compute one of G. Villarini's statistically downscaled measure of mean Atlantic hurricane activity and its variance.
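A compact sketch of \code{Season} and \code{SelIndices} with the signatures shown above (synthetic monthly data starting in January; not part of the original examples):

library(s2dverification)
a <- array(rnorm(2 * 3 * 5 * 24), dim = c(2, 3, 5, 24)) # 24 monthly time steps on dimension 4
jja <- Season(a, posdim = 4, monini = 1, moninf = 6, monsup = 8) # JJA means, one per year
sub <- SelIndices(a, posdim = 2, limits = c(1, 2))               # keep members 1 and 2
dim(sub) # c(2, 2, 5, 24)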
diff --git a/man/StatSeasAtlHurr.Rd b/man/StatSeasAtlHurr.Rd
index 5ee2fc2e..3acb0edb 100644
--- a/man/StatSeasAtlHurr.Rd
+++ b/man/StatSeasAtlHurr.Rd
@@ -5,11 +5,6 @@
Compute one of G. Villarini's statistically downscaled measures of mean Atlantic hurricane activity and its variance.
The hurricane activity is estimated using seasonal averages of sea surface temperature anomalies over the tropical Atlantic (bounded by 10N-25N and 80W-20W) and the tropics at large (bounded by 30N-30S). The anomalies are for the JJASON season.\cr
The estimated seasonal average is either 1) the number of hurricanes, 2) the number of tropical cyclones with lifetime >= 48 h or 3) the power dissipation index (PDI; in 10^11 m^3 s^{-2}).\cr
The statistical models used in this function are described in\cr
-Villarini et al. (2010) Mon Wea Rev, 138, 2681-2705.\cr
-Villarini et al. (2012) Mon Wea Rev, 140, 44-65.\cr
-Villarini et al. (2012) J Clim, 25, 625-637.\cr
-An example of how the function can be used in hurricane forecast studies is given in\cr
-Caron, L.-P. et al. (2014) Multi-year prediction skill of Atlantic hurricane activity in CMIP5 decadal hindcasts. Climate Dynamics, 42, 2675-2690. doi:10.1007/s00382-013-1773-1.
}
\usage{
StatSeasAtlHurr(atlano = NULL, tropano = NULL, hrvar = 'HR')
@@ -62,6 +57,13 @@ TropAno <- matrix(c(-0.22, -.13, 0.07, -0.16, -0.15,
hr_count <- StatSeasAtlHurr(atlano = AtlAno,
                            tropano = TropAno,
                            hrvar = 'HR')$mean
print(hr_count)
}
+\references{
+Villarini et al. (2010) Mon Wea Rev, 138, 2681-2705.\cr
+Villarini et al. (2012) Mon Wea Rev, 140, 44-65.\cr
+Villarini et al. (2012) J Clim, 25, 625-637.\cr
+An example of how the function can be used in hurricane forecast studies is given in\cr
+Caron, L.-P. et al. (2014) Multi-year prediction skill of Atlantic hurricane activity in CMIP5 decadal hindcasts. Climate Dynamics, 42, 2675-2690. doi:10.1007/s00382-013-1773-1.
+}
\author{
History:\cr
0.1 - 2015-11 (Louis-Philippe Caron, \email{louis-philippe.caron@bsc.es}) - Original code
diff --git a/man/ToyModel.Rd b/man/ToyModel.Rd
index 54c3cf79..b009ef0f 100644
--- a/man/ToyModel.Rd
+++ b/man/ToyModel.Rd
@@ -1,7 +1,7 @@
\name{ToyModel}
\alias{ToyModel}
\title{
-Synthetic forecast generator imitating seasonal to decadal forecasts. Tg
+Synthetic forecast generator imitating seasonal to decadal forecasts. The components of a forecast: (1) predictability, (2) forecast error, (3) non-stationarity and (4) ensemble generation. The forecast can be computed for real observations or observations generated artificially.
-- GitLab
From e8c0046b20f845a9cdee3edf17e6ca6a1e64265f Mon Sep 17 00:00:00 2001
From: Nicolau Manubens
Date: Tue, 31 Jan 2017 17:06:11 +0100
Subject: [PATCH 25/41] Small docfix.
---
man/Cluster.Rd | 1 +
1 file changed, 1 insertion(+)
diff --git a/man/Cluster.Rd b/man/Cluster.Rd
index b9b15db8..16447731 100644
--- a/man/Cluster.Rd
+++ b/man/Cluster.Rd
@@ -115,6 +115,7 @@ print(res2$centers)
}
\references{
Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevier, pp 676.
+}
\author{
History:
1.0 # 2014-10 (N.S.
Fuckar, neven.fuckar@bsc.es) # Original code -- GitLab From c39e62785c3b907cbadd065a02fcc499898778c6 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 1 Feb 2017 10:36:31 +0100 Subject: [PATCH 26/41] Correcting indentation --- R/BrierScore.R | 117 ++++++++++++++++++++++++------------------------- R/Corr.R | 74 ++++++++++++++----------------- R/RMS.R | 35 +++++++-------- R/RMSSS.R | 34 ++++++++++++++ R/RatioRMS.R | 18 +++----- R/RatioSDRMS.R | 39 ++++++----------- R/Trend.R | 18 ++++---- 7 files changed, 167 insertions(+), 168 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index cce75377..b6cb81a3 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -85,76 +85,75 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { ens.mean <- rowMeans(ens, na.rm = TRUE) n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members bins <- as.list(paste("bin", 1:nbins,sep = "")) - for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean <= thresholds[i + 1])) - } else { - bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean < thresholds[i + 1])) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean < thresholds[i + 1])) + } } - } - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(ens.mean[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] - } + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(ens.mean[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } - fkbar[fkbar == Inf] <- 0 - okbar[is.nan(okbar)] <- 0 - obar <- sum(obs) / length(obs) - relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 + fkbar[fkbar == Inf] <- 0 + okbar[is.nan(okbar)] <- 0 + obar <- sum(obs) / length(obs) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i]) { - term1 <- term1 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + for (j in 1:nk[i]) { + term1 <- term1 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } } - } - } - } + } + } - rel <- relsum / n - res <- ressum / n - unc <- obar * (1 - obar) - #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - bs <- sum((rowMeans(ens, na.rm = T) - obs)^2) / n - bs_check_res <- rel - res + unc - bss_res <- (res - rel) / unc - gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution - bs_check_gres <- rel - gres + unc # BS using GRES - bss_gres <- (gres - rel) / unc # BSS using GRES - - # - # Estimating the bias-corrected components of the BS - # - term3 <- array(0, nbins) - for (i in 1:nbins) { - term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) - } - term_a <- sum(term3, na.rm = T) / n - term_b <- (obar * (1 - obar)) / (n - 1) - rel_bias_corrected <- rel - term_a - gres_bias_corrected <- gres - term_a + term_b - if 
(rel_bias_corrected < 0 || gres_bias_corrected < 0) {
-    rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0)
-    gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0)
-    rel_bias_corrected <- rel_bias_corrected2
-    gres_bias_corrected <- gres_bias_corrected2
+  rel <- relsum / n
+  res <- ressum / n
+  unc <- obar * (1 - obar)
+  #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n)
+  bs <- sum((rowMeans(ens, na.rm = T) - obs)^2) / n
+  bs_check_res <- rel - res + unc
+  bss_res <- (res - rel) / unc
+  gres <- res - term1 * (1 / n) + term2 * (2 / n)  # Generalized resolution
+  bs_check_gres <- rel - gres + unc  # BS using GRES
+  bss_gres <- (gres - rel) / unc  # BSS using GRES
+
+  #
+  # Estimating the bias-corrected components of the BS
+  #
+  term3 <- array(0, nbins)
+  for (i in 1:nbins) {
+    term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i])
  }
-  unc_bias_corrected <- unc + term_b
-  bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected
+  term_a <- sum(term3, na.rm = T) / n
+  term_b <- (obar * (1 - obar)) / (n - 1)
+  rel_bias_corrected <- rel - term_a
+  gres_bias_corrected <- gres - term_a + term_b
+  if (rel_bias_corrected < 0 || gres_bias_corrected < 0) {
+    rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0)
+    gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0)
+    rel_bias_corrected <- rel_bias_corrected2
+    gres_bias_corrected <- gres_bias_corrected2
+  }
+  unc_bias_corrected <- unc + term_b
+  bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected

-  #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) {
-  #  cat("No error found \ n")
-  #  cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n")
-  #}
+  #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) {
+  #  cat("No error found\n")
+  #  cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias\n")
+  #}

-  invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected))
+  invisible(list(rel = rel, res = res, unc = unc, bs = bs,
+                 bs_check_res = bs_check_res, bss_res = bss_res, gres = gres,
+                 bs_check_gres = bs_check_gres, bss_gres = bss_gres,
+                 rel_bias_corrected = rel_bias_corrected,
+                 gres_bias_corrected = gres_bias_corrected,
+                 unc_bias_corrected = unc_bias_corrected,
+                 bss_bias_corrected = bss_bias_corrected))
}
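(Editorial aside, not part of the patch: a runnable sketch of the identity that the commented-out check above used to verify, BS = REL - GRES + UNC. It assumes the patched signature in which the forecast is a member-by-column matrix of event probabilities, as the rowMeans() call above suggests; the data are synthetic.)

    library(s2dverification)
    set.seed(1)
    obs <- rbinom(100, 1, 0.5)             # binary event observations
    pred <- matrix(runif(500), 100, 5)     # hypothetical 5-member event probabilities
    b <- BrierScore(obs, pred)
    all.equal(b$bs, b$rel - b$gres + b$unc)  # the decomposition with generalized resolution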
diff --git a/R/Corr.R b/R/Corr.R
index ce576b84..48d6ab0e 100644
--- a/R/Corr.R
+++ b/R/Corr.R
@@ -125,55 +125,47 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL,
}
.Corr <- function(ens, obs, siglev = 0.95, method = 'pearson',
-                 conf = TRUE, pval = TRUE) {
+                  conf = TRUE, pval = TRUE) {
  if (method != "kendall" && method != "spearman" && method != "pearson") {
    stop("Wrong correlation method")
+  }
  # Check the siglev argument:
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  if (siglev > 1 || siglev < 0) {
-    stop("siglev need to be higher than O and lower than 1")
-  }
-  }
+  if (siglev > 1 || siglev < 0) {
+    stop("siglev needs to be higher than 0 and lower than 1")
+  }
-
-  p <- c()
-  conflow <- c()
-  confhigh <- c()
-
-  ens.mean <- rowMeans(ens)
-  CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method)
-
-  if (pval || conf) {
-    if (method == "kendall" | method == "spearman") {
-      eno <- Eno(rank(obs), 1)
-    } else if (method == "pearson") {
-      eno <- Eno(obs, 1)
-    }
-  }
-  if (pval & method == "pearson") {
-
-    t <- CORR*sqrt((eno-2)/(1-(CORR^2)))
-    p <- 1 - pt(t, eno-2)
-    p.val <- p
-
-  }
-  if (conf & method == "pearson") {
-    conf_low <- (1 - siglev) / 2
-    conf_high <- 1 - conf_low
-    conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(
-                  eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt(
-                  eno - 3)))
-    conf.int <- conf.int[!is.na(CORR)]
-    conflow =conf.int[1]
-    confhigh=conf.int[2]
-  }
-
-  invisible(result <- list(corr = CORR, p.val = p, conf_low = conflow, conf_high = confhigh))
+  p <- c()
+  conflow <- c()
+  confhigh <- c()
+  ens.mean <- rowMeans(ens)
+  CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method)
+  if (pval || conf) {
+    if (method == "kendall" | method == "spearman") {
+      eno <- Eno(rank(obs), 1)
+    } else if (method == "pearson") {
+      eno <- Eno(obs, 1)
+    }
+  }
+  if (pval & method == "pearson") {
+    t <- CORR * sqrt((eno - 2) / (1 - CORR^2))
+    p <- 1 - pt(t, eno - 2)
+  }
+  if (conf & method == "pearson") {
+    conf_low <- (1 - siglev) / 2
+    conf_high <- 1 - conf_low
+    conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)),
+                  tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno - 3)))
+    conf.int <- conf.int[!is.na(CORR)]
+    conflow <- conf.int[1]
+    confhigh <- conf.int[2]
+  }
  # Output
  # ~~~~~~~~
  #
+  invisible(result <- list(corr = CORR, p.val = p, conf_low = conflow, conf_high = confhigh))
+}
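(Editorial aside, not part of the patch: the confidence interval built in .Corr() above is the classical Fisher z-transform interval, spelled out numerically here; eno stands for the effective number of independent values that Eno() estimates.)

    r <- 0.6; eno <- 20; siglev <- 0.95
    probs <- c((1 - siglev) / 2, 1 - (1 - siglev) / 2)
    tanh(atanh(r) + qnorm(probs) / sqrt(eno - 3))
    # roughly 0.21 and 0.82; the interval widens as eno shrinks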
diff --git a/R/RMS.R b/R/RMS.R
index 04bf52b2..adf43fc3 100644
--- a/R/RMS.R
+++ b/R/RMS.R
@@ -102,33 +102,28 @@ RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL,
}
.RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) {
-
  #
  # RMS & its confidence interval computation
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #
  if (conf) {
-
    conf_low <- (1 - siglev) / 2
    conf_high <- 1 - conf_low
  }
-  dif <- rowMeans(ens) - obs
-  enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5
-  if (conf) {
-    eno <- Eno(dif, 1)
-
-    ndat <- length(sort(dif))
-    conf.int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5,
-                  (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5)
-    names(conf.int) <- c("conf_low","conf_high")
-  } else {
-    conf.int <- c()
-    names(conf.int) <- c()
-  }
-
-  results <- c(enlrms,conf.int)
-  names(results) <- c("rms",names(conf.int))
-  return(results)
-
+  dif <- rowMeans(ens) - obs
+  enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5
+  if (conf) {
+    eno <- Eno(dif, 1)
+    ndat <- length(sort(dif))
+    conf.int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5,
+                  (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5)
+    names(conf.int) <- c("conf_low", "conf_high")
+  } else {
+    conf.int <- c()
+  }
+  results <- c(enlrms, conf.int)
+  names(results) <- c("rms", names(conf.int))
+  return(results)
}
diff --git a/R/RMSSS.R b/R/RMSSS.R
index 74a2a387..2e9063b7 100644
--- a/R/RMSSS.R
+++ b/R/RMSSS.R
@@ -87,3 +87,37 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) {
  #
  enlRMSSS
}
+
+.RMSSS <- function(ens, obs, pval = TRUE) {
+  #
+  # RMSSS and its p-value computation
+  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #
+  p.val <- NA  # default, so the output is well defined when pval = FALSE
+  dif1 <- rowMeans(ens) - obs
+  dif2 <- obs
+  rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5
+  rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5
+  rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs(
+    rms2), na.rm = TRUE) / 1000
+  rmsss <- 1 - (rms1 / rms2)
+  if (pval) {
+    eno1 <- Eno(dif1, 1)
+    eno2 <- Eno(dif2, 1)
+    F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1))
+    if (is.na(eno1) == FALSE & is.na(eno2) == FALSE & eno1 > 2 & eno2 > 2) {
+      p.val <- 1 - pf(F, eno1 - 1, eno2 - 1)
+    }
+  }
+  #
+  # Output
+  # ~~~~~~~~
+  #
+  list(rmsss = rmsss, p.val = p.val)
+}
\ No newline at end of file
diff --git a/R/RatioRMS.R b/R/RatioRMS.R
index a5bc5b48..9c8e4377 100644
--- a/R/RatioRMS.R
+++ b/R/RatioRMS.R
@@ -86,35 +86,27 @@ RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) {
}
.RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) {
-  #
  # RMS ratio and its p-value computation
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #
-
-
+  p.val <- NA  # default, so the output is well defined when pval = FALSE
  dif1 <- rowMeans(ens, na.rm = TRUE) - obs
  dif2 <- rowMeans(ens.ref, na.rm = TRUE) - obs
  rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5
  rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5
  rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(
-    abs(rms2), na.rm = TRUE) / 1000
+    abs(rms2), na.rm = TRUE) / 1000
  enlratiorms <- (rms1 / rms2)
  if (pval) {
    eno1 <- Eno(dif1, 1)
    eno2 <- Eno(dif2, 1)
    F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1))
    F[which(F < 1)] <- 1 / F[which(F < 1)]
-
-
-    if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) {
-      p.val <- (1 - pf(F,
-       eno1 - 1, eno2 - 1)) * 2
-
-    }
-
+    if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) {
+      p.val <- (1 - pf(F, eno1 - 1, eno2 - 1)) * 2
+    }
  }
  # Output
-  list(ratiorms = enlratiorms,p.val =p.val)
+  list(ratiorms = enlratiorms, p.val = p.val)
}
diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R
index 543d6e05..d33656b6 100644
--- a/R/RatioSDRMS.R
+++ b/R/RatioSDRMS.R
@@ -91,38 +91,27 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) {
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #
  ensmean <- rowMeans(ens)
-  std <- sd(ensmean)
-  enosd <- Eno(ensmean,1)
-
+  std <- sd(ensmean)
+  enosd <- Eno(ensmean, 1)
  dif <- ensmean - obs
-  rms <- mean(dif ** 2, 1, narm = TRUE) ** 0.5
-
-  enorms <- Eno(dif,1)
-  enlratiormssd <- std /rms
+  rms <- mean(dif ** 2, na.rm = TRUE) ** 0.5  # the stray '1' would be taken as the trim argument
+  enorms <- Eno(dif, 1)
+  enlratiormssd <- std / rms
  p.val <- 0
  if (pval) {
-
-    l1 <- enosd
-    l2 <- enorms
-
-
-    F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1))
-    if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) {
-      p.val <- 1 - pf(F, l1 - 1, l2 - 1)
-
-    }
-    else {
-      p.val <- NA
-    }
-  }
-
-
+    l1 <- enosd
+    l2 <- enorms
+    F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1))
+    if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) {
+      p.val <- 1 - pf(F, l1 - 1, l2 - 1)
+    } else {
+      p.val <- NA
+    }
+  }
  #
  # Output
  # ~~~~~~~~
  #
  list(ratio = enlratiormssd, p.val = p.val)
}
diff --git a/R/Trend.R b/R/Trend.R
index b8ef4d0c..a797b705 100644
--- a/R/Trend.R
+++ b/R/Trend.R
@@ -84,17 +84,15 @@ Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) {
  ensmean <- rowMeans(ens, na.rm = TRUE)
  if (any(!is.na(ensmean))) {
-    mon <- seq(ensmean) * interval
-    lm.out <- lm(ensmean ~ mon, na.action = na.omit)
-    trend <- c(lm.out$coefficients[2], lm.out$coefficients[1])
-    if (conf) {
-      conf.int <- confint(lm.out, level = siglev)[2, 1:2]
-    }
-    detrend <-
ensmean[is.na(ensmean) == FALSE] - lm.out$fitted.values - } + mon <- seq(ensmean) * interval + lm.out <- lm(ensmean ~ mon, na.action = na.omit) + trend <- c(lm.out$coefficients[2], lm.out$coefficients[1]) + if (conf) { + conf.int <- confint(lm.out, level = siglev)[2, 1:2] + } + detrend <- ensmean[is.na(ensmean) == FALSE] - lm.out$fitted.values + } - - # # Outputs # ~~~~~~~~~ -- GitLab From f8f5a12ebeaccde6cbec9ac0f9bdc741066661c9 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 1 Feb 2017 11:23:14 +0100 Subject: [PATCH 27/41] Revert load manual changes --- man/Load.Rd | 632 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 387 insertions(+), 245 deletions(-) diff --git a/man/Load.Rd b/man/Load.Rd index d2686499..dfd61331 100644 --- a/man/Load.Rd +++ b/man/Load.Rd @@ -1,147 +1,285 @@ \name{Load} \alias{Load} -\title{Loads Experimental And Observational Data from NetCDF files} +\title{Loads Experimental And Observational Data} \description{ - This function loads monthly or daily data from a user-specified forecast, and then automatically loads the corresponding observations (if available).\cr\cr - - The function can handle spatial data (lon-lat) as well as global averages. The function can also calculate area-averages.\cr\cr - - If multiple datasets are loaded with different grids, \code{Load()} will interpolate them onto a common grid.\cr +This function loads monthly or daily data from a set of specified experimental datasets together with data that date-corresponds from a set of specified observational datasets. See parameters 'storefreq', 'sampleperiod', 'exp' and 'obs'.\cr\cr +A set of starting dates is specified through the parameter 'sdates'. Data of each starting date is loaded for each model. +\code{Load()} arranges the data in two arrays with a similar format both with the following dimensions: + \enumerate{ + \item{The number of experimental datasets determined by the user through the argument 'exp' (for the experimental data array) or the number of observational datasets available for validation (for the observational array) determined as well by the user through the argument 'obs'.} + \item{The greatest number of members across all experiments (in the experimental data array) or across all observational datasets (in the observational data array).} + \item{The number of starting dates determined by the user through the 'sdates' argument.} + \item{The greatest number of lead-times.} + \item{The number of latitudes of the selected zone.} + \item{The number of longitudes of the selected zone.} + } +Dimensions 5 and 6 are optional and their presence depends on the type of the specified variable (global mean or 2-dimensional) and on the selected output type (area averaged time series, latitude averaged time series, longitude averaged time series or 2-dimensional time series).\cr +In the case of loading an area average the dimensions of the arrays will be only the first 4.\cr\cr + +Only a specified variable is loaded from each experiment at each starting date. 
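(Editorial aside, not part of the patch: a minimal sketch of the call shape just described. The dataset name and path pattern are hypothetical; only the argument names and the first four output dimensions come from the description above.)

    library(s2dverification)
    expA <- list(name = 'experimentA',
                 path = '/path/to/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_$START_DATE$.nc')
    data <- Load('tas', exp = list(expA), obs = NULL,
                 sdates = c('19901101', '19951101'), output = 'areave')
    dim(data$mod)  # c(nexp, nmemb, nsdates, nltime): the first four dimensions listed above
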
See parameter 'var'.\cr +Afterwards, observational data that matches every starting date and lead-time of every experimental dataset is fetched in the file system (so, if two predictions at two different start dates overlap, some observational values will be loaded and kept in memory more than once).\cr +If no data is found in the file system for an experimental or observational array point it is filled with an NA value.\cr\cr + +If the specified output is 2-dimensional or latitude- or longitude-averaged time series all the data is interpolated into a common grid. If the specified output type is area averaged time series the data is averaged on the individual grid of each dataset but can also be averaged after interpolating into a common grid. See parameters 'grid' and 'method'.\cr +Once the two arrays are filled by calling this function, other functions in the s2dverification package that receive as inputs data formatted in this data structure can be executed (e.g: \code{Clim()} to compute climatologies, \code{Ano()} to compute anomalies, ...).\cr\cr + +Load() has many additional parameters to disable values and trim dimensions of selected variable, even masks can be applied to 2-dimensional variables. See parameters 'nmember', 'nmemberobs', 'nleadtime', 'leadtimemin', 'leadtimemax', 'sampleperiod', 'lonmin', 'lonmax', 'latmin', 'latmax', 'maskmod', 'maskobs', 'varmin', 'varmax'.\cr\cr + +The parameters 'exp' and 'obs' can take various forms. The most direct form is a list of lists, where each sub-list has the component 'path' associated to a character string with a pattern of the path to the files of a dataset to be loaded. These patterns can contain wildcards and tags that will be replaced automatically by \code{Load()} with the specified starting dates, member numbers, variable name, etc.\cr +See parameter 'exp' or 'obs' for details.\cr\cr + +Only NetCDF files are supported. OPeNDAP URLs to NetCDF files are also supported.\cr +\code{Load()} can load 2-dimensional or global mean variables in any of the following formats: + \itemize{ + \item{experiments: + \itemize{ + \item{file per ensemble per starting date (YYYY, MM and DD somewhere in the path)} + \item{file per member per starting date (YYYY, MM, DD and MemberNumber somewhere in the path. 
Ensemble experiments with different numbers of members can be loaded in a single \code{Load()} call.)}
+      }
+(YYYY, MM and DD specify the starting dates of the predictions)
+    }
+    \item{observations:
+      \itemize{
+        \item{file per ensemble per month (YYYY and MM somewhere in the path)}
+        \item{file per member per month (YYYY, MM and MemberNumber somewhere in the path, obs with different numbers of members supported)}
+        \item{file per dataset (No constraints in the path but the time axes in the file have to be properly defined)}
+      }
+(YYYY and MM correspond to the actual month data in the file)
+    }
+  }
+In all the formats the data can be stored in a daily or monthly frequency, or a multiple of these (see parameters 'storefreq' and 'sampleperiod').\cr
+All the data files must contain the target variable defined over time and potentially over members, latitude and longitude dimensions in any order, time being the record dimension.\cr
+In the case of a two-dimensional variable, the variables longitude and latitude must be defined inside the data file too and must have the same names as the dimension for longitudes and latitudes respectively.\cr
+The names of these dimensions (and longitude and latitude variables) and the name for the members dimension are expected to be 'longitude', 'latitude' and 'ensemble' respectively. However, these names can be adjusted with the parameter 'dimnames' or can be configured in the configuration file (read below in parameters 'exp', 'obs' or see \code{?ConfigFileOpen} for more information).\cr
+All the data files are expected to have numeric values representable with 32 bits. Be aware when choosing the fill values or infinite values in the datasets to load.\cr\cr
+
+The Load() function returns a named list following a structure similar to the one used in the package 'downscaleR'.\cr
+The components are the following:
+  \itemize{
+    \item{'mod' is the array that contains the experimental data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order.}
+    \item{'obs' is the array that contains the observational data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order.}
+    \item{'lat' and 'lon' are the latitudes and longitudes of the grid into which the data is interpolated (0 if the loaded variable is a global mean or the output is an area average).\cr
+Both have the attribute 'cdo_grid_des' associated with a character string with the name of the common grid of the data, following the CDO naming conventions for grids.\cr
+The attribute 'projection' is kept for compatibility with 'downscaleR'.}
+    \item{'Variable' has the following components:
+      \itemize{
+        \item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.}
+        \item{'level', with information on the pressure level of the variable.
Is kept to NULL by now.} + } +And the following attributes: + \itemize{ + \item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.} + \item{'units', a character string with the units of measure of the variable, as found in the source files.} + \item{'longname', a character string with the long name of the variable, as found in the source files.} + \item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.} + } + } + \item{'Datasets' has the following components: + \itemize{ + \item{'exp', a named list where the names are the identifying character strings of each experiment in 'exp', each associated to a list with the following components: + \itemize{ + \item{'members', a list with the names of the members of the dataset.} + \item{'source', a path or URL to the source of the dataset.} + } + } + \item{'obs', similar to 'exp' but for observational datasets.} + } + } + \item{'Dates', with the follwing components: + \itemize{ + \item{'start', an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date.} + \item{'end', an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date.} + } + } + \item{'InitializationDates', a vector of starting dates as specified in 'sdates', in POSIX format.} + \item{'when', a time stamp of the date the \code{Load()} call to obtain the data was issued.} + \item{'source_files', a vector of character strings with complete paths to all the found files involved in the \code{Load()} call.} + \item{'not_found_files', a vector of character strings with complete paths to not found files involved in the \code{Load()} call.} + } } \usage{ - Load(var, exp = NULL, obs = NULL, sdates, nmember = NULL, - nmemberobs = NULL, nleadtime = NULL, leadtimemin = 1, - leadtimemax = NULL, storefreq = 'monthly', sampleperiod = 1, - lonmin = 0, lonmax = 360, latmin = -90, latmax = 90, - output = 'areave', method = 'conservative', grid = NULL, - maskmod = vector("list", 15), maskobs = vector("list", 15), - configfile = NULL, varmin = NULL, varmax = NULL, - silent = FALSE, nprocs = NULL, dimnames = NULL, - remapcells = 2, path_glob_permissive = 'partial') +Load(var, exp = NULL, obs = NULL, sdates, nmember = NULL, + nmemberobs = NULL, nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', sampleperiod = 1, + lonmin = 0, lonmax = 360, latmin = -90, latmax = 90, + output = 'areave', method = 'conservative', grid = NULL, + maskmod = vector("list", 15), maskobs = vector("list", 15), + configfile = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL, dimnames = NULL, + remapcells = 2, path_glob_permissive = 'partial') } \arguments{ \item{var}{ - Name of the variable to load.\cr - If the variable name inside the files to load do not both match var, the parameters 'exp' and 'obs' can be used.\cr +Name of the variable to load.\cr +If the variable name inside the files to load is not the same as this, adjust properly the parameters 'exp' and 'obs'.\cr +This parameter is mandatory.\cr +Ex: 'tas' } \item{exp}{ - This argument can either be a list of lists or a vector of character strings.\cr - The first format is adequate when loading data you'll only load once or occasionally. 
The second format is targeted to avoid providing repeatedly the information on a certain dataset but is more complex to use.\cr\cr - IMPORTANT: Place first the experiment with the largest number of members and, if possible, with the largest number of leadtimes. If not possible, the arguments 'nmember' and/or 'nleadtime' should be filled to not miss any member or leadtime.\cr - If 'exp' is not specified or set to NULL, observational data is loaded for each start-date as far as 'leadtimemax'. If 'leadtimemax' is not provided, \code{Load()} will retrieve data of a period of time as long as the time period between the first specified start date and the current date.\cr - \cr - List of lists:\cr - A list of lists where each sub-list contains information on the location and format of the data files of the dataset to load.\cr - Each sub-list can have the following components: +This argument can take two formats: a list of lists or a vector of character strings. Each format will trigger a different mechanism of locating the requested datasets.\cr +The first format is adequate when loading data you'll only load once or occasionally. The second format is targeted to avoid providing repeatedly the information on a certain dataset but is more complex to use.\cr\cr +IMPORTANT: Place first the experiment with the largest number of members and, if possible, with the largest number of leadtimes. If not possible, the arguments 'nmember' and/or 'nleadtime' should be filled to not miss any member or leadtime.\cr +If 'exp' is not specified or set to NULL, observational data is loaded for each start-date as far as 'leadtimemax'. If 'leadtimemax' is not provided, \code{Load()} will retrieve data of a period of time as long as the time period between the first specified start date and the current date.\cr +\cr +List of lists:\cr +A list of lists where each sub-list contains information on the location and format of the data files of the dataset to load.\cr +Each sub-list can have the following components: \itemize{ - \item{ - 'name': A character string to identify the dataset. Optional. - } - \item{ - 'path': A character string with the pattern of the path to the files of the dataset. This pattern can be built up making use of some special tags that \code{Load()} will replace with the appropriate values to find the dataset files. The allowed tags are $START_DATE$, $YEAR$, $MONTH$, $DAY$, $MEMBER_NUMBER$, $STORE_FREQ$, $VAR_NAME$, $EXP_NAME$ (only for experimental datasets), $OBS_NAME$ (only for observational datasets) and $SUFFIX$\cr - Example: /path/to/$EXP_NAME$/postprocessed/$VAR_NAME$/\cr - $VAR_NAME$_$START_DATE$.nc\cr - If 'path' is not specified and 'name' is specified, the dataset information will be fetched with the same mechanism as when using the vector of character strings (read below). - } - \item{ - 'nc_var_name': Character string with the actual variable name to look for inside the dataset files. Optional. Takes, by default, the same value as the parameter 'var'. - } - \item{ - 'suffix': Wildcard character string that can be used to build the 'path' of the dataset. It can be accessed with the tag $SUFFIX$. Optional. Takes '' by default. - } - \item{ - 'var_min': Important: Character string. Minimum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. - } - \item{ - 'var_max': Important: Character string. Maximum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. 
- } + \item{ +'name': A character string to identify the dataset. Optional. + } + \item{ +'path': A character string with the pattern of the path to the files of the dataset. This pattern can be built up making use of some special tags that \code{Load()} will replace with the appropriate values to find the dataset files. The allowed tags are $START_DATE$, $YEAR$, $MONTH$, $DAY$, $MEMBER_NUMBER$, $STORE_FREQ$, $VAR_NAME$, $EXP_NAME$ (only for experimental datasets), $OBS_NAME$ (only for observational datasets) and $SUFFIX$\cr +Example: /path/to/$EXP_NAME$/postprocessed/$VAR_NAME$/\cr + $VAR_NAME$_$START_DATE$.nc\cr +If 'path' is not specified and 'name' is specified, the dataset information will be fetched with the same mechanism as when using the vector of character strings (read below). + } + \item{ +'nc_var_name': Character string with the actual variable name to look for inside the dataset files. Optional. Takes, by default, the same value as the parameter 'var'. + } + \item{ +'suffix': Wildcard character string that can be used to build the 'path' of the dataset. It can be accessed with the tag $SUFFIX$. Optional. Takes '' by default. + } + \item{ +'var_min': Important: Character string. Minimum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. + } + \item{ +'var_max': Important: Character string. Maximum value beyond which read values will be deactivated to NA. Optional. No deactivation is performed by default. + } } - The tag $START_DATES$ will be replaced with all the starting dates specified in 'sdates'. $YEAR$, $MONTH$ and $DAY$ will take a value for each iteration over 'sdates', simply these are the same as $START_DATE$ but split in parts.\cr - $MEMBER_NUMBER$ will be replaced by a character string with each member number, from 1 to the value specified in the parameter 'nmember' (in experimental datasets) or in 'nmemberobs' (in observational datasets). It will range from '01' to 'N' or '0N' if N < 10.\cr - $STORE_FREQ$ will take the value specified in the parameter 'storefreq' ('monthly' or 'daily').\cr - $VAR_NAME$ will take the value specified in the parameter 'var'.\cr - $EXP_NAME$ will take the value specified in each component of the parameter 'exp' in the sub-component 'name'.\cr - $OBS_NAME$ will take the value specified in each component of the parameter 'obs' in the sub-component 'obs.\cr - $SUFFIX$ will take the value specified in each component of the parameters 'exp' and 'obs' in the sub-component 'suffix'.\cr +The tag $START_DATES$ will be replaced with all the starting dates specified in 'sdates'. $YEAR$, $MONTH$ and $DAY$ will take a value for each iteration over 'sdates', simply these are the same as $START_DATE$ but split in parts.\cr +$MEMBER_NUMBER$ will be replaced by a character string with each member number, from 1 to the value specified in the parameter 'nmember' (in experimental datasets) or in 'nmemberobs' (in observational datasets). 
It will range from '01' to 'N' or '0N' if N < 10.\cr +$STORE_FREQ$ will take the value specified in the parameter 'storefreq' ('monthly' or 'daily').\cr +$VAR_NAME$ will take the value specified in the parameter 'var'.\cr +$EXP_NAME$ will take the value specified in each component of the parameter 'exp' in the sub-component 'name'.\cr +$OBS_NAME$ will take the value specified in each component of the parameter 'obs' in the sub-component 'obs.\cr +$SUFFIX$ will take the value specified in each component of the parameters 'exp' and 'obs' in the sub-component 'suffix'.\cr + +Example: +\preformatted{ +list( + list( + name = 'experimentA', + path = file.path('/path/to/$DATASET_NAME$/$STORE_FREQ$', + '$VAR_NAME$$SUFFIX$', + '$VAR_NAME$_$START_DATE$.nc'), + nc_var_name = '$VAR_NAME$', + suffix = '_3hourly', + var_min = '-1e19', + var_max = '1e19' + ) +) +} +This will make \code{Load()} look for, for instance, the following paths, if 'sdates' is c('19901101', '19951101', '20001101'):\cr + /path/to/experimentA/monthly_mean/tas_3hourly/tas_19901101.nc\cr + /path/to/experimentA/monthly_mean/tas_3hourly/tas_19951101.nc\cr + /path/to/experimentA/monthly_mean/tas_3hourly/tas_20001101.nc\cr\cr + +Vector of character strings: +To avoid specifying constantly the same information to load the same datasets, a vector with only the names of the datasets to load can be specified.\cr +\code{Load()} will then look for the information in a configuration file whose path must be specified in the parameter 'configfile'.\cr +Check \code{?ConfigFileCreate}, \code{ConfigFileOpen}, \code{ConfigEditEntry} & co. to learn how to create a new configuration file and how to add the information there. + +Example: c('experimentA', 'experimentB') } \item{obs}{ - Argument with the same format as parameter 'exp'. See details on parameter 'exp'.\cr - If 'obs' is not specified or set to NULL, no observational data is loaded.\cr +Argument with the same format as parameter 'exp'. See details on parameter 'exp'.\cr +If 'obs' is not specified or set to NULL, no observational data is loaded.\cr } \item{sdates}{ - Vector of starting dates of the experimental runs to be loaded following the pattern 'YYYYMMDD'.\cr - - +Vector of starting dates of the experimental runs to be loaded following the pattern 'YYYYMMDD'.\cr +This argument is mandatory.\cr +Ex: c('19601101', '19651101', '19701101') } \item{nmember}{ - Vector with the numbers of members to load from the specified experimental datasets in 'exp'.\cr - If not specified, the automatically detected number of members of the first experimental dataset is detected and replied to all the experimental datasets.\cr - If a single value is specified it is replied to all the experimental datasets.\cr - Note: It is recommended to specify the number of members of the first experimental dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr +Vector with the numbers of members to load from the specified experimental datasets in 'exp'.\cr +If not specified, the automatically detected number of members of the first experimental dataset is detected and replied to all the experimental datasets.\cr +If a single value is specified it is replied to all the experimental datasets.\cr +Data for each member is fetched in the file system. 
If not found is filled with NA values.\cr +An NA value in the 'nmember' list is interpreted as "fetch as many members of each experimental dataset as the number of members of the first experimental dataset".\cr +Note: It is recommended to specify the number of members of the first experimental dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr +Ex: c(4, 9) } \item{nmemberobs}{ - The same as nmember but for the reference data.\cr - +Vector with the numbers of members to load from the specified observational datasets in 'obs'.\cr +If not specified, the automatically detected number of members of the first observational dataset is detected and replied to all the observational datasets.\cr +If a single value is specified it is replied to all the observational datasets.\cr +Data for each member is fetched in the file system. If not found is filled with NA values.\cr +An NA value in the 'nmemberobs' list is interpreted as "fetch as many members of each observational dataset as the number of members of the first observational dataset".\cr +Note: It is recommended to specify the number of members of the first observational dataset if it is stored in file per member format because there are known issues in the automatic detection of members if the path to the dataset in the configuration file contains Shell Globbing wildcards such as '*'.\cr +Ex: c(1, 5) + } + \item{nleadtime}{ +Deprecated. See parameter 'leadtimemax'.\cr } - \item{leadtimemin}{ - Only lead-times higher or equal to 'leadtimemin' are loaded. Takes by default value 1. +Only lead-times higher or equal to 'leadtimemin' are loaded. Takes by default value 1. } \item{leadtimemax}{ - Only lead-times lower or equal to 'leadtimemax' are loaded. Takes by default the number of lead-times of the first experimental dataset in 'exp'.\cr - If 'exp' is NULL this argument is ignored. +Only lead-times lower or equal to 'leadtimemax' are loaded. Takes by default the number of lead-times of the first experimental dataset in 'exp'.\cr +If 'exp' is NULL this argument won't have any effect (see \code{?Load} description). + } + \item{storefreq}{ +Frequency at which the data to be loaded is stored in the file system. Can take values 'monthly' or 'daily'.\cr +By default it takes 'monthly'.\cr +Note: Data stored in other frequencies with a period which is divisible by a month can be loaded with a proper use of 'storefreq' and 'sampleperiod' parameters. It can also be loaded if the period is divisible by a day and the observational datasets are stored in a file per dataset format or 'obs' is empty. + } + \item{sampleperiod}{ +To load only a subset between 'leadtimemin' and 'leadtimemax' with the period of subsampling 'sampleperiod'.\cr +Takes by default value 1 (all lead-times are loaded).\cr +See 'storefreq' for more information. + } + \item{lonmin}{ +If a 2-dimensional variable is loaded, values at longitudes lower than 'lonmin' aren't loaded.\cr +Must take a value in the range [-360, 360] (if negative longitudes are found in the data files these are translated to this range).\cr +It is set to 0 if not specified.\cr +If 'lonmin' > 'lonmax', data across Greenwich is loaded. } - \item{storefreq}{ - Frequency at which the data to be loaded is stored in the file system. 
Can take values 'monthly' or 'daily'.\cr - By default it takes 'monthly'.\cr - } - \item{sampleperiod}{ - To load only a subset between 'leadtimemin' and 'leadtimemax' with the period of subsampling 'sampleperiod'.\cr - Takes by default value 1 (all lead-times are loaded).\cr - See 'storefreq' for more information. - } - \item{lonmin}{ - If a 2-dimensional variable is loaded, values at longitudes lower than 'lonmin' aren't loaded.\cr - If 'lonmin' > 'lonmax', data crossing the Greenwich Meridian are loaded. - } \item{lonmax}{ - If a 2-dimensional variable is loaded, values at longitudes higher than 'lonmax' aren't loaded.\cr - If 'lonmin' > 'lonmax', data crossing the Greenwich Meridian is loaded. - } - \item{latmin}{ - If a 2-dimensional variable is loaded, values at latitudes lower than 'latmin' aren't loaded.\cr - } +If a 2-dimensional variable is loaded, values at longitudes higher than 'lonmax' aren't loaded.\cr +Must take a value in the range [-360, 360] (if negative longitudes are found in the data files these are translated to this range).\cr +It is set to 360 if not specified.\cr +If 'lonmin' > 'lonmax', data across Greenwich is loaded. + } + \item{latmin}{ +If a 2-dimensional variable is loaded, values at latitudes lower than 'latmin' aren't loaded.\cr +Must take a value in the range [-90, 90].\cr +It is set to -90 if not specified. + } \item{latmax}{ - If a 2-dimensional variable is loaded, values at latitudes higher than 'latmax' aren't loaded.\cr - } - \item{output}{ - This parameter determines the format in which the data is arranged in the output arrays.\cr - Can take values 'areave', 'lon', 'lat', 'lonlat'.\cr +If a 2-dimensional variable is loaded, values at latitudes higher than 'latmax' aren't loaded.\cr +Must take a value in the range [-90, 90].\cr +It is set to 90 if not specified. + } + \item{output}{ +This parameter determines the format in which the data is arranged in the output arrays.\cr +Can take values 'areave', 'lon', 'lat', 'lonlat'.\cr \itemize{ - \item{'areave': Time series of area-averaged variables over the specified domain.} - \item{'lon': Time series of meridional averages as a function of longitudes.} - \item{'lat': Time series of zonal averages as a function of latitudes.} - \item{'lonlat': Time series of 2d fields.} - } - Takes by default the value 'areave'. If the variable specified in 'var' is a global mean, this parameter is forced to 'areave'.\cr - All the loaded data is interpolated onto the grid of the first experimental dataset except if 'areave' is selected. In that case the area averages are computed on each dataset original grid. A common grid different than the first experiment's can be specified through the parameter 'grid'. If 'grid' is specified when selecting 'areave' output type, all the loaded data is interpolated into the specified grid before calculating the area averages. + \item{'areave': Time series of area-averaged variables over the specified domain.} + \item{'lon': Time series of meridional averages as a function of longitudes.} + \item{'lat': Time series of zonal averages as a function of latitudes.} + \item{'lonlat': Time series of 2d fields.} } +Takes by default the value 'areave'. If the variable specified in 'var' is a global mean, this parameter is forced to 'areave'.\cr +All the loaded data is interpolated into the grid of the first experimental dataset except if 'areave' is selected. In that case the area averages are computed on each dataset original grid. 
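(Editorial aside, not part of the patch, reusing the hypothetical 'expA' from the earlier sketch: how the 'output' choice shows up in the dimensions of the returned arrays.)

    ts  <- Load('tas', list(expA), NULL, c('19901101'), output = 'areave')
    map <- Load('tas', list(expA), NULL, c('19901101'), output = 'lonlat')
    dim(ts$mod)   # c(nexp, nmemb, nsdates, nltime)
    dim(map$mod)  # c(nexp, nmemb, nsdates, nltime, nlat, nlon)
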
A common grid different than the first experiment's can be specified through the parameter 'grid'. If 'grid' is specified when selecting 'areave' output type, all the loaded data is interpolated into the specified grid before calculating the area averages. + } \item{method}{ - This parameter determines the interpolation method to be used when regridding data (see 'output'). Can take values 'bilinear', 'bicubic', 'conservative', 'distance-weighted'.\cr - See \code{remapcells} for advanced adjustments.\cr - Takes by default the value 'conservative'. +This parameter determines the interpolation method to be used when regridding data (see 'output'). Can take values 'bilinear', 'bicubic', 'conservative', 'distance-weighted'.\cr +See \code{remapcells} for advanced adjustments.\cr +Takes by default the value 'conservative'. } \item{grid}{ - A common grid can be specified through the parameter 'grid' when loading 2-dimensional data. Data is then interpolated onto this grid whichever 'output' type is specified. If the selected output type is 'areave' and a 'grid' is specified, the area averages are calculated after interpolating to the specified grid.\cr - If not specified and the selected output type is 'lon', 'lat' or 'lonlat', this parameter takes as default value the grid of the first experimental dataset, which is read automatically from the source files.\cr - The grid must be supported by 'cdo' tools: rNXxNY or tTRgrid.\cr - Ex: 'r96x72'\cr - Advanced: If the output type is 'lon', 'lat' or 'lonlat' and no common grid is specified, the grid of the first experimental or observational dataset is detected and all data is then interpolated onto this grid. If the first experimental or observational dataset's data is found shifted along the longitudes (i.e., there's no value at the longitude 0 but at a longitude close to it), the data is re-interpolated to suppress the shift. This has to be done in order to make sure all the data from all the datasets is properly aligned along longitudes, as there's no option so far in \code{Load} to specify grids starting at longitudes other than 0. This issue doesn't affect when loading in 'areave' mode without a common grid, the data is not re-interpolated in that case. +A common grid can be specified through the parameter 'grid' when loading 2-dimensional data. Data is then interpolated onto this grid whichever 'output' type is specified. If the selected output type is 'areave' and a 'grid' is specified, the area averages are calculated after interpolating to the specified grid.\cr +If not specified and the selected output type is 'lon', 'lat' or 'lonlat', this parameter takes as default value the grid of the first experimental dataset, which is read automatically from the source files.\cr +The grid must be supported by 'cdo' tools: rNXxNY or tTRgrid.\cr +Ex: 'r96x72'\cr +Advanced: If the output type is 'lon', 'lat' or 'lonlat' and no common grid is specified, the grid of the first experimental or observational dataset is detected and all data is then interpolated onto this grid. If the first experimental or observational dataset's data is found shifted along the longitudes (i.e., there's no value at the longitude 0 but at a longitude close to it), the data is re-interpolated to suppress the shift. This has to be done in order to make sure all the data from all the datasets is properly aligned along longitudes, as there's no option so far in \code{Load} to specify grids starting at longitudes other than 0. 
This issue doesn't affect when loading in 'areave' mode without a common grid, the data is not re-interpolated in that case. } \item{maskmod}{ - List of masks to be applied to the data of each experimental dataset respectively, if a 2-dimensional variable is specified in 'var'.\cr - Each mask can be defined in 2 formats:\cr - a) a matrix with dimensions c(longitudes, latitudes).\cr +List of masks to be applied to the data of each experimental dataset respectively, if a 2-dimensional variable is specified in 'var'.\cr +Each mask can be defined in 2 formats:\cr +a) a matrix with dimensions c(longitudes, latitudes).\cr b) a list with the components 'path' and, optionally, 'nc_var_name'.\cr In the format a), the matrix must have the same size as the common grid or with the same size as the grid of the corresponding experimental dataset if 'areave' output type is specified and no common 'grid' is specified.\cr In the format b), the component 'path' must be a character string with the path to a NetCDF mask file, also in the common grid or in the grid of the corresponding dataset if 'areave' output type is specified and no common 'grid' is specified. If the mask file contains only a single variable, there's no need to specify the component 'nc_var_name'. Otherwise it must be a character string with the name of the variable inside the mask file that contains the mask values. This variable must be defined only over 2 dimensions with length greater or equal to 1.\cr @@ -154,118 +292,122 @@ Warning: When loading maps, any masks defined for the observational data will be Warning: list() compulsory even if loading 1 experimental dataset only!\cr Ex: list(array(1, dim = c(num_lons, num_lats))) } -\item{maskobs}{ + \item{maskobs}{ See help on parameter 'maskmod'. -} + } \item{configfile}{ - Path to the s2dverification configuration file from which to retrieve information on location in file system (and other) of datasets.\cr - If not specified, the configuration file used at BSC-ES will be used (it is included in the package).\cr - Check the BSC's configuration file or a template of configuration file in the folder 'inst/config' in the package.\cr - Check further information on the configuration file mechanism in \code{ConfigFileOpen()}. - } -\item{varmin}{ - Loaded experimental and observational data values smaller than 'varmin' will be disabled (replaced by NA values).\cr - By default no deactivation is performed. -} -\item{varmax}{ - Loaded experimental and observational data values greater than 'varmax' will be disabled (replaced by NA values).\cr - By default no deactivation is performed. -} -\item{silent}{ - Parameter to show (FALSE) or hide (TRUE) information messages.\cr - Warnings will be displayed even if 'silent' is set to TRUE.\cr - Takes by default the value 'FALSE'. -} -\item{nprocs}{ - Number of parallel processes created to perform the fetch and computation of data.\cr - When running in multiple processes, if an error occurs in any of the processes, a crash message appears in the R session of the original process but no detail is given about the error. 
A value of 1 will display all error messages in the original and only R session.\cr
-}
-\item{dimnames}{
+Path to the s2dverification configuration file from which to retrieve information on location in file system (and other) of datasets.\cr
+If not specified, the configuration file used at BSC-ES will be used (it is included in the package).\cr
+Check the BSC's configuration file or a template of configuration file in the folder 'inst/config' in the package.\cr
+Check further information on the configuration file mechanism in \code{ConfigFileOpen()}.
+  }
+  \item{varmin}{
+Loaded experimental and observational data values smaller than 'varmin' will be disabled (replaced by NA values).\cr
+By default no deactivation is performed.
+  }
+  \item{varmax}{
+Loaded experimental and observational data values greater than 'varmax' will be disabled (replaced by NA values).\cr
+By default no deactivation is performed.
+  }
+  \item{silent}{
+Parameter to show (FALSE) or hide (TRUE) information messages.\cr
+Warnings will be displayed even if 'silent' is set to TRUE.\cr
+Takes by default the value 'FALSE'.
+  }
+  \item{nprocs}{
+Number of parallel processes created to perform the fetch and computation of data.\cr
+These processes will use shared memory in the processor in which Load() is launched.\cr
+By default the number of logical cores in the machine will be detected and as many processes as logical cores there are will be created.\cr
+A value of 1 won't create parallel processes.\cr
+When running in multiple processes, if an error occurs in any of the processes, a crash message appears in the R session of the original process but no detail is given about the error. A value of 1 will display all error messages in the original and only R session.\cr
+Note: the parallel processes create other blocking processes each time they need to compute an interpolation via 'cdo'.
+  }
+  \item{dimnames}{
Named list where the name of each element is a generic name of the expected dimensions inside the NetCDF files. These generic names are 'lon', 'lat' and 'member'. 'time' is not needed because it's detected automatically by discard.\cr
The value associated to each name is the actual dimension name in the NetCDF file.\cr
The variables in the file that contain the longitudes and latitudes of the data (if the data is a 2-dimensional variable) must have the same name as the longitude and latitude dimensions.\cr
By default, these names are 'longitude', 'latitude' and 'ensemble'. If any of those is defined in the 'dimnames' parameter, it takes priority and overwrites the default value.
Ex.: list(lon = 'x', lat = 'y')
In that example, the dimension 'member' will take the default value 'ensemble'.
-  }
-  \item{remapcells}{
+  }
+  \item{remapcells}{
When loading a 2-dimensional variable, spatial subsets can be requested via \code{lonmin}, \code{lonmax}, \code{latmin} and \code{latmax}. When \code{Load()} obtains the subset it is then interpolated if needed with the method specified in \code{method}.\cr
The result of this interpolation can vary if the values surrounding the spatial subset are not present. To better control this process, the width in number of grid cells of the surrounding area to be taken into account can be specified with \code{remapcells}. A value of 0 will take into account no additional cells but will generate less traffic between the storage and the R processes that load data.\cr
A value beyond the limits in the data files will be automatically truncated to the actual limit.\cr
The default value is 2.
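(Editorial aside, not part of the patch, with the hypothetical 'expA' again: a debugging-friendly subset load combining the parameters just described; nprocs = 1 surfaces full error messages and a wider remapcells halo stabilizes the edge interpolation.)

    data <- Load('tas', list(expA), NULL, c('19901101'),
                 lonmin = -20, lonmax = 40, latmin = 30, latmax = 70,
                 output = 'lonlat', method = 'bilinear',
                 nprocs = 1, remapcells = 4)
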
-} -\item{path_glob_permissive}{ + } + \item{path_glob_permissive}{ In some cases, when specifying a path pattern (either in the parameters 'exp'/'obs' or in a configuration file) one can specify path patterns that contain shell globbing expressions. Too much freedom in putting globbing expressions in the path patterns can be dangerous and make \code{Load()} find a file in the file system for a start date for a dataset that really does not belong to that dataset. For example, if the file system contains two directories for two different experiments that share a part of their path and the path pattern contains globbing expressions: -/experiments/model1/expA/monthly_mean/tos/tos_19901101.nc -/experiments/model2/expA/monthly_mean/tos/tos_19951101.nc + /experiments/model1/expA/monthly_mean/tos/tos_19901101.nc + /experiments/model2/expA/monthly_mean/tos/tos_19951101.nc And the path pattern is used as in the example right below to load data of only the experiment 'expA' of the model 'model1' for the starting dates '19901101' and '19951101', \code{Load()} will undesiredly yield data for both starting dates, even if in fact there is data only for the first one: -expA <- list(path = '/experiments/*/expA/monthly_mean/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc') -data <- Load('tos', list(expA), NULL, c('19901101', '19951101')) + expA <- list(path = '/experiments/*/expA/monthly_mean/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc') + data <- Load('tos', list(expA), NULL, c('19901101', '19951101')) To avoid these situations, the parameter \code{path_glob_permissive} is set by default to \code{'partial'}, which forces \code{Load()} to replace all the globbing expressions of a path pattern of a data set by fixed values taken from the path of the first found file for each data set, up to the folder right before the final files (globbing expressions in the file name will not be replaced, only those in the path to the file). Replacement of globbing expressions in the file name can also be triggered by setting \code{path_glob_permissive} to \code{FALSE} or \code{'no'}. If needed to keep all globbing expressions, \code{path_glob_permissive} can be set to \code{TRUE} or \code{'yes'}. + } } - } \details{ -The two output arrays have between 2 and 6 dimensions:\cr -\enumerate{ -\item{Number of experimental/observational datasets.} -\item{Number of members.} -\item{Number of startdates.} -\item{Number of leadtimes.} -\item{Number of latitudes (optional).} -\item{Number of longitudes (optional).} -} +The two output matrices have between 2 and 6 dimensions:\cr + \enumerate{ + \item{Number of experimental/observational datasets.} + \item{Number of members.} + \item{Number of startdates.} + \item{Number of leadtimes.} + \item{Number of latitudes (optional).} + \item{Number of longitudes (optional).} + } but the two matrices have the same number of dimensions and only the first two dimensions can have different lengths depending on the input arguments. - + For a detailed explanation of the process, read the documentation attached to the package or check the comments in the code. } \value{ \code{Load()} returns a named list following a structure similar to the used in the package 'downscaleR'.\cr The components are the following: -\itemize{ -\item{ + \itemize{ + \item{ 'mod' is the array that contains the experimental data. It has the attribute 'dimensions' associated to a vector of strings with the labels of each dimension of the array, in order. 
The order of the latitudes is always forced to be from 90 to -90 whereas the order of the longitudes is kept as in the original files (if possible). The longitude values lower than 0 provided in \code{lon} have 360 added to them (but are still kept in the original order). In some cases, however, if multiple data sets are loaded in longitude-latitude mode, the longitudes (and also the data arrays in \code{mod} and \code{obs}) are re-ordered afterwards by \code{Load()} to range from 0 to 360; a warning is given in such cases. The longitude and latitude of the center of the grid cell that corresponds to the value [j, i] in 'mod' (along the dimensions latitude and longitude, respectively) can be found in the outputs \code{lon}[i] and \code{lat}[j]. -} -\item{'obs' is the array that contains the observational data. The same documentation of parameter 'mod' applies to this parameter.} -\item{'lat' and 'lon' are the latitudes and longitudes of the centers of the cells of the grid the data is interpolated into (0 if the loaded variable is a global mean or the output is an area average).\cr + } + \item{'obs' is the array that contains the observational data. The same documentation of parameter 'mod' applies to this parameter.} + \item{'lat' and 'lon' are the latitudes and longitudes of the centers of the cells of the grid the data is interpolated into (0 if the loaded variable is a global mean or the output is an area average).\cr Both have the attribute 'cdo_grid_des' associated with a character string with the name of the common grid of the data, following the CDO naming conventions for grids.\cr 'lon' has the attributes 'first_lon' and 'last_lon', with the first and last longitude values found in the region defined by 'lonmin' and 'lonmax'. 'lat' also has the equivalent attributes 'first_lat' and 'last_lat'.\cr -'lon' has also the attribute 'data_across_gw' which tells whether the requested region via 'lonmin', 'lonmax', 'latmin', 'latmax' goes across the Greenwich Meridian. As explained in the documentation of the parameter 'mod', the loaded data array is kept in the same order as in the original files when possible: this means that, in some cases, even if the data crosses the Greenwich Meridian, the data array may not cross the Greenwich meridian. The attribute 'array_across_gw' tells whether the array actually goes across the Greenwich. E.g: The longitudes in the data files are defined to be from 0 to 360. The requested longitudes are from -80 to 40. The original order is kept, hence the longitudes in the array will be ordered as follows: 0, ..., 40, 280, ..., 360. In that case, 'data_across_gw' will be TRUE and 'array_across_gw' will be FALSE.\cr +'lon' also has the attribute 'data_across_gw' which tells whether the requested region via 'lonmin', 'lonmax', 'latmin', 'latmax' goes across the Greenwich meridian. As explained in the documentation of the parameter 'mod', the loaded data array is kept in the same order as in the original files when possible: this means that, in some cases, even if the data goes across the Greenwich meridian, the data array may not go across it. The attribute 'array_across_gw' tells whether the array actually goes across the Greenwich meridian. E.g.: The longitudes in the data files are defined to be from 0 to 360. The requested longitudes are from -80 to 40. The original order is kept, hence the longitudes in the array will be ordered as follows: 0, ..., 40, 280, ..., 360.
In that case, 'data_across_gw' will be TRUE and 'array_across_gw' will be FALSE.\cr The attribute 'projection' is kept for compatibility with 'downscaleR'.} -\item{'Variable' has the following components: -\itemize{ -\item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.} -\item{'level', with information on the pressure level of the variable. Is kept to NULL by now.} -} + \item{'Variable' has the following components: + \itemize{ + \item{'varName', with the short name of the loaded variable as specified in the parameter 'var'.} + \item{'level', with information on the pressure level of the variable. It is kept as NULL for now.} + } And the following attributes: -\itemize{ -\item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.} -\item{'units', a character string with the units of measure of the variable, as found in the source files.} -\item{'longname', a character string with the long name of the variable, as found in the source files.} -\item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.} -} -} -\item{'Datasets' has the following components: -\itemize{ -\item{'exp', a named list where the names are the identifying character strings of each experiment in 'exp', each associated to a list with the following components: -\itemize{ -\item{'members', a list with the names of the members of the dataset.} -\item{'source', a path or URL to the source of the dataset.} -} -} -\item{'obs', similar to 'exp' but for observational datasets.} -} -} -\item{'Dates', with the follwing components: -\itemize{ -\item{'start', an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date.} -\item{'end', an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date.} -} -} -\item{'InitializationDates', a vector of starting dates as specified in 'sdates', in POSIX format.} -\item{'when', a time stamp of the date the \code{Load()} call to obtain the data was issued.} -\item{'source_files', a vector of character strings with complete paths to all the found files involved in the \code{Load()} call.} -\item{'not_found_files', a vector of character strings with complete paths to not found files involved in the \code{Load()} call.} -} + \itemize{ + \item{'is_standard', kept for compatibility with 'downscaleR', tells if a dataset has been homogenized to standards with 'downscaleR' catalogs.} + \item{'units', a character string with the units of measure of the variable, as found in the source files.} + \item{'longname', a character string with the long name of the variable, as found in the source files.} + \item{'daily_agg_cellfun', 'monthly_agg_cellfun', 'verification_time', kept for compatibility with 'downscaleR'.} + } + } + \item{'Datasets' has the following components: + \itemize{ + \item{'exp', a named list where the names are the identifying character strings of each experiment in 'exp', each associated to a list with the following components: + \itemize{ + \item{'members', a list with the names of the members of the dataset.} + \item{'source', a path or URL to the source of the dataset.} + } + } + \item{'obs', similar to 'exp' but for observational datasets.} + } + } + \item{'Dates', with the following components: + \itemize{ + \item{'start', an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each
starting date.} + \item{'end', an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date.} + } + } + \item{'InitializationDates', a vector of starting dates as specified in 'sdates', in POSIX format.} + \item{'when', a time stamp of the date the \code{Load()} call to obtain the data was issued.} + \item{'source_files', a vector of character strings with complete paths to all the found files involved in the \code{Load()} call.} + \item{'not_found_files', a vector of character strings with complete paths to not found files involved in the \code{Load()} call.} + } } \author{ History:\cr @@ -351,56 +493,56 @@ History:\cr # data already processed in R. # # Example 1: providing lists in 'exp' and 'obs'. -\dontrun{ - data_path <- system.file('sample_data', package = 's2dverification') - expA <- list(name = 'experiment', path = file.path(data_path, - 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', - '$VAR_NAME$_$START_DATE$.nc')) - obsX <- list(name = 'observation', path = file.path(data_path, - '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', - '$VAR_NAME$_$YEAR$$MONTH$.nc')) - - # Now we are ready to use Load(). - startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') - sampleData <- Load('tos', list(expA), list(obsX), startDates, - output = 'areave', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - # - # Example 2: providing character strings in 'exp' and 'obs', and providing - # a configuration file. - # The configuration file 'sample.conf' that we will create in the example - # has the proper entries to load these (see ?LoadConfigFile for details on - # writing a configuration file). - # - configfile <- paste0(tempdir(), '/sample.conf') - ConfigFileCreate(configfile, confirm = FALSE) - c <- ConfigFileOpen(configfile) - c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MIN', '-1e19', confirm = FALSE) - c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MAX', '1e19', confirm = FALSE) - data_path <- system.file('sample_data', package = 's2dverification') - exp_data_path <- paste0(data_path, '/model/$EXP_NAME$/') - obs_data_path <- paste0(data_path, '/$OBS_NAME$/') - c <- ConfigAddEntry(c, 'experiments', dataset_name = 'experiment', - var_name = 'tos', main_path = exp_data_path, - file_path = '$STORE_FREQ$_mean/$VAR_NAME$_3hourly/$VAR_NAME$_$START_DATE$.nc') - c <- ConfigAddEntry(c, 'observations', dataset_name = 'observation', - var_name = 'tos', main_path = obs_data_path, - file_path = '$STORE_FREQ$_mean/$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc') - ConfigFileSave(c, configfile, confirm = FALSE) - - # Now we are ready to use Load(). 
- startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') - sampleData <- Load('tos', c('experiment'), c('observation'), startDates, - output = 'areave', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40, configfile = configfile) -} -\dontshow{ - startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') - sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'areave', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) -} + \dontrun{ +data_path <- system.file('sample_data', package = 's2dverification') +expA <- list(name = 'experiment', path = file.path(data_path, + 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', + '$VAR_NAME$_$START_DATE$.nc')) +obsX <- list(name = 'observation', path = file.path(data_path, + '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', + '$VAR_NAME$_$YEAR$$MONTH$.nc')) + +# Now we are ready to use Load(). +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- Load('tos', list(expA), list(obsX), startDates, + output = 'areave', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +# +# Example 2: providing character strings in 'exp' and 'obs', and providing +# a configuration file. +# The configuration file 'sample.conf' that we will create in the example +# has the proper entries to load these (see ?LoadConfigFile for details on +# writing a configuration file). +# +configfile <- paste0(tempdir(), '/sample.conf') +ConfigFileCreate(configfile, confirm = FALSE) +c <- ConfigFileOpen(configfile) +c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MIN', '-1e19', confirm = FALSE) +c <- ConfigEditDefinition(c, 'DEFAULT_VAR_MAX', '1e19', confirm = FALSE) +data_path <- system.file('sample_data', package = 's2dverification') +exp_data_path <- paste0(data_path, '/model/$EXP_NAME$/') +obs_data_path <- paste0(data_path, '/$OBS_NAME$/') +c <- ConfigAddEntry(c, 'experiments', dataset_name = 'experiment', + var_name = 'tos', main_path = exp_data_path, + file_path = '$STORE_FREQ$_mean/$VAR_NAME$_3hourly/$VAR_NAME$_$START_DATE$.nc') +c <- ConfigAddEntry(c, 'observations', dataset_name = 'observation', + var_name = 'tos', main_path = obs_data_path, + file_path = '$STORE_FREQ$_mean/$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc') +ConfigFileSave(c, configfile, confirm = FALSE) + +# Now we are ready to use Load(). 
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- Load('tos', c('experiment'), c('observation'), startDates, + output = 'areave', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40, configfile = configfile) + } + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'areave', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } } \keyword{datagen} -- GitLab From 34f89f1f015d593662ae705f07d96d6d8be1fb32 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 1 Feb 2017 16:08:04 +0100 Subject: [PATCH 28/41] Change ens to exp --- R/BrierScore.R | 14 +++++++------- R/Corr.R | 6 +++--- R/RMS.R | 4 ++-- R/RMSSS.R | 6 +++--- R/RatioRMS.R | 6 +++--- R/RatioSDRMS.R | 4 ++-- R/Trend.R | 4 ++-- man/BrierScore.Rd | 16 +++++++++++----- man/Corr.Rd | 26 ++++++++++++++++---------- man/RMS.Rd | 22 ++++++++++++++-------- man/RMSSS.Rd | 18 +++++++++++++++++- man/RatioRMS.Rd | 23 ++++++++++++++--------- man/RatioSDRMS.Rd | 18 ++++++++++++------ man/Trend.Rd | 6 +++--- 14 files changed, 109 insertions(+), 64 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index b6cb81a3..92d701b9 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -74,16 +74,16 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { } -.BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { - if (max(ens) > 1 | min(ens) < 0) { +.BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { + if (max(exp) > 1 | min(exp) < 0) { stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") } else if (max(obs) != 1 & min(obs) != 0) { .message("Binary events must be either 0 or 1. Are you certain this is a binary event? 
") } else { nbins <- length(thresholds) - 1 # Number of bins - n <- dim(ens)[1] # Number of observations - ens.mean <- rowMeans(ens, na.rm = TRUE) - n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members + n <- dim(exp)[1] # Number of observations + ens.mean <- rowMeans(exp, na.rm = TRUE) + n.ens <- seq(1,dim(exp)[2],1) # Number of ensemble members bins <- as.list(paste("bin", 1:nbins,sep = "")) for (i in 1:nbins) { if (i == nbins) { @@ -123,7 +123,7 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { res <- ressum / n unc <- obar * (1 - obar) #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - bs <- sum((rowMeans(ens, na.rm = T) - obs)^2) / n + bs <- sum((rowMeans(exp, na.rm = T) - obs)^2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution @@ -136,7 +136,7 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { term3 <- array(0, nbins) for (i in 1:nbins) { term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) - } + } term_a <- sum(term3, na.rm = T) / n term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a diff --git a/R/Corr.R b/R/Corr.R index 48d6ab0e..7137b921 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -124,7 +124,7 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, CORR } -.Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', +.Corr <- function(exp, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { if (method != "kendall" && method != "spearman" && method != "pearson") { @@ -140,7 +140,7 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, p <- c() conflow <- c() confhigh <- c() - ens.mean <- rowMeans(ens) + ens.mean <- rowMeans(exp) CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) if (pval || conf) { if (method == "kendall" | method == "spearman") { @@ -149,7 +149,7 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, eno <- Eno(obs, 1) } } - if (pval & method == "pearson") { + if (pval && method == "pearson") { t <- CORR*sqrt((eno-2)/(1-(CORR^2))) p <- 1 - pt(t, eno-2) p.val <- p diff --git a/R/RMS.R b/R/RMS.R index adf43fc3..89f2ce43 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -101,7 +101,7 @@ RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, enlrms } -.RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { +.RMS <- function(exp, obs, limits = NULL, siglev = 0.95, conf = TRUE) { # # RMS & its confidence interval computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -110,7 +110,7 @@ RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low } - dif <- rowMeans(ens) - obs + dif <- rowMeans(exp) - obs enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 if (conf) { eno <- Eno(dif, 1) diff --git a/R/RMSSS.R b/R/RMSSS.R index 2e9063b7..9a504aca 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -88,7 +88,7 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { enlRMSSS } -.RMSSS <- function(ens, obs, pval = TRUE) { +.RMSSS <- function(exp, obs, pval = TRUE) { # # RMSSS and its pvalue computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,7 +98,7 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { } else { nvals <- 1 } - dif1 <- rowMeans(ens) - obs + dif1 <- rowMeans(exp) - obs dif2 <- obs rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 rms2 <- 
mean(dif2 ** 2, na.rm = TRUE) ** 0.5 @@ -109,7 +109,7 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { eno1 <- Eno(dif1, 1) eno2 <- Eno(dif2, 1) F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - if (is.na(eno1) == FALSE & is.na(eno2) == FALSE & eno1 > 2 & eno2 > 2) { + if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { p.val <- 1 - pf(F, eno1 - 1, eno2 - 1) } else { p.val <- NA diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 9c8e4377..b58d327d 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -85,13 +85,13 @@ RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { enlratiorms } -.RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { +.RatioRMS <- function(exp, exp.ref, obs, pval = TRUE) { # # RMS ratio and its pvalue computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - dif1 <- rowMeans(ens, na.rm = TRUE) - obs - dif2 <- rowMeans(ens.ref, na.rm = TRUE) - obs + dif1 <- rowMeans(exp, na.rm = TRUE) - obs + dif2 <- rowMeans(exp.ref, na.rm = TRUE) - obs rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index d33656b6..31569816 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -84,13 +84,13 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { enlratiormssd } -.RatioSDRMS <- function(ens, obs, pval = TRUE) { +.RatioSDRMS <- function(exp, obs, pval = TRUE) { # # Ratio RMSE / SD and its significance level # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - ensmean <- rowMeans(ens) + ensmean <- rowMeans(exp) std <- sd(ensmean) enosd <- Eno(ensmean, 1) dif <- ensmean - obs diff --git a/R/Trend.R b/R/Trend.R index a797b705..e035df2a 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -79,9 +79,9 @@ Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { invisible(list(trend = enltrend, detrended = enldetrend)) } -.Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { +.Trend <- function(exp, interval = 1, siglev = 0.95, conf = TRUE) { - ensmean <- rowMeans(ens, na.rm = TRUE) + ensmean <- rowMeans(exp, na.rm = TRUE) if (any(!is.na(ensmean))) { mon <- seq(ensmean) * interval diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 65b0a40c..aea5d4e7 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -8,12 +8,12 @@ Compute Brier Score And Its Decomposition And Brier Skill Score Computes the Brier score (BS) and the components of its standard decomposition as well as the two within-bin components described in Stephenson et al. (2008). It also returns the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). The BSS uses the climatology as the reference forecast. -.BrierScore provides the same functionality, but taking a matrix of ensemble members (ens) as input. +.BrierScore provides the same functionality, but taking a matrix of ensemble members (exp) as input. } \usage{ BrierScore(obs, pred, thresholds = seq(0, 1, 0.1)) -.BrierScore(ens, obs, thresholds = seq(0, 1, 0.1)) +.BrierScore(exp, obs, thresholds = seq(0, 1, 0.1)) } \arguments{ \item{obs}{ Vector of binary observations (1 or 0) } \item{pred}{ Vector of probabilistic predictions with values in the range [0,1] - } - \item{ens}{ -Matrix of predictions with values in the range [0,1] for the .BrierScore function } \item{thresholds}{ Values used to bin the forecasts. By default the bins are {[0,0.1), [0.1, 0.2), ... 
[0.9, 1]} } + \item{exp}{ +Matrix of predictions with values in the range [0,1] for the .BrierScore function + } } \value{ $rel: standard reliability\cr @@ -57,6 +57,11 @@ x <- BrierScore(b, a) x$bs - x$bs_check_res x$bs - x$bs_check_gres x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected + \dontrun{ + a <- runif(10) + b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members... + x2 <- BrierScore(a, b) + } } \references{ Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr @@ -66,5 +71,6 @@ Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Jo \author{ History:\cr 0.1 - 2012-04 (L. Rodrigues, \email{lrodrigues@ic3.cat}) - Original code\cr +1.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/Corr.Rd b/man/Corr.Rd index 3b2cae75..a36a9278 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -8,7 +8,7 @@ Computes the correlation coefficient between an array of forecasts and their cor Calculates the correlation coefficient (Pearson, Kendall or Spearman) for an array of forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modify the threshold of the test modifying siglev (default value=0.95). \cr \cr -.Corr calculates the correlation between the ensemble mean and the observations, using an N by M matrix (ens) of forecasts and a vector of observations (obs) as input. +.Corr calculates the correlation between the ensemble mean and the observations, using an N by M matrix (exp) of forecasts and a vector of observations (obs) as input. } \usage{ Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, @@ -26,12 +26,6 @@ Array of experimental data. \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. } - \item{ens}{ -N by M matrix of N forecasts from M ensemble members. - } - \item{obs}{ -Vector of the corresponding observations of length N. - } \item{posloop}{ Dimension nobs and nexp. } @@ -57,7 +51,12 @@ Whether to compute confidence intervals (default = 'TRUE') or not (FALSE). \item{pval}{ Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE). } } - + \item{exp}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of the corresponding observations of length N. 
+ } \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr The third dimension, of length 4 maximum, contains the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 1. \cr @@ -96,13 +95,20 @@ example(Load) PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_cor.eps') } + fileout = 'tos_cor.eps') + \dontrun{ + library(easyVerification) + dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option + corr2 <- veriApply(".Corr", ano_exp, ano_obs, tdim = 3, ensdim = 2) + } +} \author{ History:\cr 0.1 - 2011-04 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr 1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz at ic3.cat}) - Adding siglev argument\cr -1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron at ic3.cat}) - Adding method argument +1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron at ic3.cat}) - Adding method argument\cr +2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RMS.Rd b/man/RMS.Rd index 663694d5..fa7e7b9a 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -11,7 +11,7 @@ If compROW is given, the RMSE is computed only if rows along the compROW dimensi Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval relies on a chi2 distribution. \cr -.RMS provides the same functionality but taking a matrix of ensemble members as input (ens). +.RMS provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) -.RMS(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) +.RMS(exp, obs, limits = NULL, siglev = 0.95, conf = TRUE) } \arguments{ \item{var_exp}{ Matrix of experimental data. } \item{var_obs}{ Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. - } - \item{ens}{ -N by M matrix of N forecasts from M ensemble members. - } - \item{obs}{ -Vector of the corresponding observations of length N. } \item{posloop}{ Dimension nobs and nexp. @@ -50,6 +44,12 @@ Confidence level of the computed confidence interval. 0.95 by default. \item{conf}{ Whether to compute confidence interval or not. TRUE by default. } + \item{exp}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of the corresponding observations of length N. 
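A direct call to the .RMS matrix interface can be sketched with toy data (assumed shapes: 10 forecasts by 3 ensemble members; ':::' is used in case the dot-function is not exported from the package namespace):

exp <- matrix(rnorm(30), nrow = 10, ncol = 3)   # 10 forecasts, 3 members
obs <- rnorm(10)                                # matching observations
# Returns the RMSE of the ensemble mean and, with conf = TRUE, the
# chi2-based confidence bounds 'conf_low' and 'conf_high'.
s2dverification:::.RMS(exp, obs, siglev = 0.95, conf = TRUE)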
+ } } \value{ RMS: Array with dimensions:\cr @@ -88,10 +88,16 @@ PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, hlines = c(0), fileout = 'tos_rms.eps') + \dontrun{ + library(easyVerification) + dim(smooth_ano_obs) <- dim(smooth_ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option + rms2 <- veriApply(".RMS", smooth_ano_exp, smooth_ano_obs, tdim = 3, ensdim = 2) + } } \author{ History:\cr 0.1 - 2011-05 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr +2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 7e618836..acc4111b 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -1,5 +1,6 @@ \name{RMSSS} \alias{RMSSS} +\alias{.RMSSS} \title{ Computes Root Mean Square Skill Score } @@ -8,10 +9,13 @@ Computes the root mean square error skill score between an array of forecasts, v RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr The RMSSS are computed along the posRMS dimension which should correspond to the startdate dimension.\cr The p-value is optionally provided by a one-sided Fisher test. + +.RMSSS provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ RMSSS(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) +.RMSSS(ens, obs, pval = TRUE) } \arguments{ \item{var_exp}{ @@ -29,6 +33,12 @@ Dimension along which the RMSE are to be computed (the dimension of the start da \item{pval}{ Whether to compute or not the p-value of the test Ho : RMSSS = 0. TRUE by default. } + \item{exp}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of the corresponding observations of length N. + } } \value{ Array with dimensions:\cr @@ -49,10 +59,16 @@ PlotVsLTime(rmsss, toptitle = "Root Mean Square Skill Score", ytitle = "", monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), fileout = 'tos_rmsss.eps') + \dontrun{ + library(easyVerification) + dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option + rmsss3 <- veriApply(".RMSSS", ano_exp, ano_obs, tdim = 3, ensdim = 2) + } } \author{ History:\cr 0.1 - 2012-04 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr +2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index b8a7486d..4a547c89 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -26,21 +26,21 @@ Array of experimental data 2. \item{var_obs}{ Array of observations. } - \item{ens}{ + \item{posRMS}{ +Dimension along which the RMSE are to be computed = the position of the start dates. + } + \item{pval}{ +Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. + } + \item{exp}{ Matrix of experimental data 1. 
} - \item{ens.ref}{ + \item{exp.ref}{ Matrix of experimental data 2. } \item{obs}{ Vector of observations. } - \item{posRMS}{ -Dimension along which the RMSE are to be computed = the position of the start dates. - } - \item{pval}{ -Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. - } } \value{ RatioRMS: A list containing the following components :\cr @@ -97,11 +97,16 @@ rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ], Mean1Dim(ano_obs, 2)[1, , 1, , ], 1) PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, toptitle = 'Ratio RMSE') + \dontrun{ + library(easyVerification) + dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option + rrms2 <- veriApply(".RatioRMS", ano_exp[, 1:2, , , ,], ano_exp[, 2:3, , , ,], ano_obs, tdim = 3, ensdim = 2) + } } \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr -2.0 - 2016-08 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() +2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index e38bd2c7..a4dd4dd1 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -11,7 +11,7 @@ and\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr The p-value is provided by a one-sided Fisher test.\cr -.RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (ens). +.RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ RatioSDRMS(var_exp, var_obs, pval = TRUE) -.RatioSDRMS(ens, obs, pval = TRUE) +.RatioSDRMS(exp, obs, pval = TRUE) } \arguments{ \item{var_exp}{ Experimental data:\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon) } \item{var_obs}{ Observational data:\cr c(nobs, nmemb, nsdates, nltime) up to\cr c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon) } - \item{ens}{ + \item{pval}{ +Whether to compute the p-value of Ho : SD/RMSE = 1 or not. + } + \item{exp}{ N by M matrix of N forecasts from M ensemble members. } \item{obs}{ Vector of the corresponding observations of length N. } - \item{pval}{ -Whether to compute the p-value of Ho : SD/RMSE = 1 or not. - } } \value{ RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to @@ -63,10 +63,16 @@ PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, fileout = 'tos_rsdrms.eps') + \dontrun{ + library(easyVerification) + dim(sampleData$obs) <- dim(sampleData$obs)[-2] # see ?veriApply for how to use the 'parallel' option + rsdrms3 <- veriApply(".RatioSDRMS", sampleData$mod, sampleData$obs, tdim = 3, ensdim = 2) + } } \author{ History:\cr 0.1 - 2011-12 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau-manubens at ic3.cat}) - Formatting to CRAN +1.0 - 2013-09 (N. Manubens, \email{nicolau-manubens at ic3.cat}) - Formatting to CRAN\cr +2.0 - 2017-02 (A. 
Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/Trend.Rd b/man/Trend.Rd index c2981e0f..089dbb02 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -9,7 +9,7 @@ Computes the trend along the forecast time of the ensemble mean by least square Trend() also provides the time series of the detrended ensemble mean forecasts.\cr The confidence interval relies on a student-T distribution. -.Trend provides the same functionality but taking a matrix ensemble members as input (ens). +.Trend provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) @@ -20,7 +20,7 @@ Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) \item{var}{ Array of any number of dimensions up to 10. } - \item{ens}{ + \item{exp}{ M by N matrix of M forecasts from N ensemble members. } \item{interval}{ @@ -66,6 +66,6 @@ PlotAno(trend$detrended, NULL, startDates, History:\cr 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN\cr -2.0 - 2016-08 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapt to veriApply() +2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapt to veriApply() } \keyword{datagen} -- GitLab From e46be2fdbec74254cdb5142630826f9d63050b47 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 1 Feb 2017 16:10:22 +0100 Subject: [PATCH 29/41] Minor Corr.Rd correction --- man/Corr.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index a36a9278..360085b7 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -50,13 +50,13 @@ Whether to compute confidence intervals (default = 'TRUE') or not (FALSE). \item{pval}{ Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE). } -} - \item{exp}{ + \item{exp}{ N by M matrix of N forecasts from M ensemble members. } \item{obs}{ Vector of the corresponding observations of length N. } +} \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr The third dimension, of length 4 maximum, contains the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 1. 
\cr -- GitLab From c90c236efe2c44778c733fa6fc46222cb481a283 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 1 Feb 2017 16:28:15 +0100 Subject: [PATCH 30/41] Change ens to exp in manual --- man/Corr.Rd | 2 +- man/RMS.Rd | 2 +- man/RMSSS.Rd | 2 +- man/RatioRMS.Rd | 2 +- man/RatioSDRMS.Rd | 2 +- man/Trend.Rd | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index 360085b7..202dbabb 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -15,7 +15,7 @@ Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, limits = NULL, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) -.Corr(ens, obs, siglev = 0.95, +.Corr(exp, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) } \arguments{ diff --git a/man/RMS.Rd b/man/RMS.Rd index fa7e7b9a..37602a96 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -16,7 +16,7 @@ The confidence interval relies on a chi2 distribution. \cr \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) -.RMS(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) +.RMS(exp, obs, limits = NULL, siglev = 0.95, conf = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index acc4111b..d6cd519a 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -15,7 +15,7 @@ The p-value is optionally provided by a one-sided Fisher test. \usage{ RMSSS(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) -.RMSSS(ens, obs, pval = TRUE) +.RMSSS(exp, obs, pval = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 4a547c89..25f26e9e 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -14,7 +14,7 @@ The p-value is provided by a two-sided Fisher test. \usage{ RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) -.RatioRMS(ens, ens.ref, obs, pval = TRUE) +.RatioRMS(exp, exp.ref, obs, pval = TRUE) } \arguments{ \item{var_exp1}{ diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index a4dd4dd1..988aaa07 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -16,7 +16,7 @@ The p-value is provided by a one-sided Fisher test.\cr \usage{ RatioSDRMS(var_exp, var_obs, pval = TRUE) -.RatioSDRMS(ens, obs, pval = TRUE) +.RatioSDRMS(exp, obs, pval = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/Trend.Rd b/man/Trend.Rd index 089dbb02..716e0543 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -14,7 +14,7 @@ The confidence interval relies on a student-T distribution. 
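A minimal sketch of the .Trend matrix interface renamed here, with toy data (assumed shapes: 12 lead times by 3 ensemble members; ':::' is used in case the dot-function is not exported, and the returned components are assumed to mirror Trend()'s 'trend' and 'detrended'):

# A noisy linear signal of 0.1 per time step, repeated over 3 members
exp <- matrix(rnorm(36, sd = 0.1) + seq(0, 1.1, 0.1), nrow = 12, ncol = 3)
tr <- s2dverification:::.Trend(exp, interval = 1)
tr$trend      # least-square trend statistics of the ensemble mean
tr$detrended  # detrended ensemble-mean time series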
\usage{ Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) -.Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) +.Trend(exp, interval = 1, siglev = 0.95, conf = TRUE) } \arguments{ \item{var}{ -- GitLab From 5b57c8a98843dfd6043020b399bdadff9dfc5ac3 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 2 Feb 2017 16:16:43 +0100 Subject: [PATCH 31/41] Add veriApply examples to relecant functions --- R/RMSSS.R | 39 ++++++++++++++++----------------------- R/RatioSDRMS.R | 5 +++-- man/Corr.Rd | 4 ++-- man/RatioRMS.Rd | 5 ----- man/RatioSDRMS.Rd | 4 ++-- 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 9a504aca..ac36baf3 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -89,32 +89,25 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { } .RMSSS <- function(exp, obs, pval = TRUE) { - # - # RMSSS and its pvalue computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - dif1 <- rowMeans(exp) - obs + dif2 <- obs + dif1 <- rowMeans(exp) - obs rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - rmsss <- 1 - (rms1 / rms2) - if (pval) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { - p.val <- 1 - pf(F, eno1 - 1, eno2 - 1) - } else { - p.val <- NA - } - } + rms2[abs(rms2) <= abs(rms2) / 1000] <- abs(rms2) / 1000 + rmsss <- c(1 - (rms1 / rms2)) + if (pval == TRUE) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F.stat <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) + if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { + p.val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + } + } + else { + p.val <- NA + } + # # Output # ~~~~~~~~ diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 31569816..0f42ffdf 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -91,10 +91,11 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ensmean <- rowMeans(exp) - std <- sd(ensmean) enosd <- Eno(ensmean, 1) + dif <- exp - ensmean + std <- sd(dif, na.rm = TRUE) dif <- ensmean - obs - rms <- mean(dif ** 2, 1, na.rm = TRUE) ** 0.5 + rms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 enorms <- Eno(dif, 1) enlratiormssd <- std / rms p.val <- 0 diff --git a/man/Corr.Rd b/man/Corr.Rd index 202dbabb..6f4b2c18 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -98,8 +98,8 @@ example(Load) fileout = 'tos_cor.eps') \dontrun{ library(easyVerification) - dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option - corr2 <- veriApply(".Corr", ano_exp, ano_obs, tdim = 3, ensdim = 2) + dim(smooth_ano_obs) <- dim(smooth_ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option + corr2 <- veriApply(".Corr", smooth_ano_exp, smooth_ano_obs, tdim = 3, ensdim = 2) } } diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 25f26e9e..5bd09d2f 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -97,11 +97,6 @@ rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ], Mean1Dim(ano_obs, 2)[1, , 1, , ], 1) PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, toptitle = 'Ratio RMSE') - \dontrun{ - library(easyVerification) - dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 
'parallel' option - rrms2 <- veriApply(".RatioRMS", ano_exp[, 1:2, , , ,], ano_exp[, 2:3, , , ,], ano_obs, tdim = 3, ensdim = 2) - } } \author{ History:\cr diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 988aaa07..0b07664e 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -65,8 +65,8 @@ PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", fileout = 'tos_rsdrms.eps') \dontrun{ library(easyVerification) - dim(modelData$obs) <- dim(modelData$obs)[-2] # see ?veriApply for how to use the 'parallel' option - rsdrms3 <- veriApply(".RatioSDRMS", modelData$exp, modelData$obs, tdim = 3, ensdim = 2) + dim(sampleData$obs) <- dim(sampleData$obs)[-2] # see ?veriApply for how to use the 'parallel' option + rsdrms3 <- veriApply(".RatioSDRMS", sampleData$mod, sampleData$obs, tdim = 3, ensdim = 2) } } \author{ -- GitLab From 091953f799809317aa3196698d40cb28c303864a Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 15:30:23 +0100 Subject: [PATCH 32/41] Small enhancements in documentatiotion and parameter names. --- R/BrierScore.R | 87 +++++++++++++++++---------------- R/Corr.R | 51 +++++++++---------- R/RMS.R | 14 +++--- R/RMSSS.R | 28 +++++------ R/RatioRMS.R | 8 +-- R/RatioSDRMS.R | 8 +-- man/ACC.Rd | 14 +++--- man/BrierScore.Rd | 63 +++++++++++++----------- man/Corr.Rd | 121 ++++++++++++++++++++++++---------------------- man/RMS.Rd | 24 +++++---- man/RMSSS.Rd | 42 ++++++++++------ man/RatioRMS.Rd | 62 +++++++++++++++--------- man/RatioSDRMS.Rd | 38 +++++++++------ 13 files changed, 309 insertions(+), 251 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 92d701b9..e1b01bcf 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -75,55 +75,52 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { .BrierScore <- function(exp, obs, thresholds = seq(0, 1, 0.1)) { - if (max(exp) > 1 | min(exp) < 0) { - stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") - } else if (max(obs) != 1 & min(obs) != 0) { - .message("Binary events must be either 0 or 1. Are you certain this is a binary event? ") + if (max(exp) > 1 || min(exp) < 0) { + stop("Parameter 'exp' contains predictions outside [0,1] range. Are you certain this is a probability forecast?") + } else if (max(obs) != 1 && min(obs) != 0) { + .message("Binary events in 'obs' must be either 0 or 1. 
Are you certain this is a binary event?") } else { nbins <- length(thresholds) - 1 # Number of bins - n <- dim(exp)[1] # Number of observations - ens.mean <- rowMeans(exp, na.rm = TRUE) - n.ens <- seq(1,dim(exp)[2],1) # Number of ensemble members - bins <- as.list(paste("bin", 1:nbins,sep = "")) - for (i in 1:nbins) { - if (i == nbins) { - bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean <= thresholds[i + 1])) - } else { - bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean < thresholds[i + 1])) - } + n <- dim(exp)[1] # Number of observations + ens_mean <- rowMeans(exp, na.rm = TRUE) + n.ens <- seq(1, dim(exp)[2], 1) # Number of ensemble members + bins <- as.list(paste("bin", 1:nbins, sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(ens_mean >= thresholds[i] & ens_mean < thresholds[i + 1])) } - - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(ens.mean[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(ens_mean[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + fkbar[fkbar == Inf] <- 0 + okbar[is.nan(okbar)] <- 0 + obar <- sum(obs) / length(obs) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 + + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i]) ^ 2 + ressum <- ressum + nk[i] * (okbar[i] - obar) ^ 2 + for (j in 1:nk[i]) { + term1 <- term1 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) ^ 2 + term2 <- term2 + (ens_mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } } - - fkbar[fkbar == Inf] <- 0 - okbar[is.nan(okbar)] <- 0 - obar <- sum(obs) / length(obs) - relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 - - - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - - for (j in 1:nk[i]) { - term1 <- term1 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) - } - } - } - } - + } + } rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - bs <- sum((rowMeans(exp, na.rm = T) - obs)^2) / n + bs <- sum((rowMeans(exp, na.rm = T) - obs) ^ 2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution @@ -155,5 +152,11 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} - invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected)) + invisible(list(rel = rel, res = res, unc = unc, bs = bs, + bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, + bs_check_gres = bs_check_gres, bss_gres = bss_gres, + rel_bias_corrected = rel_bias_corrected, + gres_bias_corrected = 
gres_bias_corrected, + unc_bias_corrected = unc_bias_corrected, + bss_bias_corrected = bss_bias_corrected)) } diff --git a/R/Corr.R b/R/Corr.R index 7137b921..10858c29 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -5,8 +5,8 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, # Remove data along compROW dim if there is at least one NA between limits # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { + if (!is.null(compROW)) { + if (is.null(limits)) { limits <- c(1, dim(var_obs)[compROW]) } outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) @@ -126,46 +126,41 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, .Corr <- function(exp, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { - - if (method != "kendall" && method != "spearman" && method != "pearson") { - stop("Wrong correlation method") - - # Check the siglev arguments: - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # Check 'method' + if (!(method %in% c("kendall", "spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + # Check 'siglev' if (siglev > 1 || siglev < 0) { - stop("siglev need to be higher than O and lower than 1") + stop("Parameter 'siglev' must be higher than O and lower than 1.") } } - p <- c() - conflow <- c() - confhigh <- c() - ens.mean <- rowMeans(exp) - CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) + p_val <- NULL + conflow <- NULL + confhigh <- NULL + ens_mean <- rowMeans(exp) + CORR <- cor(obs, ens_mean, use = "pairwise.complete.obs", method = method) if (pval || conf) { - if (method == "kendall" | method == "spearman") { + if (method == "kendall" || method == "spearman") { eno <- Eno(rank(obs), 1) } else if (method == "pearson") { eno <- Eno(obs, 1) } } - if (pval && method == "pearson") { - t <- CORR*sqrt((eno-2)/(1-(CORR^2))) - p <- 1 - pt(t, eno-2) - p.val <- p + if (pval && (method == "pearson")) { + t <- CORR * sqrt((eno - 2) / (1 - (CORR ^ 2))) + p_val <- 1 - pt(t, eno - 2) } - if (conf & method == "pearson") { + if (conf && (method == "pearson")) { conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)), + conf_int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt(eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt(eno - 3))) - conf.int <- conf.int[!is.na(CORR)] - conflow =conf.int[1] - confhigh=conf.int[2] + conf_int <- conf_int[!is.na(CORR)] + conflow <- conf_int[1] + confhigh <- conf_int[2] } # Output - # ~~~~~~~~ - # - invisible(result <- list(corr = CORR, p.val = p, conf_low = conflow, conf_high = confhigh)) - + invisible(result <- list(corr = CORR, p_val = p_val, conf_low = conflow, conf_high = confhigh)) } diff --git a/R/RMS.R b/R/RMS.R index 89f2ce43..b45f8cab 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -101,7 +101,7 @@ RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, enlrms } -.RMS <- function(exp, obs, limits = NULL, siglev = 0.95, conf = TRUE) { +.RMS <- function(exp, obs, siglev = 0.95, conf = TRUE) { # # RMS & its confidence interval computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -115,15 +115,15 @@ RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, if (conf) { eno <- Eno(dif, 1) ndat <- length(sort(dif)) - conf.int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5, + conf_int <- c((eno * enlrms 
** 2 / qchisq(conf_high, eno - 1)) ** 0.5, (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5) - names(conf.int) <- c("conf_low","conf_high") + names(conf_int) <- c("conf_low","conf_high") } else { - conf.int <- c() - names(conf.int) <- c() + conf_int <- c() + names(conf_int) <- c() } - results <- c(enlrms, conf.int) - names(results) <- c("rms", names(conf.int)) + results <- c(enlrms, conf_int) + names(results) <- c("rms", names(conf_int)) return(results) } diff --git a/R/RMSSS.R b/R/RMSSS.R index ac36baf3..42622477 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -89,28 +89,26 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { } .RMSSS <- function(exp, obs, pval = TRUE) { - dif2 <- obs dif1 <- rowMeans(exp) - obs rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 rms2[abs(rms2) <= abs(rms2) / 1000] <- abs(rms2) / 1000 rmsss <- c(1 - (rms1 / rms2)) - if (pval == TRUE) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F.stat <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { - p.val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - } - } - else { - p.val <- NA - } - + if (pval == TRUE) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F.stat <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) + if (is.na(eno1) == FALSE && is.na(eno2) == FALSE && eno1 > 2 && eno2 > 2) { + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + } + } else { + p_val <- NA + } + # # Output # ~~~~~~~~ # - list(rmsss = rmsss, p.val = p.val) -} \ No newline at end of file + list(rmsss = rmsss, p_val = p_val) +} diff --git a/R/RatioRMS.R b/R/RatioRMS.R index b58d327d..564f9658 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -85,13 +85,13 @@ RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { enlratiorms } -.RatioRMS <- function(exp, exp.ref, obs, pval = TRUE) { +.RatioRMS <- function(exp, exp_ref, obs, pval = TRUE) { # # RMS ratio and its pvalue computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # dif1 <- rowMeans(exp, na.rm = TRUE) - obs - dif2 <- rowMeans(exp.ref, na.rm = TRUE) - obs + dif2 <- rowMeans(exp_ref, na.rm = TRUE) - obs rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( @@ -103,10 +103,10 @@ RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) F[which(F < 1)] <- 1 / F[which(F < 1)] if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) { - p.val <- (1 - pf(F, eno1 - 1, eno2 - 1)) * 2 + p_val <- (1 - pf(F, eno1 - 1, eno2 - 1)) * 2 } } # Output - list(ratiorms = enlratiorms, p.val = p.val) + list(ratiorms = enlratiorms, p_val = p_val) } diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 0f42ffdf..2bce490d 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -98,21 +98,21 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { rms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 enorms <- Eno(dif, 1) enlratiormssd <- std / rms - p.val <- 0 + p_val <- 0 if (pval) { l1 <- enosd l2 <- enorms F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1)) if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { - p.val <- 1 - pf(F, l1 - 1, l2 - 1) + p_val <- 1 - pf(F, l1 - 1, l2 - 1) } else { - p.val <- NA + p_val <- NA } } # # Output # ~~~~~~~~ # - list(ratio = enlratiormssd, p.val = p.val) + 
list(ratio = enlratiormssd, p_val = p_val) } diff --git a/man/ACC.Rd b/man/ACC.Rd index f24b9d82..d4d66aec 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -97,12 +97,12 @@ Joliffe and Stephenson (2012). Forecast Verification: A Practitioner's Guide in } \author{ History:\cr -0.1 - 2013-08 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN\cr -1.1 - 2013-09 (C. Prodhomme, \email{chloe.prodhomme at ic3.cat}) - optimization\cr -1.2 - 2014-08 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Bug-fixes : handling of NA & selection of domain + Simplification of code\cr -1.3.0 - 2014-08 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Boostrapping over members\cr -1.3.1 - 2014-09 (C. Prodhomme, chloe.prodhomme at ic3.cat) - Add comments and minor style changes\cr -1.3.2 - 2015-02 (N. Manubens, nicolau.manubens at ic3.cat) - Fixed ACC documentation and examples\cr +0.1 - 2013-08 (V. Guemas, \email{virginie.guemas at bsc.es}) - Original code\cr +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Formatting to CRAN\cr +1.1 - 2013-09 (C. Prodhomme, \email{chloe.prodhomme at bsc.es}) - optimization\cr +1.2 - 2014-08 (V. Guemas, \email{virginie.guemas at bsc.es}) - Bug-fixes : handling of NA & selection of domain + Simplification of code\cr +1.3.0 - 2014-08 (V. Guemas, \email{virginie.guemas at bsc.es}) - Bootstrapping over members\cr +1.3.1 - 2014-09 (C. Prodhomme, chloe.prodhomme at bsc.es) - Add comments and minor style changes\cr +1.3.2 - 2015-02 (N. Manubens, nicolau.manubens at bsc.es) - Fixed ACC documentation and examples\cr } \keyword{datagen} diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index aea5d4e7..f3fee0b3 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -5,9 +5,8 @@ Compute Brier Score And Its Decomposition And Brier Skill Score } \description{ -Computes the Brier score (BS) and the components of its standard decomposition as well with the two within-bin components described in Stephenson et al., (2008). It also returns the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference forecast. - - +Computes the Brier score (BS) and the components of its standard decomposition as well as the two within-bin components described in Stephenson et al. (2008). It also returns the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). The BSS uses the climatology as the reference forecast.\cr +\cr .BrierScore provides the same functionality, but taking a matrix of ensemble members (exp) as input. 
 }
 \usage{
@@ -30,25 +29,28 @@ Matrix of predictions with values in the range [0,1] for the .BrierScore functio
 }
 }
 \value{
-$rel: standard reliability\cr
-$res: standard resolution\cr
-$unc: standard uncertainty\cr
-$bs: Brier score\cr
-$bs_check_res: rel-res+unc\cr
-$bss_res: res-rel/unc\cr
-$gres: generalized resolution\cr
-$bs_check_gres: rel-gres+unc\cr
-$bss_gres: gres-rel/unc\cr
-$rel_bias_corrected: bias-corrected rel\cr
-$gres_bias_corrected: bias-corrected gres\cr
-$unc_bias_corrected: bias-corrected unc\cr
-$bss_bias_corrected: gres_bias_corrected-rel_bias_corrected/unc_bias_corrected\cr
-$nk: number of forecast in each bin\cr
-$fkbar: average probability of each bin\cr
-$okbar: relative frequency that the observed event occurred\cr
-$bins: bins used\cr
-$pred: values with which the forecasts are verified\cr
-$obs: probability forecasts of the event\cr
+Both BrierScore and .BrierScore provide the same outputs:
+  \itemize{
+    \item{$rel}{standard reliability}
+    \item{$res}{standard resolution}
+    \item{$unc}{standard uncertainty}
+    \item{$bs}{Brier score}
+    \item{$bs_check_res}{rel - res + unc}
+    \item{$bss_res}{(res - rel) / unc}
+    \item{$gres}{generalized resolution}
+    \item{$bs_check_gres}{rel - gres + unc}
+    \item{$bss_gres}{(gres - rel) / unc}
+    \item{$rel_bias_corrected}{bias-corrected rel}
+    \item{$gres_bias_corrected}{bias-corrected gres}
+    \item{$unc_bias_corrected}{bias-corrected unc}
+    \item{$bss_bias_corrected}{(gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected}
+    \item{$nk}{number of forecasts in each bin}
+    \item{$fkbar}{average probability of each bin}
+    \item{$okbar}{relative frequency that the observed event occurred}
+    \item{$bins}{bins used}
+    \item{$pred}{probability forecasts of the event}
+    \item{$obs}{values with which the forecasts are verified}
+  }
 }
 \examples{
 a <- runif(10)
@@ -57,10 +59,15 @@ x <- BrierScore(b, a)
 x$bs - x$bs_check_res
 x$bs - x$bs_check_gres
 x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected
-  \dontrun{
-  a <- runif(10)
-  b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members...
-  x2 <- BrierScore(a, b)
+  \dontrun{
+a <- runif(10)
+b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members...
+x2 <- BrierScore(a, b)
+  }
+  \dontrun{
+library(easyVerification)
+BrierScore2 <- s2dverification:::.BrierScore
+veriApply("BrierScore2", a, b, )
 }
 }
 \references{
@@ -70,7 +77,7 @@ Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Jo
 }
 \author{
 History:\cr
-0.1 - 2012-04 (L. Rodrigues, \email{lrodrigues@ic3.cat}) - Original code\cr
-1.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
+0.1 - 2012-04 (L. Rodrigues, \email{lrodrigues at ic3.cat}) - Original code\cr
+0.2 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
 }
 \keyword{datagen}
diff --git a/man/Corr.Rd b/man/Corr.Rd
index 6f4b2c18..ce962879 100644
--- a/man/Corr.Rd
+++ b/man/Corr.Rd
@@ -5,7 +5,6 @@
 Computes the correlation coefficient between an array of forecasts and their corresponding observations.
 }
 \description{
-
 Calculates the correlation coefficient (Pearson, Kendall or Spearman) for an array of forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension.
 If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr
 Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr
 The confidence interval is computed by a Fisher transformation.\cr
-The significance level relies on a one-sided student-T distribution.\cr
-We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr
+The significance level relies on a one-sided Student's t-distribution.\cr
+The threshold of the test can be modified via siglev (default value = 0.95).\cr
 \cr
 .Corr calculates the correlation between the ensemble mean and the observations, using an N by M matrix (exp) of forecasts and a vector of observations (obs) as input.
@@ -15,27 +14,26 @@
 Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL,
      limits = NULL, siglev = 0.95, method = 'pearson',
      conf = TRUE, pval = TRUE)
-.Corr(exp, obs, siglev = 0.95,
-      method = 'pearson', conf = TRUE, pval = TRUE)
+.Corr(exp, obs, siglev = 0.95, method = 'pearson',
+      conf = TRUE, pval = TRUE)
 }
 \arguments{
-  \item{var_exp}{
+  \item{var_exp}{
 Array of experimental data.
-  }
-
-  \item{var_obs}{
+  }
+  \item{var_obs}{
 Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp.
-  }
-  \item{posloop}{
+  }
+  \item{posloop}{
 Dimension nobs and nexp.
-  }
-  \item{poscor}{
+  }
+  \item{poscor}{
 Dimension along which correlation are to be computed (the dimension of the start dates).
-  }
-  \item{compROW}{
+  }
+  \item{compROW}{
 Data taken into account only if (compROW)th row is complete.\cr
 Default = NULL.
-  }
-  \item{limits}{
+  }
+  \item{limits}{
 Complete between limits[1] & limits[2]. Default = NULL.
 }
 \item{siglev}{
@@ -50,65 +48,74 @@ Whether to compute confidence intervals (default = 'TRUE') or not (FALSE).
 \item{pval}{
 Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE).
 }
-  \item{exp}{
+  \item{exp}{
 N by M matrix of N forecasts from M ensemble members.
 }
 \item{obs}{
 Vector of the corresponding observations of length N.
 }
 }
-  \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr
+\value{
+Corr: Array with dimensions:\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr
-The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. \cr
-
+The third dimension, of length 4 maximum, corresponds to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, it will be of length 1.\cr
+\cr
 .Corr:
-  \item{$corr}{
+  \itemize{
+    \item{$corr}{
 The correlation statistic.
-  }
-  \item{$p.val}{
+    }
+    \item{$p_val}{
-Corresponds to the p values for the \code{siglev}\% (only present if \code{pval = TRUE}) for the correlation.
+Corresponds to the p-value for the \code{siglev}\% significance level (only present if \code{pval = TRUE}) for the correlation.
-  }
-  \item{$conf_low}{
+    }
+    \item{$conf_low}{
-Corresponds to the upper limit of the \code{siglev}\% (only present if \code{conf = TRUE}) for the correlation.
+Corresponds to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the correlation.
-  }
-  \item{$conf_high}{
+    }
+    \item{$conf_high}{
-Corresponds to the lower limit of the \code{siglev}\% (only present if \code{conf = TRUE}) for the correlation.
+Corresponds to the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the correlation.
-  }
-}
-  \examples{ # Load sample data as in Load() example:
+    }
+  }
+}
+\examples{
+# Load sample data as in Load() example:
 example(Load)
-  clim <- Clim(sampleData$mod, sampleData$obs)
-  ano_exp <- Ano(sampleData$mod, clim$clim_exp)
-  ano_obs <- Ano(sampleData$obs, clim$clim_obs)
-  runmean_months <- 12
-  dim_to_smooth <- 4
-  # Smooth along lead-times
-  smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth)
-  smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth)
-  dim_to_mean <- 2 # Mean along members
-  required_complete_row <- 3 # Discard start dates which contain any NA lead-times
-  leadtimes_per_startdate <- 60
-  corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean),
-               Mean1Dim(smooth_ano_obs, dim_to_mean),
-               compROW = required_complete_row,
-               limits = c(ceiling((runmean_months + 1) / 2),
-               leadtimes_per_startdate - floor(runmean_months / 2)))
-  PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation",
-              monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'),
-              listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1),
-              fileout = 'tos_cor.eps')
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+runmean_months <- 12
+dim_to_smooth <- 4
+# Smooth along lead-times
+smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth)
+smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth)
+dim_to_mean <- 2  # Mean along members
+required_complete_row <- 3  # Discard start dates which contain any NA lead-times
+leadtimes_per_startdate <- 60
+corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean),
+             Mean1Dim(smooth_ano_obs, dim_to_mean),
+             compROW = required_complete_row,
+             limits = c(ceiling((runmean_months + 1) / 2),
+                        leadtimes_per_startdate - floor(runmean_months / 2)))
+PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation",
+            monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'),
+            listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1),
+            fileout = 'tos_cor.eps')
+
+# The following example uses veriApply combined with .Corr instead of Corr
 \dontrun{
-  library(easyVerification)
-  dim(smooth_ano_obs) <- dim(smooth_ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option
-  corr2 <- veriApply(".Corr", smooth_ano_exp, smooth_ano_obs, tdim = 3, ensdim = 2)
+library(easyVerification)
+Corr2 <- s2dverification:::.Corr
+corr2 <- veriApply("Corr2",
+                   smooth_ano_exp,
+                   # see ?veriApply for how to use the 'parallel' option
+                   Mean1Dim(smooth_ano_obs, dim_to_mean),
+                   tdim = 3, ensdim = 2)
 }
 }
-  \author{
+\author{
 History:\cr
-0.1 - 2011-04 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr
-1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr
-1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz at ic3.cat}) - Adding siglev argument\cr
-1.2 - 2015-03 (L.P. Caron, \email{louis-philippe.caron at ic3.cat}) - Adding method argument\cr
-2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
+0.1 - 2011-04 (V. Guemas, \email{vguemas at bsc.es}) - Original code\cr
+1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Formatting to R CRAN\cr
+1.1 - 2014-10 (M. Menegoz, \email{martin.menegoz at bsc.es}) - Adding siglev argument\cr
+1.2 - 2015-03 (L.P.
Caron, \email{louis-philippe.caron at bsc.es}) - Adding method argument\cr +1.3 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RMS.Rd b/man/RMS.Rd index 37602a96..2b1ca8f3 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -9,14 +9,14 @@ Computes the root mean square error for an array of forecasts, var_exp and an ar The RMSE is computed along the posRMS dimension which should correspond to the startdate dimension.\cr If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval relies on a chi2 distribution. \cr - +The confidence interval relies on a chi2 distribution.\cr +\cr .RMS provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) -.RMS(exp, obs, limits = NULL, siglev = 0.95, conf = TRUE) +.RMS(exp, obs, siglev = 0.95, conf = TRUE) } \arguments{ \item{var_exp}{ @@ -55,6 +55,7 @@ Vector of the corresponding observations of length N. RMS: Array with dimensions:\cr c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). \cr +\cr .RMS: \item{$rms}{ The root mean square error, @@ -62,7 +63,7 @@ The root mean square error, \item{$conf_low}{ Corresponding to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms. } - \item{$conf_high}{ + \item{$conf_high}{ Corresponding to the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms. } } @@ -88,16 +89,21 @@ PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, hlines = c(0), fileout = 'tos_rms.eps') - \dontrun{ - library(easyVerification) - dim(smooth_ano_obs) <- dim(smooth_ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option - rms2 <- veriApply(".RMS", smooth_ano_exp, smooth_ano_obs, tdim = 3, ensdim = 2) +# The following example uses veriApply combined with .RMS instead of RMS + \dontrun{ +library(easyVerification) +RMS2 <- s2dverification:::.RMS +rms2 <- veriApply("RMS2", + smooth_ano_exp, + # see ?veriApply for how to use the 'parallel' option + Mean1Dim(smooth_ano_obs, dim_to_mean), + tdim = 3, ensdim = 2) } } \author{ History:\cr 0.1 - 2011-05 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr -2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() +1.1 - 2017-02 (A. 
Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
 }
 \keyword{datagen}
diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd
index d6cd519a..bd1f3e75 100644
--- a/man/RMSSS.Rd
+++ b/man/RMSSS.Rd
@@ -8,8 +8,8 @@ Computes Root Mean Square Skill Score
-Computes the root mean square error skill score between an array of forecasts, var_exp and an array of observations, var_obs, which should have the same dimensions except along posloop where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr
-RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr
+Computes the root mean square error skill score between an array of forecasts, var_exp, and an array of observations, var_obs, which should have the same dimensions except along posloop, where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of observational datasets for var_obs (nobs).\cr
+RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs, which gives nexp x nobs RMSSS for each grid point of the matrix (each latitude/longitude/level/leadtime).\cr
 The RMSSS are computed along the posRMS dimension which should correspond to the startdate dimension.\cr
-The p-value is optionally provided by a one-sided Fisher test.
-
+The p-value is optionally provided by a one-sided Fisher test.\cr
+\cr
 .RMSSS provides the same functionality but taking a matrix of ensemble members as input (exp).
 }
 \usage{
@@ -41,9 +41,19 @@ Vector of the corresponding observations of length N.
 }
 }
 \value{
-Array with dimensions:\cr
+RMSSS: Array with dimensions:\cr
 c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 2, all other dimensions of var_exp & var_obs except posRMS).\cr
-The 3rd dimension corresponds to the RMSSS and, if \code{pval = TRUE}, the p-value of the one-sided Fisher test with Ho: RMSSS = 0.
+The 3rd dimension corresponds to the RMSSS and, if \code{pval = TRUE}, the p-value of the one-sided Fisher test with Ho: RMSSS = 0.\cr
+\cr
+.RMSSS:
+  \itemize{
+    \item{$rmsss}{
+The RMSSS.
+    }
+    \item{$p_val}{
+Corresponds to the p-value (only present if \code{pval = TRUE}) for the RMSSS.
+    }
+  }
 }
 \examples{
 # Load sample data as in Load() example:
@@ -52,23 +62,27 @@
 clim <- Clim(sampleData$mod, sampleData$obs)
 ano_exp <- Ano(sampleData$mod, clim$clim_exp)
 ano_obs <- Ano(sampleData$obs, clim$clim_obs)
 rmsss <- RMSSS(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2))
-rmsss2 <- array(dim = c(dim(rmsss)[1:2], 4, dim(rmsss)[4]))
-rmsss2[, , 2, ] <- rmsss[, , 1, ]
-rmsss2[, , 4, ] <- rmsss[, , 2, ]
-PlotVsLTime(rmsss, toptitle = "Root Mean Square Skill Score", ytitle = "",
+rmsss_plot <- array(dim = c(dim(rmsss)[1:2], 4, dim(rmsss)[4]))
+rmsss_plot[, , 2, ] <- rmsss[, , 1, ]
+rmsss_plot[, , 4, ] <- rmsss[, , 2, ]
+PlotVsLTime(rmsss_plot, toptitle = "Root Mean Square Skill Score", ytitle = "",
             monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
             listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1),
             fileout = 'tos_rmsss.eps')
+# The following example uses veriApply combined with .RMSSS instead of RMSSS
 \dontrun{
-  library(easyVerification)
-  dim(ano_obs) <- dim(ano_obs)[-2] # see ?veriApply for how to use the 'parallel' option
-  rmsss3 <- veriApply(".RMSSS", ano_exp, ano_obs, tdim = 3, ensdim = 2)
+library(easyVerification)
+RMSSS2 <- s2dverification:::.RMSSS
+rmsss2 <- veriApply("RMSSS2", ano_exp,
+                    # see ?veriApply for how to use the 'parallel' option
+                    Mean1Dim(ano_obs, 2),
+                    tdim = 3, ensdim = 2)
 }
 }
 \author{
 History:\cr
-0.1 - 2012-04 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr
-1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr
-2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
+0.1 - 2012-04 (V. Guemas, \email{vguemas at bsc.es}) - Original code\cr
+1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Formatting to R CRAN\cr
+1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply()
 }
 \keyword{datagen}
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index 5bd09d2f..6ac10330 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -7,14 +7,14 @@ Computes the Ratio Between The RMSE of Two Experiments
 \description{
 Calculates the ratio of the RMSE for two forecasts of the same observations.\cr
-The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr
-The p-value is provided by a two-sided Fischer test.
-
-.RatioRMS provides the same functionality but taking two matrices of ensemble members (ens and ens.ref) as input.
+The ratio RMSE(exp, obs) / RMSE(exp_ref, obs) is output.\cr
+The p-value is provided by a two-sided Fisher test.\cr
+\cr
+.RatioRMS provides the same functionality but taking two matrices of ensemble members (exp and exp_ref) as input.
 }
 \usage{
 RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE)
-.RatioRMS(exp, exp.ref, obs, pval = TRUE)
+.RatioRMS(exp, exp_ref, obs, pval = TRUE)
 }
 \arguments{
 \item{var_exp1}{
@@ -26,7 +26,7 @@ Array of experimental data 2.
 \item{var_obs}{
 Array of observations.
 }
-  \item{posRMS}{
+  \item{posRMS}{
 Dimension along which the RMSE are to be computed = the position of the start dates.
 }
 \item{pval}{
@@ -35,7 +35,7 @@ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default.
 \item{exp}{
 Matrix of experimental data 1.
 }
-  \item{exp.ref}{
+  \item{exp_ref}{
 Matrix of experimental data 2.
 }
 \item{obs}{
@@ -43,18 +43,14 @@ Vector of observations.
 }
 }
 \value{
-RatioRMS: A list containing the following components :\cr
-ratiorms - the ratio of the rms of the two ensembles \cr
-p.val - the p value \cr
-The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr
-.RatioRMS:
-\item{$ratiorms}{
-The ratio of the root mean square errors,
+RatioRMS:\cr
+Matrix with the same dimensions as var_exp1/var_exp2/var_obs except along posRMS where the dimension has length 2 if ‘pval = TRUE’, or 1 otherwise. The dimension of length 2 corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr
+\cr
+.RatioRMS:\cr
+  \itemize{
+    \item{$ratiorms}{The ratio of the RMSE of the two experimental datasets}
+    \item{$p_val}{The p-value}
 }
-  \item{$conf.int}{
-Corresponding to the p values of the ratio of the rmse statistics (only present if \code{pval = TRUE}).
-  }
-
 }
 \examples{
 # See examples on Load() to understand the first lines in this example
@@ -81,6 +77,7 @@ sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'),
                                                 latmin = 27, latmax = 48,
                                                 lonmin = -12, lonmax = 40)
 }
+# Compute DJF seasonal means and anomalies.
 leadtimes_dimension <- 4
 initial_month <- 11
 mean_start_month <- 12
@@ -92,16 +89,37 @@ sampleData$obs <- Season(sampleData$obs, leadtimes_dimension, initial_month,
 clim <- Clim(sampleData$mod, sampleData$obs)
 ano_exp <- Ano(sampleData$mod, clim$clim_exp)
 ano_obs <- Ano(sampleData$obs, clim$clim_obs)
-rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ],
-                 ano_exp[ , 3, , , , ][, 1, , ],
-                 Mean1Dim(ano_obs, 2)[1, , 1, , ], 1)
+# Generate two experiments with 2 and 1 members from the only experiment
+# available in the sample data. Take only data values for a single forecast
+# time step.
+ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2)) +ano_exp_2 <- Subset(ano_exp, 'member', c(3)) +ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +# Compute ensemble mean and provide as inputs to RatioRMS. +rrms <- RatioRMS(Mean1Dim(ano_exp_1, 1), + Mean1Dim(ano_exp_2, 1), + Mean1Dim(ano_obs, 1)) +# Plot the RatioRMS for the first forecast time step. PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, toptitle = 'Ratio RMSE') + +# The following example uses veriApply combined with .RatioRMS instead of RatioRMS + \dontrun{ +library(easyVerification) +RatioRMS2 <- s2dverification:::.RatioRMS +rrms2 <- veriApply("RatioRMS2", ano_exp_1, + # see ?veriApply for how to use the 'parallel' option + Mean1Dim(ano_obs, 1), + ano_exp_2, + tdim = 2, ensdim = 1) + } } \author{ History:\cr -0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr -2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() +0.1 - 2011-11 (V. Guemas, \email{vguemas at bsc.es}) - Original code\cr +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Formatting to R CRAN\cr +1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 0b07664e..59d373ec 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -11,6 +11,7 @@ and\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr The p-value is provided by a one-sided Fischer test.\cr +\cr .RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (exp). } \usage{ @@ -41,38 +42,47 @@ Vector of the corresponding observations of length N. } \value{ RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to - c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). + c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). \cr The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1.\cr +\cr .RatioSDRMS: -\item{$ratio}{ + \itemize{ + \item{$ratio}{ The ratio of the ensemble spread and RMSE, - } - \item{$p.val}{ + } + \item{$p_val}{ Corresponds to the p values of the ratio (only present if \code{pval = TRUE}). 
- } - + } + } } \examples{ # Load sample data as in Load() example: example(Load) rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) -rsdrms2 <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) -rsdrms2[, , 2, ] <- rsdrms[, , 1, ] -rsdrms2[, , 4, ] <- rsdrms[, , 2, ] -PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", +# Reorder the data in order to plot it with PlotVsLTime +rsdrms_plot <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) +rsdrms_plot[, , 2, ] <- rsdrms[, , 1, ] +rsdrms_plot[, , 4, ] <- rsdrms[, , 2, ] +PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, fileout = 'tos_rsdrms.eps') + +# The following example uses veriApply combined with .RatioSDRMS instead of RatioSDRMS \dontrun{ - library(easyVerification) - dim(sampleData$obs) <- dim(sampleData$obs)[-2] # see ?veriApply for how to use the 'parallel' option - rsdrms3 <- veriApply(".RatioSDRMS", sampleData$mod, sampleData$obs, tdim = 3, ensdim = 2) +library(easyVerification) +RatioSDRMS2 <- s2dverification:::.RatioSDRMS +rsdrms2 <- veriApply("RatioSDRMS2", + sampleData$mod, + # see ?veriApply for how to use the 'parallel' option + Mean1Dim(sampleData$obs, 2), + tdim = 3, ensdim = 2) } } \author{ History:\cr 0.1 - 2011-12 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau-manubens at ic3.cat}) - Formatting to CRAN\cr -2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() +1.1 - 2017-02 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} -- GitLab From c060a7455d662918349e7616ab096e4a1a9df296 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 18:17:43 +0100 Subject: [PATCH 33/41] Attempt to fix the bug. Vero to test it. 
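
In cross-validation mode, the probabilistic bins of each forecast year have to
be computed from a climatology that excludes that year, so the function now
loops over the years in 'fcyr' and calls ProbBins() recursively with
compPeriod = "Without fcyr". A toy sketch of the intended leave-one-out
behaviour, with hypothetical data (not package data; type 8 quantiles, as used
inside ProbBins()):

    # For each "year", derive its tercile category from thresholds
    # estimated on all the other years.
    set.seed(1)
    ano <- rnorm(30)  # 30 years of toy anomalies
    bins <- sapply(seq_along(ano), function(i) {
      thr <- quantile(ano[-i], probs = c(1/3, 2/3), type = 8, names = FALSE)
      findInterval(ano[i], thr) + 1  # category 1, 2 or 3
    })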
--- R/ProbBins.R | 143 ++++++++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index c6e4fec1..0f529faa 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -1,9 +1,8 @@ -ProbBins <- function(ano, fcyr, thr, quantile=TRUE, posdates = 3, - posdim = 2, compPeriod= "Full period") { +ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, + posdim = 2, compPeriod = "Full period") { # define dimensions #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nbdim <- length(dim(ano)) - nfcyr<-length(fcyr) if (nbdim < 7){ ano <- Enlarge(ano, 7) @@ -13,78 +12,96 @@ ProbBins <- function(ano, fcyr, thr, quantile=TRUE, posdates = 3, posdim <- setdiff(posdim, posdates) nbpos <- length(posdim) #permute dimensions in ano - ano <- aperm(ano, c(posdates, posdim, setdiff(seq(1,7,1), c(posdates, posdim)))) + if (posdates != 1 || posdim != 2) { + ano <- aperm(ano, c(posdates, posdim, setdiff(seq(1,7,1), c(posdates, posdim)))) + posdates <- 1 + posdim <- 2 + } dimano <- dim(ano) - nsdates=dimano[1] + nsdates <- dimano[1] #calculate the number of elements on posdim dimension in ano - nmemb=1 + nmemb <- 1 if (nbpos > 0){ for (idim in 2:(nbpos+1)){ - nmemb=nmemb*dimano[idim] + nmemb <- nmemb*dimano[idim] } } - - # separate forecast and hindcast - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - fore <- array(ano[fcyr, , , , , , ], dim = c(nfcyr, - dimano[2:7])) - # the members and startdates are combined in one dimension - sample_fore <- array(fore, dim=c(nfcyr*nmemb, dimano[(nbpos+2):7])) - - if(compPeriod=="Full period"){ - hind <- ano - sample <- array(hind, dim=c(nsdates*nmemb, dimano[(nbpos+2):7])) + + if (fcyr == 'all') { + fcyr <- 1:nsdates } - - if (compPeriod=="Without fcyr"){ - hind <- array(ano[-fcyr, , , , , , ], dim = c(nsdates-nfcyr, + nfcyr <- length(fcyr) + + if (compPeriod == "Cross-validation") { + result <- array(dim = c(nfcyr, length(thr) + 1, 1, nmemb, dimano[(nbpos + 2):nbdim])) + store_indices <- as.list(rep(TRUE, length(dim(result)))) + for (iyr in fcyr) { + store_indices[[1]] <- iyr + result <- do.call("[<-", c(list(x = result), + store_indices, + list(value = ProbBins(ano, iyr, thr, quantile, + posdates, posdim, + "Without fcyr")) + ) + ) + } + return(result) + } else if (compPeriod %in% c("Full period", "Without fcyr")) { + # separate forecast and hindcast + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + fore <- array(ano[fcyr, , , , , , ], dim = c(nfcyr, dimano[2:7])) - sample <- array(hind, dim=c((nsdates-nfcyr)*nmemb, dimano[(nbpos+2):7])) - } - - #quantiles for each grid point and experiment - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # the members and startdates are combined in one dimension + sample_fore <- array(fore, dim=c(nfcyr*nmemb, dimano[(nbpos+2):7])) + + if(compPeriod == "Full period") { + hind <- ano + sample <- array(hind, dim=c(nsdates*nmemb, dimano[(nbpos+2):7])) + } else if (compPeriod == "Without fcyr") { + hind <- array(ano[-fcyr, , , , , , ], dim = c(nsdates-nfcyr, + dimano[2:7])) + sample <- array(hind, dim=c((nsdates-nfcyr)*nmemb, dimano[(nbpos+2):7])) + } - if (quantile==TRUE){ - qum <- apply(sample, seq(2,7-nbpos,1), FUN=quantile,probs=thr,na.rm=TRUE,names=FALSE,type=8) - }else{ - qum<-array(thr,dim=c(length(thr), dimano[(nbpos+2):7])) - } - - # This function assign the values to a category which is limited by the thresholds - # It provides binary information + #quantiles for each grid point and experiment + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + if (quantile==TRUE){ + qum <- 
apply(sample, seq(2,7-nbpos,1), FUN=quantile,probs=thr,na.rm=TRUE,names=FALSE,type=8) + }else{ + qum<-array(thr,dim=c(length(thr), dimano[(nbpos+2):7])) + } - counts <- function (dat, nbthr){ - thr <- dat[1:nbthr] - data <- dat[nbthr+1:(length(dat)-nbthr)] - prob <- array(NA, dim=c(nbthr+1, length(dat)-nbthr)) - prob[1,]=1*(data <= thr[1]) - if(nbthr!=1){ - for (ithr in 2:(nbthr)){ - prob[ithr,]=1*((data > thr[ithr - 1]) & (data <= thr[ithr])) + # This function assign the values to a category which is limited by the thresholds + # It provides binary information + + counts <- function (dat, nbthr){ + thr <- dat[1:nbthr] + data <- dat[nbthr+1:(length(dat)-nbthr)] + prob <- array(NA, dim=c(nbthr+1, length(dat)-nbthr)) + prob[1,]=1*(data <= thr[1]) + if(nbthr!=1){ + for (ithr in 2:(nbthr)){ + prob[ithr,]=1*((data > thr[ithr - 1]) & (data <= thr[ithr])) + } } + prob[nbthr+1,]=1*(data > thr[nbthr]) + return(prob) } - prob[nbthr+1,]=1*(data > thr[nbthr]) - return(prob) - } - - # The thresholds and anomalies are combined to use apply - data <- abind(qum, sample_fore, along = 1) - - # PBF:Probabilistic bins of a forecast. - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # This array contains zeros and ones that indicate the category where your forecast is. - - PBF <- array(apply(data, seq(2,7-nbpos,1), FUN=counts, nbthr=length(thr)), - dim=c(length(thr)+1, nfcyr, nmemb, dimano[(nbpos+2):nbdim])) - - return(PBF) - + + # The thresholds and anomalies are combined to use apply + data <- abind(qum, sample_fore, along = 1) + + # PBF:Probabilistic bins of a forecast. + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # This array contains zeros and ones that indicate the category where your forecast is. - if (compPeriod == "cross-validation"){ - for (iyr in 1:fcyr){ - ProbBins(ano,fcyr=iyr,thr=thr,posdates=posdates,posdim=posdim) - } + PBF <- array(apply(data, seq(2,7-nbpos,1), FUN=counts, nbthr=length(thr)), + dim=c(length(thr)+1, nfcyr, nmemb, dimano[(nbpos+2):nbdim])) + + return(PBF) + } else { + stop("Parameter 'compPeriod' must be one of 'Full period', 'Without fcyr' or 'Cross-validation'.") } } -- GitLab From 0aa2f79760e8e127e9c6bcb45e9a0a71dd1cbeb8 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 18:43:19 +0100 Subject: [PATCH 34/41] Enhancement. --- R/ProbBins.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index 0f529faa..c4b11d60 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -33,17 +33,11 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, nfcyr <- length(fcyr) if (compPeriod == "Cross-validation") { - result <- array(dim = c(nfcyr, length(thr) + 1, 1, nmemb, dimano[(nbpos + 2):nbdim])) - store_indices <- as.list(rep(TRUE, length(dim(result)))) + result <- NULL for (iyr in fcyr) { store_indices[[1]] <- iyr - result <- do.call("[<-", c(list(x = result), - store_indices, - list(value = ProbBins(ano, iyr, thr, quantile, - posdates, posdim, - "Without fcyr")) - ) - ) + result <- abind(result, ProbBins(ano, iyr, thr, quantile, + posdates, posdim, "Without fcyr"), 2) } return(result) } else if (compPeriod %in% c("Full period", "Without fcyr")) { -- GitLab From 7e22d4e40904cd74507abe987b34e2b88d5633bb Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 18:47:40 +0100 Subject: [PATCH 35/41] Doc enhancements. 
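
The documentation now describes the new default fcyr = 'all' and the corrected
output dimensions. A minimal usage sketch of the documented interface,
following the sample-data pattern already used in the ProbBins.Rd example:

    example(Load)
    clim <- Clim(sampleData$mod, sampleData$obs)
    ano_exp <- Ano(sampleData$mod, clim$clim_exp)
    # Tercile bins for all forecast years at once
    PB_all <- ProbBins(ano_exp, fcyr = 'all', thr = c(1/3, 2/3),
                       quantile = TRUE, posdates = 3, posdim = 2,
                       compPeriod = "Full period")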
---
 man/ProbBins.Rd | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/man/ProbBins.Rd b/man/ProbBins.Rd
index 96ec0576..a1c5ac50 100644
--- a/man/ProbBins.Rd
+++ b/man/ProbBins.Rd
@@ -7,7 +7,7 @@ Computes probabilistic information of a forecast relative to a threshold or a qu
 Compute probabilistic bins of a set of forecast years ('fcyr') relative to the forecast climatology over the whole period of anomalies, optionally excluding the selected forecast years ('fcyr') or the forecast year for which the probabilistic bins are being computed (see 'compPeriod').
 }
 \usage{
-ProbBins(ano, fcyr, thr, quantile = TRUE, posdates = 3, posdim = 2,
+ProbBins(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, posdim = 2,
          compPeriod = "Full period")
 }
 \arguments{
@@ -16,8 +16,8 @@
 Array of anomalies from Ano().\cr
 Must be of dimension (nexp/nobs, nmemb, nsdates, nleadtime, nlat, nlon)
 }
 \item{fcyr}{
-Indices of the forecast years of the anomalies of which to compute the probabilistic bins.\cr
-Ex: c(1:5), c(1, 4) or 4.
+Indices of the forecast years of the anomalies for which to compute the probabilistic bins, or 'all' to compute the bins for all the years.\cr
+Ex: c(1:5), c(1, 4), 4 or 'all'.
 }
 \item{thr}{
 Values used as thresholds to bin the anomalies.
@@ -33,12 +33,12 @@ Position of the dimension in \code{ano} that corresponds to the start dates (def
 Position of the dimension in \code{ano} which will be combined with 'posdates' to compute the quantiles (default = 2, ensemble members).
 }
 \item{compPeriod}{
-Three options: "Full period"/"Without fcyr"/"cross-validation" (The probabilities are computed with the terciles based on ano/ano with all 'fcyr's removed/cross-validation). The default is "Full period".
+Three options: "Full period"/"Without fcyr"/"Cross-validation" (the probabilities are computed with the terciles based on ano / ano with all 'fcyr's removed / cross-validation). The default is "Full period".
 }
 }
 \value{
 Matrix with probabilistic information and dimensions:\cr
-c(length('thr'+1), nfcyr, nmemb/nparam, nmod/nexp/nobs, nltime, nlat, nlon)\cr
-The values along the first dimension take values 0 or 1 depending on which of the 'thr'+1 cathegories the forecast/observation at the corresponding grid point, time step, member and starting date belongs to.
+c(length('thr') + 1, length(fcyr), nmemb/nparam, nmod/nexp/nobs, nltime, nlat, nlon)\cr
+The values along the first dimension take values 0 or 1 depending on which of the 'thr' + 1 categories the forecast/observation at the corresponding grid point, time step, member and starting date belongs to.
 }
 \examples{
@@ -74,6 +74,7 @@ PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), quantile = TRUE, posdates =
 \author{
 History:\cr
 1.0 - 2013 (F.Lienert) - Original code\cr
-2.0 - 2014-03 (N. Gonzalez and V.Torralba, \email{veronica.torralba@ic3.cat}) - Debugging
+2.0 - 2014-03 (N. Gonzalez and V. Torralba, \email{veronica.torralba at bsc.es}) - Debugging
+2.1 - 2017-02 (V. Torralba and N. Manubens, \email{veronica.torralba at bsc.es}) - Fix bug with cross-validation
 }
 \keyword{datagen}
--
GitLab


From fbedcd25dd1dd75e96b890e64b839e26147b3938 Mon Sep 17 00:00:00 2001
From: Nicolau Manubens
Date: Tue, 7 Feb 2017 18:50:26 +0100
Subject: [PATCH 36/41] Small fix.
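
Removes a leftover 'store_indices[[1]] <- iyr' assignment from the previous
pre-allocation approach; the cross-validation loop now only accumulates the
recursive results with abind(). A minimal sketch of the accumulation pattern
with toy arrays (assuming the abind package is available):

    library(abind)
    result <- NULL
    for (piece in list(array(1, c(2, 1, 3)), array(2, c(2, 1, 3)))) {
      # abind() ignores NULL arguments, so the first iteration keeps 'piece'
      result <- abind(result, piece, along = 2)
    }
    dim(result)  # 2 2 3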
--- R/ProbBins.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index c4b11d60..1651a01f 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -35,7 +35,6 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, if (compPeriod == "Cross-validation") { result <- NULL for (iyr in fcyr) { - store_indices[[1]] <- iyr result <- abind(result, ProbBins(ano, iyr, thr, quantile, posdates, posdim, "Without fcyr"), 2) } -- GitLab From 82a6087a67de1017b6048341da4438f4f19116c2 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 19:08:35 +0100 Subject: [PATCH 37/41] Small fix. --- R/ProbBins.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index 1651a01f..2e2495fb 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -36,7 +36,8 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, result <- NULL for (iyr in fcyr) { result <- abind(result, ProbBins(ano, iyr, thr, quantile, - posdates, posdim, "Without fcyr"), 2) + posdates, posdim, "Without fcyr"), + along = 2) } return(result) } else if (compPeriod %in% c("Full period", "Without fcyr")) { -- GitLab From 813f87c9f5c7b21c793f31bb37ff1e636871dbe6 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 7 Feb 2017 19:50:44 +0100 Subject: [PATCH 38/41] RatioRMS example now working. --- man/RatioRMS.Rd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 6ac10330..a804999c 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -44,7 +44,7 @@ Vector of observations. } \value{ RatioRMS:\cr -Matrix with the same dimensions as var_exp1/var_exp2/var_obs except along posRMS where the dimension has length 2 if ‘pval = TRUE’, or 1 otherwise. The dimension of length 2 corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr +Matrix with the same dimensions as var_exp1/var_exp2/var_obs except along posRMS where the dimension has length 2 if 'pval = TRUE', or 1 otherwise. The dimension of length 2 corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr \cr .RatioRMS:\cr \itemize{ @@ -108,8 +108,10 @@ PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, # The following example uses veriApply combined with .RatioRMS instead of RatioRMS \dontrun{ library(easyVerification) -RatioRMS2 <- s2dverification:::.RatioRMS -rrms2 <- veriApply("RatioRMS2", ano_exp_1, +# The name of the function has to end in 'ss' in order for veriApply() to +# detect it as a skill score. +RatioRMSss <- s2dverification:::.RatioRMS +rrms2 <- veriApply("RatioRMSss", ano_exp_1, # see ?veriApply for how to use the 'parallel' option Mean1Dim(ano_obs, 1), ano_exp_2, -- GitLab From ef20dfef4a1de3407c6a74af1a3d99f50bf17bc8 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Wed, 8 Feb 2017 11:41:31 +0100 Subject: [PATCH 39/41] Some progress. 
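
Drafts an example of .BrierScore with veriApply() in man/BrierScore.Rd; the
data preparation of the example is completed in a later commit. As a reference
for the example, the raw Brier score is just the mean squared difference
between the forecast probability and the binary outcome; a minimal sketch with
toy data (not package data):

    set.seed(1)
    p <- runif(10)             # probability forecasts
    o <- rbinom(10, 1, p)      # binary observations
    bs_raw <- mean((p - o)^2)  # raw Brier score, before decomposition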
--- man/BrierScore.Rd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index f3fee0b3..acc68c19 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -53,6 +53,8 @@ Both BrierScore and .Brier score provide the same outputs: } } \examples{ +# Minimalist examples with BrierScore +# See ?UltimateBrier to see how to apply BrierScore to multidimensional arrays a <- runif(10) b <- round(a) x <- BrierScore(b, a) @@ -64,7 +66,26 @@ a <- runif(10) b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members... x2 <- BrierScore(a, b) } + +# Example of .BrierScore with veriApply \dontrun{ +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +runmean_months <- 12 +dim_to_smooth <- 4 +# Smooth along lead-times +smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) +smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) +dim_to_mean <- 2 # Mean along members +corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), + Mean1Dim(smooth_ano_obs, dim_to_mean), + compROW = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) + + library(easyVerification) BrierScore2 <- s2dverification:::.BrierScore veriApply("BrierScore2", a, b, ) -- GitLab From 4af7600172005d16ca7e2f383daf0b0097365548 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Wed, 8 Feb 2017 11:46:52 +0100 Subject: [PATCH 40/41] Now returning dimension names. --- R/ProbBins.R | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index 2e2495fb..4aee0817 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -13,7 +13,10 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, nbpos <- length(posdim) #permute dimensions in ano if (posdates != 1 || posdim != 2) { - ano <- aperm(ano, c(posdates, posdim, setdiff(seq(1,7,1), c(posdates, posdim)))) + dimnames_backup <- names(dim(ano)) + perm <- c(posdates, posdim, (1:7)[-c(posdates, posdim)]) + ano <- aperm(ano, perm) + names(dim(ano)) <- dimnames_backup[perm] posdates <- 1 posdim <- 2 } @@ -35,9 +38,16 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, if (compPeriod == "Cross-validation") { result <- NULL for (iyr in fcyr) { - result <- abind(result, ProbBins(ano, iyr, thr, quantile, - posdates, posdim, "Without fcyr"), - along = 2) + if (is.null(result)) { + result <- ProbBins(ano, iyr, thr, quantile, + posdates, posdim, "Without fcyr") + } else { + dimnames_backup <- names(dim(result)) + result <- abind(result, ProbBins(ano, iyr, thr, quantile, + posdates, posdim, "Without fcyr"), + along = 2) + names(dim(result)) <- dimnames_backup + } } return(result) } else if (compPeriod %in% c("Full period", "Without fcyr")) { @@ -94,6 +104,7 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, PBF <- array(apply(data, seq(2,7-nbpos,1), FUN=counts, nbthr=length(thr)), dim=c(length(thr)+1, nfcyr, nmemb, dimano[(nbpos+2):nbdim])) + names(dim(PBF)) <- c('bin', 'sdate', 'member', names(dim(ano))[(nbpos+2):nbdim]) return(PBF) } else { stop("Parameter 'compPeriod' must be one of 'Full period', 'Without fcyr' or 'Cross-validation'.") -- GitLab From 6457a914c68d0fe2dabc4d1359e80ddc89a822b2 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Wed, 8 Feb 2017 17:42:04 +0100 Subject: [PATCH 41/41] Added example of 
.BrierScore with veriApply. --- R/ProbBins.R | 6 ++++-- man/BrierScore.Rd | 27 ++++++++++----------------- man/Corr.Rd | 2 +- man/RMS.Rd | 4 ++-- man/RMSSS.Rd | 2 +- man/RatioRMS.Rd | 4 ++-- man/RatioSDRMS.Rd | 2 +- 7 files changed, 21 insertions(+), 26 deletions(-) diff --git a/R/ProbBins.R b/R/ProbBins.R index 4aee0817..fef21045 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -30,8 +30,10 @@ ProbBins <- function(ano, fcyr = 'all', thr, quantile = TRUE, posdates = 3, } } - if (fcyr == 'all') { - fcyr <- 1:nsdates + if (length(fcyr) == 1) { + if (fcyr == 'all') { + fcyr <- 1:nsdates + } } nfcyr <- length(fcyr) diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index acc68c19..e126729a 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -54,7 +54,6 @@ Both BrierScore and .Brier score provide the same outputs: } \examples{ # Minimalist examples with BrierScore -# See ?UltimateBrier to see how to apply BrierScore to multidimensional arrays a <- runif(10) b <- round(a) x <- BrierScore(b, a) @@ -67,28 +66,22 @@ b <- cbind(round(a),round(a)) # matrix containing 2 identical ensemble members.. x2 <- BrierScore(a, b) } -# Example of .BrierScore with veriApply - \dontrun{ +# Example of BrierScore using UltimateBrier +# See ?UltimateBrier for more information example(Load) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -runmean_months <- 12 -dim_to_smooth <- 4 -# Smooth along lead-times -smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) -smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, - limits = c(ceiling((runmean_months + 1) / 2), - leadtimes_per_startdate - floor(runmean_months / 2))) - +bs <- UltimateBrier(ano_exp, ano_obs, thr = c(1/3, 2/3)) -library(easyVerification) + \dontrun{ +# Example of .BrierScore with veriApply +require(easyVerification) BrierScore2 <- s2dverification:::.BrierScore -veriApply("BrierScore2", a, b, ) +bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3), posdates = 3, posdim = 2) +bs2 <- veriApply("BrierScore2", bins_ano_exp, Mean1Dim(bins_ano_obs, 3), + tdim = 2, ensdim = 3) } } \references{ diff --git a/man/Corr.Rd b/man/Corr.Rd index ce962879..ae4b9f00 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -101,7 +101,7 @@ PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", # The following example uses veriApply combined with .Corr instead of Corr \dontrun{ -library(easyVerification) +require(easyVerification) Corr2 <- s2dverification:::.Corr corr2 <- veriApply("Corr2", smooth_ano_exp, diff --git a/man/RMS.Rd b/man/RMS.Rd index 2b1ca8f3..34980b65 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -57,7 +57,7 @@ RMS: Array with dimensions:\cr The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). 
\cr \cr .RMS: - \item{$rms}{ + \item{$rms}{ The root mean square error, } \item{$conf_low}{ @@ -91,7 +91,7 @@ PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", fileout = 'tos_rms.eps') # The following example uses veriApply combined with .RMS instead of RMS \dontrun{ -library(easyVerification) +require(easyVerification) RMS2 <- s2dverification:::.RMS rms2 <- veriApply("RMS2", smooth_ano_exp, diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index bd1f3e75..b7a994a7 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -71,7 +71,7 @@ PlotVsLTime(rmsss_plot, toptitle = "Root Mean Square Skill Score", ytitle = "", fileout = 'tos_rmsss.eps') # The following example uses veriApply combined with .RMSSS instead of RMSSS \dontrun{ -library(easyVerification) +require(easyVerification) RMSSS2 <- s2dverification:::.RMSSS rmsss2 <- veriApply("RMSSS2", ano_exp, # see ?veriApply for how to use the 'parallel' option diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index a804999c..aa179b76 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -32,7 +32,7 @@ Dimension along which the RMSE are to be computed = the position of the start da \item{pval}{ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. } - \item{exp}{ + \item{exp}{ Matrix of experimental data 1. } \item{exp_ref}{ @@ -107,7 +107,7 @@ PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, # The following example uses veriApply combined with .RatioRMS instead of RatioRMS \dontrun{ -library(easyVerification) +require(easyVerification) # The name of the function has to end in 'ss' in order for veriApply() to # detect it as a skill score. RatioRMSss <- s2dverification:::.RatioRMS diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 59d373ec..1e6a9f9d 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -70,7 +70,7 @@ PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", # The following example uses veriApply combined with .RatioSDRMS instead of RatioSDRMS \dontrun{ -library(easyVerification) +require(easyVerification) RatioSDRMS2 <- s2dverification:::.RatioSDRMS rsdrms2 <- veriApply("RatioSDRMS2", sampleData$mod, -- GitLab