From f372a7695abfbeeae0215b840422fba2ae87d79f Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 12:01:30 +0200 Subject: [PATCH 01/20] veriApply compatible Corr --- R/Corr.R | 144 ++++++++++++++----------------------------------------- 1 file changed, 37 insertions(+), 107 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index e23895b7..cc43bf2d 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,125 +1,55 @@ -Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', +Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { - limits <- c(1, dim(var_obs)[compROW]) - } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA - } - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[poscor] < 3 ) { - stop("At least 3 values required to compute correlation") - } + if (method != "kendall" && method != "spearman" && method != "pearson") { stop("Wrong correlation method") - } - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - var_exp <- Enlarge(var_exp, 10) - var_obs <- Enlarge(var_obs, 10) - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- poscor - posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] - var_exp <- aperm(var_exp, posaperm) - var_obs <- aperm(var_obs, posaperm) - dimsaperm <- dim(var_exp) - # + # Check the siglev arguments: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (siglev > 1 || siglev < 0) { stop("siglev need to be higher than O and lower than 1") } - # - # Loop to compute correlation for each grid point - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dim_val <- 2 - dim_pval <- 4 - nvals <- 1 + 2*conf + pval - if (!conf) { - dim_val <- 1 - dim_pval <- 2 - } else { + } + conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - } - CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, - j10] - tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, - j10] - if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { - toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) - CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto - #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) - if (pval || conf) { - if (method == "kendall" | method == "spearman") { - eno <- Eno(rank(tmp2), 1) - } else if (method == "pearson") { - eno <- Eno(tmp2, 1) + + ens.mean <- rowMeans(ens) + CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) + + if (pval || conf) { + if (method == "kendall" | method == "spearman") { + eno <- Eno(rank(obs), 1) + } else if (method == "pearson") { + eno <- Eno(obs, 1) } } - if (pval) { - #t <- qt(0.95, eno - 2) - t <- qt(siglev, eno - 2) - CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, - j10] <- sqrt((t * t) / ((t * t) + eno - 2)) - } - if (conf) { - CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( - eno - 3)) - CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( - #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( - eno - 3)) - } + if (pval) { + + t <- CORR*sqrt((eno-2)/(1-(CORR^2))) + p <- 1 - pt(t, eno-2) + p.val <- p + names(p.val) <- "p.val" + } else { + p.val <- c() + names(p.val) <- c() } - } - } - } - } - } - } - } - } - } - } - # - dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) - # + if (conf) { + conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( + eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( + eno - 3))) + conf.int <- conf.int[!is.na(CORR)] + names(conf.int) <- c("conf_low","conf_high") + } else { + conf.int <- c() + names(conf.int) <- c() + } + # Output # ~~~~~~~~ # - CORR + results <- c(CORR,conf.int, p.val) + names(results) <- c("Corr", names(conf.int), names(p.val)) + return(results) } -- GitLab From 73dbae9a20fc7beef8ff3925d9ee4c46bd3e421a Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 12:11:03 +0200 Subject: [PATCH 02/20] veriApply compatible Corr --- man/Corr.Rd | 75 ++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index 1c5641e0..5ba5ede6 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,82 +1,51 @@ \name{Corr} \alias{Corr} \title{ -Computes Correlation Skill Measure (Temporal Correlation Along Start Dates) +Computes the correlation coefficient between the ensemble mean and observations. } \description{ -Matrix var_exp & var_obs should have the same dimensions except along posloop dimension where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr -Corr computes the correlation skill of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs correlation skill measures for each other grid point of the matrix (each latitude/longitude/level/leadtime). \cr -The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], that mean with no NA between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval is computed by a Fisher transformation.\cr -The significance level relies on a one-sided student-T distribution.\cr -We can modifiy the treshold of the test modifying siglev (default value=0.95). + +Calculates the correlation coefficient (Pearson, Kendall or Spearman) between the ensemble mean and the observations along the initialization times. The confidence interval and p-value from a one-tailed t-test are provided, where the number of degrees of freedom is calculated using the effective numberof observations (see Eno). \cr + + } \usage{ -Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, +Corr(ens, obs, posloop = 1, poscor = 2, compROW = NULL, limits = NULL, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) } \arguments{ - \item{var_exp}{ -Matrix of experimental data. + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. } - \item{var_obs}{ -Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. - } - \item{posloop}{ -Dimension nobs and nexp. - } - \item{poscor}{ -Dimension along which correlation are to be computed (the dimension of the start dates). - } - \item{compROW}{ -Data taken into account only if (compROW)th row is complete.\cr -Default = NULL. - } - \item{limits}{ -Complete between limits[1] & limits[2]. Default = NULL. + \item{obs}{ +Vector of the corresponding observations of length N. } + \item{siglev}{ -Significance level according. Default = 0.95. +Significance level. Default = 0.95. } \item{method}{ Type of correlation: 'pearson', 'spearman' or 'kendall'. Default='pearson' } \item{conf}{ -Whether to compute confidence intervals (TRUE; default) or not (FALSE). +Whether to compute confidence intervals (default = 'TRUE') or not (FALSE). } \item{pval}{ -Whether to compute statistical significance p-value (TRUE; default) or not (FALSE). +Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE). } } \value{ -Matrix with dimensions :\cr -c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. +A list containing the following components :\cr +statistic - the value of the test statistic \cr +conf_low - the lower limit of the confidence interval \cr +conf_high - the upper limit of the confidence interval \cr +p.val - the p value } \examples{ -# Load sample data as in Load() example: -example(Load) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -runmean_months <- 12 -dim_to_smooth <- 4 # Smooth along lead-times -smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) -smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -required_complete_row <- 3 # Discard start dates which contain any NA lead-times -leadtimes_per_startdate <- 60 -corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, - limits = c(ceiling((runmean_months + 1) / 2), - leadtimes_per_startdate - floor(runmean_months / 2))) -PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", - monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), - fileout = 'tos_cor.eps') +obs <- rnorm(100) +ens <- matrix(rnorm(500), c(100,5)) +Corr(ens,obs) } \author{ History:\cr -- GitLab From 0dd30f37b5a50f19d74af4e690d06b114ab8a32d Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 14:36:17 +0200 Subject: [PATCH 03/20] veriApply compatible BrierScore --- R/BrierScore.R | 86 ++++++++++++++++++++++++++++------------------- man/BrierScore.Rd | 15 +++++---- 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index c64434ed..ff4fc2c1 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -1,74 +1,92 @@ -BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { - if (max(pred) > 1 | min(pred) < 0) { +BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { + if (max(ens) > 1 | min(ens) < 0) { stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") } else if (max(obs) != 1 & min(obs) != 0) { .message("Binary events must be either 0 or 1. Are you certain this is a binary event? ") } else { nbins <- length(thresholds) - 1 # Number of bins - n <- length(pred) - bins <- as.list(paste("bin", 1:nbins,sep = "")) + n <- dim(ens)[1] # Number of observations + n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members + bins <- array(as.list(paste("bin", 1:nbins,sep = "")), c(nbins,dim(ens)[2])) for (i in 1:nbins) { if (i == nbins) { - bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) + bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x <= thresholds[i + 1]))) } else { - bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) + bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x < thresholds[i + 1]))) } } - - fkbar <- okbar <- nk <- array(0, dim = nbins) - for (i in 1:nbins) { - nk[i] <- length(bins[[i]][[1]]) - fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] - okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + + + fkbar <- okbar <- nk <- array(0, dim = c(nbins,dim(ens)[2])) + for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { + nk[i,k] <- length(bins[[i,k]][[1]]) + fkbar[i,k] <- sum(ens[,k][bins[[i,k]][[1]]])/nk[i,k] + okbar[i,k] <- sum(obs[bins[[i,k]][[1]]]) / nk[i,k] } - + } + + fkbar[fkbar == Inf] <- 0 + okbar[is.nan(okbar)] <- 0 obar <- sum(obs) / length(obs) - relsum <- ressum <- term1 <- term2 <- 0 - for (i in 1:nbins) { - if (nk[i] > 0) { - relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 - ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i]) { - term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- rep(0,dim(ens)[2]) + + for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { + + if (nk[i,k] > 0) { + relsum[k] <- relsum[k] + nk[i,k] * (fkbar[i,k] - okbar[i,k])^2 + ressum[k] <- ressum[k] + nk[i,k] * (okbar[i,k] - obar)^2 + + for (j in 1:nk[i,k]) { + term1[k] <- term1[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k])^2 + term2[k] <- term2[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k]) * (obs[bins[[i,k]][[1]][j]] - okbar[i,k]) + } } } } + rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) - bs <- sum((pred - obs)^2) / n + bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) + bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution bs_check_gres <- rel - gres + unc # BS using GRES bss_gres <- (gres - rel) / unc # BSS using GRES - + # # Estimating the bias-corrected components of the BS # - term3 <- array(0, nbins) + term3 <- array(0, dim = c(nbins,dim(ens)[2])) for (i in 1:nbins) { - term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + for (k in 1:dim(ens)[2]) { + term3[i,k] <- (nk[i,k] / (nk[i,k] - 1)) * okbar[i,k] * (1 - okbar[i,k]) + } } - term_a <- sum(term3, na.rm = T) / n + term_a <- apply(term3, MARGIN = 2, FUN = function(x) sum(x, na.rm = T) / n) term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b - if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { - rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) - gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) - rel_bias_corrected <- rel_bias_corrected2 - gres_bias_corrected <- gres_bias_corrected2 + rel_bias_corrected2 <- gres_bias_corrected2 <- rep(0, dim(ens)[2]) + for(j in 1:dim(ens)[2]) { + if (rel_bias_corrected[j] < 0 || gres_bias_corrected[j] < 0) { + rel_bias_corrected2[j] <- max(rel_bias_corrected[j], rel_bias_corrected[j] - gres_bias_corrected[j], 0) + gres_bias_corrected2[j] <- max(gres_bias_corrected[j], gres_bias_corrected[j] - rel_bias_corrected[j], 0) + rel_bias_corrected[j] <- rel_bias_corrected2[j] + gres_bias_corrected[j] <- gres_bias_corrected2[j] + } } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected - + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { # cat("No error found \ n") # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} - - invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs)) + + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, ens = ens, obs = obs)) } } diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index 43843523..e6639f82 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -1,7 +1,7 @@ \name{BrierScore} \alias{BrierScore} \title{ -Compute Brier Score And Its Decomposition And Brier Skill Score +Compute Brier Score and its decomposition and the Brier Skill Score } \description{ Returns the values of the BS and its standard decomposition as well as the addition of the two winthin-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference forecast. @@ -10,15 +10,16 @@ Stephenson et al. (2008). Two extra components in the Brier score decomposition. Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr } \usage{ -BrierScore(obs, pred, thresholds = seq(0, 1, 0.1)) +BrierScore(ens,obs, thresholds = seq(0, 1, 0.1)) } -\arguments{ +\arguments{ + \item{ens}{ +Vector of probablistic predictions with values in the range [0,1] + } \item{obs}{ Vector of binary observations (1 or 0) } - \item{pred}{ -Vector of probablistic predictions with values in the range [0,1] - } + \item{thresholds}{ Values used to bin the forecasts. By default the bins are {[0,0.1), [0.1, 0.2), ... [0.9, 1]} } @@ -47,7 +48,7 @@ $obs: probability forecasts of the event\cr \examples{ a <- runif(10) b <- round(a) -x <- BrierScore(b, a) +x <- BrierScore(a,b) x$bs - x$bs_check_res x$bs - x$bs_check_gres x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected -- GitLab From 46abcfe171621d30638ba06535373044fe646a3f Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 14:41:04 +0200 Subject: [PATCH 04/20] veriApply compatible score functions --- R/RMS.R | 107 ++++++++-------------------------------------- R/RatioRMS.R | 93 ++++++++++------------------------------ R/RatioSDRMS.R | 100 ++++++++++++------------------------------- R/Trend.R | 90 +++++++------------------------------- man/RMS.Rd | 62 +++++++-------------------- man/RatioRMS.Rd | 71 ++++++++---------------------- man/RatioSDRMS.Rd | 44 +++++++------------ man/Trend.Rd | 37 +++++++--------- 8 files changed, 147 insertions(+), 457 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index 1a63b48f..c8ba7c12 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -1,102 +1,31 @@ -RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, - limits = NULL, siglev = 0.95, conf = TRUE) { - # - # Remove data along compROW dim if there is at least one NA between limits - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (is.null(compROW) == FALSE) { - if (is.null(limits) == TRUE) { - limits <- c(1, dim(var_obs)[compROW]) - } - outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) - outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) - var_obs[which(outrows)] <- NA - } +RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { + - # - # Enlarge var_exp & var_obs to 10 dim + move posloop & posRMS to 1st & 2nd - # pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) # # RMS & its confidence interval computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # if (conf) { - nvals <- 3 - dim_rms <- 2 + conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - } else { - nvals <- 1 - dim_rms <- 1 } - enlrms <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif <- array(dim = dimsaperm[-1]) - dif[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - enlrms[jexp, jobs, dim_rms, , , , , , , , ] <- Mean1Dim(dif ** 2, 1, - narm = TRUE) ** 0.5 + + dif <- rowMeans(ens) - obs + enlrms <- mean(dif ** 2, na.rm = TRUE) ** 0.5 if (conf) { eno <- Eno(dif, 1) - for (j3 in 1:dimsaperm[3]){ - for (j4 in 1:dimsaperm[4]){ - for (j5 in 1:dimsaperm[5]){ - for (j6 in 1:dimsaperm[6]){ - for (j7 in 1:dimsaperm[7]){ - for (j8 in 1:dimsaperm[8]){ - for (j9 in 1:dimsaperm[9]){ - for (j10 in 1:dimsaperm[10]){ - ndat <- length(sort(dif[, j3, j4, j5, j6, j7, j8, j9, - j10])) - enlrms[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_high, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - enlrms[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, - j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, - j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, - j8, j9, j10] ** 2 / qchisq(conf_low, eno[j3, j4, j5, - j6, j7, j8, j9, j10] - 1)) ** 0.5 - } - } - } - } - } - } - } - } - } - } - } + + ndat <- length(sort(dif)) + conf.int <- c((eno * enlrms ** 2 / qchisq(conf_high, eno - 1)) ** 0.5, + (eno * enlrms ** 2 / qchisq(conf_low, eno - 1)) ** 0.5) + names(conf.int) <- c("conf_low","conf_high") + } else { + conf.int <- c() + names(conf.int) <- c() } - dim(enlrms) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlrms + results <- c(enlrms,conf.int) + names(results) <- c("rms",names(conf.int)) + return(results) + } diff --git a/R/RatioRMS.R b/R/RatioRMS.R index a97ec02f..3f505613 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -1,86 +1,37 @@ -RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { - # - # Enlarge var_exps & var_obs to 10 dim + move posRMS to 1st pos - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp1) - for (iind in 1:length(dimsvar)) { - if (dim(var_exp2)[iind] != dimsvar[iind] | - dim(var_obs)[iind] != dimsvar[iind]) { - stop("all input vars should have the same dimensions") - } - } - enlvarexp1 <- Enlarge(var_exp1, 10) - enlvarexp2 <- Enlarge(var_exp2, 10) - enlvarobs <- Enlarge(var_obs, 10) - posaperm <- 1:10 - posaperm[1] <- posRMS - posaperm[posRMS] <- 1 - permvarexp1 <- aperm(enlvarexp1, posaperm) - permvarexp2 <- aperm(enlvarexp2, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp1) +RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { + # # RMS ratio and its pvalue computation # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - enlratiorms <- array(dim = c(nvals, dimsaperm[2:10])) - dif1 <- permvarexp1 - permvarobs - dif2 <- permvarexp2 - permvarobs - rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 - rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + + + dif1 <- rowMeans(ens, na.rm = TRUE) - obs + dif2 <- rowMeans(ens.ref, na.rm = TRUE) - obs + rms1 <- mean(dif1 ** 2, na.rm = TRUE) ** 0.5 + rms2 <- mean(dif2 ** 2, na.rm = TRUE) ** 0.5 rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( abs(rms2), na.rm = TRUE) / 1000 - enlratiorms[1, , , , , , , , , ] <- (rms1 / rms2) + enlratiorms <- (rms1 / rms2) if (pval) { eno1 <- Eno(dif1, 1) eno2 <- Eno(dif2, 1) F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) F[which(F < 1)] <- 1 / F[which(F < 1)] - for (j2 in 1:dimsaperm[2]) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - l1 <- eno1[j2, j3, j4, j5, j6, j7, j8, j9, j10] - l2 <- eno2[j2, j3, j4, j5, j6, j7, j8, j9, j10] - if (!is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { - enlratiorms[2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- (1 - pf(F[j2, j3, j4, j5, j6, j7, j8, j9, j10], - l1 - 1, l2 - 1)) * 2 - } else { - enlratiorms[1, j2, j3, j4, j5, j6, j7, j8, j9, j10] <- NA - } - } - } - } - } - } - } - } - } + + + if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) { + p.val <- (1 - pf(F, + eno1 - 1, eno2 - 1)) * 2 + names(p.val) <- "p.val" + } + else { + p.val <- c() + names(p.val) <- c() } } + results <- c(enlratiorms, p.val) + names(results) <- c("ratiorms",names(p.val)) + return(results) - enlratiorms <- aperm(enlratiorms, posaperm) - if (pval) { - dimsvar[posRMS] <- 2 - } else { - dimsvar[posRMS] <- 1 - } - dim(enlratiorms) <- dimsvar - # - # Output - # ~~~~~~~~ - # - enlratiorms } diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index d76a832b..7ec8ff67 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -1,85 +1,41 @@ -RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { - # - # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimexp <- dim(var_exp) - dimobs <- dim(var_obs) - if (length(dimexp) < 4 | length(dimobs) < 4) { - stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)") - } - for (jn in 3:max(length(dimexp), length(dimobs))) { - if (dimexp[jn] != dimobs[jn]) { - stop("Wrong input dimensions") - } - } - var_exp <- Enlarge(var_exp, 7) - var_obs <- Enlarge(var_obs, 7) +RatioSDRMS <- function(ens, obs, pval = TRUE) { # # Ratio RMSE / SD and its significance level # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # - ensmeanexp <- Mean1Dim(var_exp, 2) - ensmeanobs <- Mean1Dim(var_obs, 2) - dimrms <- c(dimexp[1], dimobs[1], dimexp[4:length(dimexp)]) + ensmean <- rowMeans(ens) + + std <- sd(ensmean) + enosd <- Eno(ensmean,1) + + dif <- ensmean - obs + rms <- mean(dif ** 2, 1, narm = TRUE) ** 0.5 + + enorms <- Eno(dif,1) + enlratiormssd <- std /rms if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - dimratiormssd <- c(dimexp[1], dimobs[1], nvals, dimexp[4:length(dimexp)]) - if (length(dimrms) < 6) { - dimrms <- c(dimrms, array(1, dim = (6 - length(dimrms)))) - } - if (length(dimratiormssd) < 7) { - dimenlratiormssd <- c(dimratiormssd, - array(1, dim = (7 - length(dimratiormssd)))) - } else { - dimenlratiormssd <- dimratiormssd - } - dif <- var_exp - InsertDim(ensmeanexp, 2, dimexp[2]) - std <- apply(array(dif, dim = c(dimexp[1], dimexp[2] * dimexp[3], - dimrms[3:6])), c(1, 3, 4, 5, 6), sd, na.rm = TRUE) - enosd <- apply(Eno(dif, 3), c(1, 3, 4, 5, 6), sum, na.rm = TRUE) - rms <- array(dim = dimrms) - enlratiormssd <- array(dim = dimenlratiormssd) - for (jexp in 1:dimexp[1]) { - for (jobs in 1:dimobs[1]) { - dif <- ensmeanexp[jexp, , , , , ] - ensmeanobs[jobs, , , , , ] - rms[jexp,jobs, , , , ] <- Mean1Dim(dif ** 2, 1, narm = TRUE) ** 0.5 - enorms <- array(Eno(dif, 1), dim = dimrms[3:6]) - enlratiormssd[jexp, jobs, 1, , , , ] <- std[jexp, , , , ] / rms[jexp, - jobs, , , , ] - if (pval) { - for (jltime in 1:dimrms[3]) { - for (jlev in 1:dimrms[4]) { - for (jlat in 1:dimrms[5]) { - for (jlon in 1:dimrms[6]) { - l1 <- enosd[jexp, jltime, jlev, jlat, jlon] - l2 <- enorms[jltime, jlev, jlat, jlon] - F <- (enosd[jexp, jltime, jlev, jlat, jlon] * (std[jexp, jltime, - jlev, jlat, jlon]) ** 2 / (enosd[jexp, jltime, jlev, jlat, - jlon] - 1)) / (enorms[jltime, jlev, jlat, jlon] * (rms[jexp, - jobs, jltime, jlev, jlat, jlon]) ** 2 / (enorms[jltime, - jlev, jlat, jlon] - 1)) + + l1 <- enosd + l2 <- enorms + + + F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1)) if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { - enlratiormssd[jexp, jobs, 2, jltime, jlev, jlat, - jlon] <- 1 - pf(F, l1 - 1, l2 - 1) - } else { - enlratiormssd[jexp, jobs, 1, jltime, jlev, jlat, jlon] <- NA - } - } - } - } - } - } - } + p.val <- 1 - pf(F, l1 - 1, l2 - 1) + names(p.val) <- "p.val" + } } - dim(enlratiormssd) <- dimratiormssd + else { + p.val <- c() + names(p.val) <- c() } + # # Output # ~~~~~~~~ # - enlratiormssd + + results <- c(enlratiormssd, p.val) + names(results) <- c("ratio", names(p.val)) + return(results) } diff --git a/R/Trend.R b/R/Trend.R index 79984160..f427f283 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -1,80 +1,22 @@ -Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { - # - # Enlarge the size of var to 10 and move posTR to first position - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var) - if (is.null(dimsvar)) { - dimsvar <- length(var) - } - enlvar <- Enlarge(var, 10) - outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) - if (conf) { - nvals <- 4 - poscoef2 <- 2 - poscoef1 <- 4 - } else { - nvals <- 2 - poscoef2 <- 1 - poscoef1 <- 2 - } - outdim[posTR] <- nvals - posaperm <- 1:10 - posaperm[posTR] <- 1 - posaperm[1] <- posTR - enlvar <- aperm(enlvar, posaperm) - dimsaperm <- outdim[posaperm] - # - # Loop on all dimensions to compute trends - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enltrend <- array(dim = dimsaperm) - enldetrend <- array(dim = dim(enlvar)) - for (j2 in 1:dimsaperm[2]) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp <- enlvar[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - if (any(!is.na(tmp))) { - mon <- seq(tmp) * interval - lm.out <- lm(tmp ~ mon, na.action = na.omit) - enltrend[poscoef2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[2] - enltrend[poscoef1, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[1] - if (conf) { - enltrend[c(1, 3), j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out, level = siglev)[2, 1:2] - } - enldetrend[is.na(tmp) == FALSE, j2, j3, j4, j5, j6, j7, j8, - j9, j10] <- tmp[is.na(tmp) == FALSE] - lm.out$fitted.values - } - } - } - } - } - } +Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { + + ensmean <- rowMeans(ens, na.rm = TRUE) + + if (any(!is.na(ensmean))) { + mon <- seq(ensmean) * interval + lm.out <- lm(ensmean ~ mon, na.action = na.omit) + trend <- c(lm.out$coefficients[2], lm.out$coefficients[1]) + if (conf) { + conf.int <- confint(lm.out, level = siglev)[2, 1:2] } - } - } - } - # - # Back to the original dimensions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enldetrend <- aperm(enldetrend, posaperm) - dim(enldetrend) <- dimsvar - enltrend <- aperm(enltrend, posaperm) - dimsvar[posTR] <- nvals - dim(enltrend) <- dimsvar + detrend <- ensmean[is.na(ensmean) == FALSE] - lm.out$fitted.values + } + + + # # Outputs # ~~~~~~~~~ # - invisible(list(trend = enltrend, detrended = enldetrend)) + invisible(list(trend = trend, conf.int = conf.int, detrended = detrend)) } diff --git a/man/RMS.Rd b/man/RMS.Rd index 118400f4..8d276c57 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -1,36 +1,23 @@ \name{RMS} \alias{RMS} \title{ -Computes Root Mean Square Error Skill Measure +Computes the root mean square error of the ensemble mean } \description{ -Matrix var_exp & var_obs should have the same dimensions except along posloop dimension where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr -RMS computes the Root Mean Square Error skill of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSE skill measures for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr -The RMSE are computed along the posRMS dimension which should correspond to the startdate dimension.\cr -If compROW is given, the RMSE are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], that mean with no NA between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr -Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval relies on a chi2 distribution. +Computes the RMSE of an ensemble mean forecast against a vector of observations.\cr +The confidence interval of the RMSE is obtained with a chi2 distribution. } \usage{ -RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) +RMS(ens, obs, siglev = 0.95, conf = TRUE) } \arguments{ - \item{var_exp}{ -Matrix of experimental data. + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. } - \item{var_obs}{ -Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. - } - \item{posloop}{ -Dimension nobs and nexp. - } - \item{posRMS}{ -Dimension along which RMSE are to be computed (the dimension of the start dates). - } - \item{compROW}{ -Data taken into account only if (compROW)th row is complete.\cr -Default = NULL. + \item{obs}{ +Vector of the corresponding observations of length N. } + \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } @@ -42,32 +29,15 @@ Whether to compute confidence interval or not. TRUE by default. } } \value{ -Matrix with dimensions:\cr - c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). +A list containing the following components :\cr +rms - the root mean square error \cr +conf_low - the lower limit of the confidence interval \cr +conf_high - the upper limit of the confidence interval \cr } \examples{ -# Load sample data as in Load() example: -example(Load) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -runmean_months <- 12 -dim_to_smooth <- 4 # Smooth along lead-times -smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) -smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) -dim_to_mean <- 2 # Mean along members -required_complete_row <- 3 # Discard start-dates for which some leadtimes are missing -leadtimes_per_startdate <- 60 -rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean), - Mean1Dim(smooth_ano_obs, dim_to_mean), - compROW = required_complete_row, - limits = c(ceiling((runmean_months + 1) / 2), - leadtimes_per_startdate - floor(runmean_months / 2))) -PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", - monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = c(0), - fileout = 'tos_rms.eps') +obs <- rnorm(100) +ens <- matrix(rnorm(500), c(100,5)) +RMS(ens, obs) } \author{ History:\cr diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index ee116561..9dfcb19e 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -1,79 +1,42 @@ \name{RatioRMS} \alias{RatioRMS} \title{ -Computes The Ratio Between The RMSE Scores of 2 Experiments. +Computes the ratio between the RMSE of 2 experiments. } \description{ -Matrix var_exp1 / var_exp2 / var_obs should have the same dimensions.\cr -The ratio RMSE(var_exp1, var_obs) / RMSE(var_exp2, var_obs) is output.\cr +Calculates the ratio of the RMSE for two forecasts of the same observations.\cr +The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr The p-value is provided by a two-sided Fischer test. } \usage{ -RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) +RatioRMS(ens, ens.ref, obs, pval = TRUE) } \arguments{ - \item{var_exp1}{ + \item{ens}{ Matrix of experimental data 1. } - \item{var_exp2}{ -Matrix of experimental data 2, same dimensions as var_exp1. + \item{ens.ref}{ +Matrix of experimental data 2. } - \item{var_obs}{ -Matrix of observational data, same dimensions as var_exp1. - } - \item{posRMS}{ -Dimension along which the RMSE are to be computed = the position of the start dates. + \item{ens.ref}{ +Vector of observations. } + \item{pval}{ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. } } \value{ -Matrix with the same dimensions as var_exp1/var_exp2/var_obs except along posRMS where the dimension has length 2 if \code{pval = TRUE}, or 1 otherwise.\cr +A list containing the following components :\cr +ratiorms - the ratio of the rms of the two ensembles \cr +p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. } \examples{ -# See examples on Load() to understand the first lines in this example - \dontrun{ -data_path <- system.file('sample_data', package = 's2dverification') -expA <- list(name = 'experiment', path = file.path(data_path, - 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', - '$VAR_NAME$_$START_DATE$.nc')) -obsX <- list(name = 'observation', path = file.path(data_path, - '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', - '$VAR_NAME$_$YEAR$$MONTH$.nc')) - -# Now we are ready to use Load(). -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', list(expA), list(obsX), startDates, - output = 'lonlat', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'lonlat', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } -leadtimes_dimension <- 4 -initial_month <- 11 -mean_start_month <- 12 -mean_stop_month <- 2 -sampleData$mod <- Season(sampleData$mod, leadtimes_dimension, initial_month, - mean_start_month, mean_stop_month) -sampleData$obs <- Season(sampleData$obs, leadtimes_dimension, initial_month, - mean_start_month, mean_stop_month) -clim <- Clim(sampleData$mod, sampleData$obs) -ano_exp <- Ano(sampleData$mod, clim$clim_exp) -ano_obs <- Ano(sampleData$obs, clim$clim_obs) -rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ], - ano_exp[ , 3, , , , ][, 1, , ], - Mean1Dim(ano_obs, 2)[1, , 1, , ], 1) -PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, - toptitle = 'Ratio RMSE') -} + ens <- matrix(rnorm(500), c(100,5)) + ens.ref <- matrix(rnorm(500), c(100,5)) + obs <- rnorm(100) + Ratio <- RatioRMS(ens,ens.ref,obs) \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 3dcfcd96..28d01b49 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -1,50 +1,36 @@ \name{RatioSDRMS} \alias{RatioSDRMS} \title{ -Computes The Ratio Between the Ensemble Spread and the RMSE of the Ensemble Mean +Computes the ratio between the ensemble spread and the RMSE of the ensemble mean } \description{ -Matrices var_exp & var_obs should have dimensions between\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime)\cr -and\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr -The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr -The p-value is provided by a one-sided Fischer test. +Calculates the ratio of spread and the RMSE from a matrix of N ensemble members for M forecasts and the corresponding vector of observations of length M. \cr + +The p-value is provided by a one-sided Fisher test. } \usage{ -RatioSDRMS(var_exp, var_obs, pval = TRUE) +RatioSDRMS(ens, obs, pval = TRUE) } \arguments{ - \item{var_exp}{ -Model data:\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to\cr - c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon) +\item{ens}{ +N by M matrix of N forecasts from M ensemble members. } - \item{var_obs}{ -Observational data:\cr - c(nobs, nmemb, nsdates, nltime) up to\cr - c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon) + \item{obs}{ +Vector of the corresponding observations of length N. } \item{pval}{ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. } } \value{ -Matrix with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to - c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). -The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. +A vector containing the following components :\cr +ratio - the ratio of the spread and RMSE \cr +p.val - the p.value (if pval ='TRUE') \cr } \examples{ -# Load sample data as in Load() example: -example(Load) -rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) -rsdrms2 <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) -rsdrms2[, , 2, ] <- rsdrms[, , 1, ] -rsdrms2[, , 4, ] <- rsdrms[, , 2, ] -PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", - monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, - fileout = 'tos_rsdrms.eps') +obs <- rnorm(100) +ens <- matrix(rnorm(500), c(100,5)) +RatioSDRMS(ens,obs) } \author{ History:\cr diff --git a/man/Trend.Rd b/man/Trend.Rd index 959e32a9..8a644415 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,23 +1,21 @@ \name{Trend} \alias{Trend} \title{ -Computes Trends +Computes the trend of the ensemble mean. } \description{ -Computes the trend along the posTR dimension of the matrix var by least square fitting, and the associated an error interval.\cr -Provide also the detrended data.\cr +Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr +Trend() also provides the time series of the detrended ensemble mean forecasts.\cr The confidence interval relies on a student-T distribution. } \usage{ -Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) +Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) } \arguments{ - \item{var}{ -Matrix of any number of dimensions up to 10. - } - \item{posTR}{ -Position along which to compute the trend. + \item{ens}{ +M by N matrix of M forecasts from N ensemble members. } + \item{interval}{ Number of months/years between 2 points along posTR dimension.\cr Default = 1.\cr @@ -32,25 +30,20 @@ Whether to compute the confidence levels or not. TRUE by default. } \value{ \item{$trend}{ -Same dimensions as var except along the posTR dimension which is replaced by a dimension of length 2 or 4, corresponding to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}, trends, the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}), and intercept of the trend for each point of the matrix along all the other dimensions. +The intercept and slope coefficients for the least squares fitting of the trend, } +\item{$conf.int}{corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient.} \item{$detrended}{ Same dimensions as var with linearly detrended var along the posTR dimension. } } \examples{ -# Load sample data as in Load() example: -example(Load) -months_between_startdates <- 60 -trend <- Trend(sampleData$obs, 3, months_between_startdates) -PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", - monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = 0, - fileout = 'tos_obs_trend.eps') -PlotAno(trend$detrended, NULL, startDates, - toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', - legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') -} +ens <- matrix(rnorm(500), c(100,5)) +#Add a trend to the first ensemble member +ens[,1] <- ens[,1]+(seq(1,100,1)*5) +ens.trend <- Trend(ens) + + \author{ History:\cr 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -- GitLab From d410ec12074e16543f25de5648880fd108dca4b2 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 5 Aug 2016 16:48:53 +0200 Subject: [PATCH 05/20] veriApply compatible score functions --- R/Corr.R | 19 +++++-------------- R/RatioRMS.R | 12 ++++-------- R/RatioSDRMS.R | 11 ++++------- 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index cc43bf2d..eea1c4b7 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -30,26 +30,17 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', t <- CORR*sqrt((eno-2)/(1-(CORR^2))) p <- 1 - pt(t, eno-2) p.val <- p - names(p.val) <- "p.val" - } else { - p.val <- c() - names(p.val) <- c() - } + } if (conf) { conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) conf.int <- conf.int[!is.na(CORR)] - names(conf.int) <- c("conf_low","conf_high") - } else { - conf.int <- c() - names(conf.int) <- c() - } + + } # Output - # ~~~~~~~~ + # ~~~~~~~~ # - results <- c(CORR,conf.int, p.val) - names(results) <- c("Corr", names(conf.int), names(p.val)) - return(results) + list(Corr = CORR,conf_low =conf.int[1],conf_high=conf.int[2],p.val =p.val) } diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 3f505613..5589f170 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -23,15 +23,11 @@ RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { if (!is.na(eno1) && !is.na(eno2) && eno1 > 2 && eno2 > 2) { p.val <- (1 - pf(F, eno1 - 1, eno2 - 1)) * 2 - names(p.val) <- "p.val" + } - else { - p.val <- c() - names(p.val) <- c() - } + } - results <- c(enlratiorms, p.val) - names(results) <- c("ratiorms",names(p.val)) - return(results) + # Output + list(ratiorms = enlratiorms,p.val =p.val) } diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 7ec8ff67..1edbd700 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -23,19 +23,16 @@ RatioSDRMS <- function(ens, obs, pval = TRUE) { F <- (enosd * std ** 2 / (enosd - 1)) / (enorms * (rms) ** 2 / (enorms - 1)) if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { p.val <- 1 - pf(F, l1 - 1, l2 - 1) - names(p.val) <- "p.val" + } } - else { - p.val <- c() - names(p.val) <- c() } + # # Output # ~~~~~~~~ # - results <- c(enlratiormssd, p.val) - names(results) <- c("ratio", names(p.val)) - return(results) + list(ratio = enlratiormssd, p.val = p.val) + } -- GitLab From 780a7afe6f48ebf9c3b6821e07a87c22ba56e1f2 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 19 Aug 2016 12:44:17 +0200 Subject: [PATCH 06/20] Docfixes in Trend and RatioRMS. --- man/RatioRMS.Rd | 12 ++++++------ man/Trend.Rd | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 9dfcb19e..2e93d992 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -1,7 +1,7 @@ \name{RatioRMS} \alias{RatioRMS} \title{ -Computes the ratio between the RMSE of 2 experiments. +Computes the Ratio Between The RMSE of Two Experiments } \description{ Calculates the ratio of the RMSE for two forecasts of the same observations.\cr @@ -21,7 +21,6 @@ Matrix of experimental data 2. \item{ens.ref}{ Vector of observations. } - \item{pval}{ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. } @@ -33,13 +32,14 @@ p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. } \examples{ - ens <- matrix(rnorm(500), c(100,5)) - ens.ref <- matrix(rnorm(500), c(100,5)) + ens <- matrix(rnorm(500), c(100, 5)) + ens.ref <- matrix(rnorm(500), c(100, 5)) obs <- rnorm(100) - Ratio <- RatioRMS(ens,ens.ref,obs) + Ratio <- RatioRMS(ens, ens.ref, obs) \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN\cr +2.0 - 2016-08 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapted to veriApply() } \keyword{datagen} diff --git a/man/Trend.Rd b/man/Trend.Rd index 8a644415..0a3a930b 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,7 +1,7 @@ \name{Trend} \alias{Trend} \title{ -Computes the trend of the ensemble mean. +Computes the Trend of the Ensemble Mean } \description{ Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr @@ -14,8 +14,7 @@ Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) \arguments{ \item{ens}{ M by N matrix of M forecasts from N ensemble members. - } - + } \item{interval}{ Number of months/years between 2 points along posTR dimension.\cr Default = 1.\cr @@ -32,21 +31,23 @@ Whether to compute the confidence levels or not. TRUE by default. \item{$trend}{ The intercept and slope coefficients for the least squares fitting of the trend, } -\item{$conf.int}{corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient.} + \item{$conf.int}{ +Corresponding to the limits of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the slope coefficient. + } \item{$detrended}{ Same dimensions as var with linearly detrended var along the posTR dimension. } } \examples{ -ens <- matrix(rnorm(500), c(100,5)) +ens <- matrix(rnorm(500), c(100, 5)) #Add a trend to the first ensemble member -ens[,1] <- ens[,1]+(seq(1,100,1)*5) +ens[,1] <- ens[, 1] + (seq(1, 100, 1) * 5) ens.trend <- Trend(ens) - - +} \author{ History:\cr 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to CRAN\cr +2.0 - 2016-08 (A. Hunter, \email{alasdair.hunter at bsc.es}) - Adapt to veriApply() } \keyword{datagen} -- GitLab From 57b38fab07e014426a36a78fcf659c6a5cc2e76f Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 19 Aug 2016 12:47:14 +0200 Subject: [PATCH 07/20] Docfix. --- man/RatioRMS.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 2e93d992..49bc5cbc 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -36,6 +36,7 @@ The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p. ens.ref <- matrix(rnorm(500), c(100, 5)) obs <- rnorm(100) Ratio <- RatioRMS(ens, ens.ref, obs) +} \author{ History:\cr 0.1 - 2011-11 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -- GitLab From 2ae82489bee61d2e1d42576fdf0e128aaf88fc14 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 5 Sep 2016 09:31:21 +0200 Subject: [PATCH 08/20] update Corr function --- R/Corr.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index eea1c4b7..ea3a9170 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -14,10 +14,13 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf_low <- (1 - siglev) / 2 conf_high <- 1 - conf_low - + p <- c() + conflow <- c() + confhigh <- c() + ens.mean <- rowMeans(ens) CORR <- cor(obs, ens.mean, use = "pairwise.complete.obs", method = method) - + if (pval || conf) { if (method == "kendall" | method == "spearman") { eno <- Eno(rank(obs), 1) @@ -25,22 +28,25 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', eno <- Eno(obs, 1) } } - if (pval) { + if (pval & method == "pearson") { t <- CORR*sqrt((eno-2)/(1-(CORR^2))) p <- 1 - pt(t, eno-2) p.val <- p + } - if (conf) { + if (conf & method == "pearson") { conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) conf.int <- conf.int[!is.na(CORR)] - + conflow =conf.int[1] + confhigh=conf.int[2] } + invisible(result <- list(Corr = CORR, p.val = p, conf_low = conflow, confhigh = confhigh)) # Output # ~~~~~~~~ # - list(Corr = CORR,conf_low =conf.int[1],conf_high=conf.int[2],p.val =p.val) + } -- GitLab From 33ade855f0b33650a2e05fd3f037360fc446455d Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 14 Sep 2016 14:39:30 +0200 Subject: [PATCH 09/20] Minor changes to Corr.R --- R/Corr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Corr.R b/R/Corr.R index ea3a9170..56c0780a 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -44,7 +44,7 @@ Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', confhigh=conf.int[2] } - invisible(result <- list(Corr = CORR, p.val = p, conf_low = conflow, confhigh = confhigh)) + invisible(result <- list(corr = CORR, p.val = p, conf_low = conflow, conf_high = confhigh)) # Output # ~~~~~~~~ # -- GitLab From fc9ab4203f2bf4bccce7b66f81a81f85e3032970 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 11:54:04 +0100 Subject: [PATCH 10/20] Add hidden function to Corr for veriApply compatibility --- R/Corr.R | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++- man/Corr.Rd | 79 ++++++++++++++++++++++++-------- 2 files changed, 187 insertions(+), 20 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 56c0780a..40e89684 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,4 +1,130 @@ -Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', +Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, + limits = NULL, siglev = 0.95, method = 'pearson', + conf = TRUE, pval = TRUE) { + # + # Remove data along compROW dim if there is at least one NA between limits + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (is.null(compROW) == FALSE) { + if (is.null(limits) == TRUE) { + limits <- c(1, dim(var_obs)[compROW]) + } + outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) + outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) + var_obs[which(outrows)] <- NA + } + # + # Enlarge var_exp & var_obs to 10 dim + move posloop & poscor to 1st & 2nd + # pos + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[poscor] < 3 ) { + stop("At least 3 values required to compute correlation") + } + if (method != "kendall" && method != "spearman" && method != "pearson") { + stop("Wrong correlation method") + } + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + var_exp <- Enlarge(var_exp, 10) + var_obs <- Enlarge(var_obs, 10) + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- poscor + posaperm[3:10] <- seq(1, 10)[-c(posloop, poscor)] + var_exp <- aperm(var_exp, posaperm) + var_obs <- aperm(var_obs, posaperm) + dimsaperm <- dim(var_exp) + # + + # Check the siglev arguments: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if (siglev > 1 || siglev < 0) { + stop("siglev need to be higher than O and lower than 1") + } + # + # Loop to compute correlation for each grid point + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dim_val <- 2 + dim_pval <- 4 + nvals <- 1 + 2*conf + pval + if (!conf) { + dim_val <- 1 + dim_pval <- 2 + } else { + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low + } + CORR <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + tmp1 <- var_exp[jexp, , j3, j4, j5, j6, j7, j8, j9, + j10] + tmp2 <- var_obs[jobs, , j3, j4, j5, j6, j7, j8, j9, + j10] + if (any(!is.na(tmp1)) && sum(!is.na(tmp2)) > 2) { + toto <- cor(tmp1, tmp2, use = "pairwise.complete.obs", method = method) + CORR[jexp, jobs, dim_val, j3, j4, j5, j6, j7, j8, j9, j10] <- toto + #eno <- min(Eno(tmp2, 1), Eno(tmp1, 1)) + if (pval || conf) { + if (method == "kendall" | method == "spearman") { + eno <- Eno(rank(tmp2), 1) + } else if (method == "pearson") { + eno <- Eno(tmp2, 1) + } + } + if (pval) { + #t <- qt(0.95, eno - 2) + t <- qt(siglev, eno - 2) + CORR[jexp, jobs, dim_pval, j3, j4, j5, j6, j7, j8, j9, + j10] <- sqrt((t * t) / ((t * t) + eno - 2)) + } + if (conf) { + CORR[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- tanh(atanh(toto) + qnorm(conf_high) / sqrt( + #j10] <- tanh(atanh(toto) + qnorm(0.975) / sqrt( + eno - 3)) + CORR[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, + j10] <- tanh(atanh(toto) + qnorm(conf_low) / sqrt( + #j10] <- tanh(atanh(toto) + qnorm(0.025) / sqrt( + eno - 3)) + } + } + } + } + } + } + } + } + } + } + } + } + # + dim(CORR) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, poscor)]) + # + # Output + # ~~~~~~~~ + # + CORR +} + +.Corr <- function(ens, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) { if (method != "kendall" && method != "spearman" && method != "pearson") { diff --git a/man/Corr.Rd b/man/Corr.Rd index 5ba5ede6..311d6c78 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -1,20 +1,48 @@ \name{Corr} \alias{Corr} +\alias{.Corr} \title{ -Computes the correlation coefficient between the ensemble mean and observations. +Computes the correlation coefficient between an array of forecasts and their corresponding observations. } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) between the ensemble mean and the observations along the initialization times. The confidence interval and p-value from a one-tailed t-test are provided, where the number of degrees of freedom is calculated using the effective numberof observations (see Eno). \cr - - +Calculates the correlation coefficient (Pearson, Kendall or Spearman) between forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr +\cr +.Corr is the same function but with a matrix of experiments and a vector of observations as input. } \usage{ -Corr(ens, obs, posloop = 1, poscor = 2, compROW = NULL, - limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) +Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, + limits = NULL, siglev = 0.95, method = 'pearson', + conf = TRUE, pval = TRUE) \cr + +.Corr(ens, obs, siglev = 0.95, + method = 'pearson', conf = TRUE, pval = TRUE) } \arguments{ + \item{var_exp}{ +Array of experimental data. + } + + \item{var_obs}{ +Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. + } + + \item{posloop}{ +Dimension nobs and nexp. + } + + \item{poscor}{ +Dimension along which correlation are to be computed (the dimension of the start dates). + } + + \item{compROW}{ +Data taken into account only if (compROW)th row is complete.\cr Default = NULL. + } + + \item{limits}{ +Complete between limits[1] & limits[2]. Default = NULL. + } + \item{ens}{ N by M matrix of N forecasts from M ensemble members. } @@ -35,18 +63,31 @@ Whether to compute confidence intervals (default = 'TRUE') or not (FALSE). Whether to compute statistical significance p-value (default = 'TRUE') or not (FALSE). } } -\value{ -A list containing the following components :\cr -statistic - the value of the test statistic \cr -conf_low - the lower limit of the confidence interval \cr -conf_high - the upper limit of the confidence interval \cr -p.val - the p value -} -\examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -Corr(ens,obs) -} + + \value{ Matrix with dimensions :\cr c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr +The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } + \examples{ # Load sample data as in Load() example: +example(Load) + clim <- Clim(sampleData$mod, sampleData$obs) + ano_exp <- Ano(sampleData$mod, clim$clim_exp) + ano_obs <- Ano(sampleData$obs, clim$clim_obs) + runmean_months <- 12 dim_to_smooth <- 4 + # Smooth along lead-times + smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) + smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) + dim_to_mean <- 2 # Mean along members + required_complete_row <- 3 # Discard start dates which contain any NA lead-times + leadtimes_per_startdate <- 60 + corr <- Corr(Mean1Dim(smooth_ano_exp, dim_to_mean), + Mean1Dim(smooth_ano_obs, dim_to_mean), + compROW = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) + PlotVsLTime(corr, toptitle = "correlations", ytitle = "correlation", + monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1), + fileout = 'tos_cor.eps') } + \author{ History:\cr 0.1 - 2011-04 (V. Guemas, \email{vguemas at ic3.cat}) - Original code\cr -- GitLab From ff1bc6a42bda36c8d40c2d6f35207f32137cf7e8 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 12:24:17 +0100 Subject: [PATCH 11/20] Dual input for score functions --- R/RMS.R | 105 ++++++++++++++++++++++++++++++++++++++++++++++++- R/RMSSS.R | 90 ++++++++++++++++++++++++++++++++++++++++++ R/RatioRMS.R | 89 ++++++++++++++++++++++++++++++++++++++++- R/RatioSDRMS.R | 88 ++++++++++++++++++++++++++++++++++++++++- R/Trend.R | 83 +++++++++++++++++++++++++++++++++++++- man/RMS.Rd | 5 ++- 6 files changed, 454 insertions(+), 6 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index c8ba7c12..04bf52b2 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -1,4 +1,107 @@ -RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { +RMS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, + limits = NULL, siglev = 0.95, conf = TRUE) { + # + # Remove data along compROW dim if there is at least one NA between limits + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (is.null(compROW) == FALSE) { + if (is.null(limits) == TRUE) { + limits <- c(1, dim(var_obs)[compROW]) + } + outrows <- (is.na(Mean1Dim(var_obs, compROW, narm = FALSE, limits))) + outrows <- InsertDim(outrows, compROW, dim(var_obs)[compROW]) + var_obs[which(outrows)] <- NA + } + + # + # Enlarge var_exp & var_obs to 10 dim + move posloop & posRMS to 1st & 2nd + # pos + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind != posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[posRMS] < 2 ) { + stop("At least 2 values required to compute RMSE") + } + enlvarexp <- Enlarge(var_exp, 10) + enlvarobs <- Enlarge(var_obs, 10) + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- posRMS + posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] + permvarexp <- aperm(enlvarexp, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp) + # + # RMS & its confidence interval computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (conf) { + nvals <- 3 + dim_rms <- 2 + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low + } else { + nvals <- 1 + dim_rms <- 1 + } + enlrms <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + dif <- array(dim = dimsaperm[-1]) + dif[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , + , ] - permvarobs[jobs, , , , , , , , , ] + enlrms[jexp, jobs, dim_rms, , , , , , , , ] <- Mean1Dim(dif ** 2, 1, + narm = TRUE) ** 0.5 + if (conf) { + eno <- Eno(dif, 1) + for (j3 in 1:dimsaperm[3]){ + for (j4 in 1:dimsaperm[4]){ + for (j5 in 1:dimsaperm[5]){ + for (j6 in 1:dimsaperm[6]){ + for (j7 in 1:dimsaperm[7]){ + for (j8 in 1:dimsaperm[8]){ + for (j9 in 1:dimsaperm[9]){ + for (j10 in 1:dimsaperm[10]){ + ndat <- length(sort(dif[, j3, j4, j5, j6, j7, j8, j9, + j10])) + enlrms[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, + j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, + j8, j9, j10] ** 2 / qchisq(conf_high, eno[j3, j4, j5, + j6, j7, j8, j9, j10] - 1)) ** 0.5 + enlrms[jexp, jobs, 3, j3, j4, j5, j6, j7, j8, j9, + j10] <- (eno[j3, j4, j5, j6, j7, j8, j9, + j10] * enlrms[jexp, jobs, 2, j3, j4, j5, j6, j7, + j8, j9, j10] ** 2 / qchisq(conf_low, eno[j3, j4, j5, + j6, j7, j8, j9, j10] - 1)) ** 0.5 + } + } + } + } + } + } + } + } + } + } + } + + dim(enlrms) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) + # + # Output + # ~~~~~~~~ + # + enlrms +} + +.RMS <- function(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) { # diff --git a/R/RMSSS.R b/R/RMSSS.R index 585a398b..9a33c8a6 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -33,6 +33,96 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { nvals <- 1 } enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + dif1 <- array(dim = dimsaperm[-1]) + dif2 <- array(dim = dimsaperm[-1]) + dif1[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , + , ] - permvarobs[jobs, , , , , , , , , ] + dif2[, , , , , , , , ] <- permvarobs[jobs, , , , , , , , , ] + rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 + rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( + rms2), na.rm = TRUE) / 1000 + enlRMSSS[jexp, jobs, 1, , , , , , , , ] <- 1 - (rms1 / rms2) + if (pval) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + l1 <- eno1[j3, j4, j5, j6, j7, j8, j9, j10] + l2 <- eno2[j3, j4, j5, j6, j7, j8, j9, j10] + if (is.na(l1) == FALSE & is.na(l2) == FALSE & l1 > 2 & l2 > 2) { + enlRMSSS[jexp, jobs, 2, j3, j4, j5, j6, j7, j8, j9, + j10] <- 1 - pf(F[j3, j4, j5, j6, j7, j8, j9, + j10], l1 - 1, l2 - 1) + } else { + enlRMSSS[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, + j10] <- NA + } + } + } + } + } + } + } + } + } + } + } + } + + dim(enlRMSSS) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) + # + # Output + # ~~~~~~~~ + # + enlRMSSS +} + +.RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { + # + # Enlarge var_exp & var_obs & clim to 10 dim + move posloop & posRMS + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp) + for (iind in 1:length(dimsvar)) { + if (iind !=posloop & dim(var_obs)[iind] != dimsvar[iind]) { + stop("var_exp & var_obs must have same dimensions except along posloop") + } + } + if (dimsvar[posRMS] < 2 ) { + stop("At least 2 values required to compute RMSE") + } + enlvarexp <- Enlarge(var_exp, 10) + enlvarobs <- Enlarge(var_obs, 10) + nexp <- dimsvar[posloop] + nobs <- dim(var_obs)[posloop] + posaperm <- numeric(10) + posaperm[1] <- posloop + posaperm[2] <- posRMS + posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] + permvarexp <- aperm(enlvarexp, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp) + # + # RMSSS and its pvalue computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) for (jexp in 1:nexp) { for (jobs in 1:nobs) { diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 5589f170..a5bc5b48 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -1,4 +1,91 @@ -RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { +RatioRMS <- function(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) { + # + # Enlarge var_exps & var_obs to 10 dim + move posRMS to 1st pos + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var_exp1) + for (iind in 1:length(dimsvar)) { + if (dim(var_exp2)[iind] != dimsvar[iind] | + dim(var_obs)[iind] != dimsvar[iind]) { + stop("all input vars should have the same dimensions") + } + } + enlvarexp1 <- Enlarge(var_exp1, 10) + enlvarexp2 <- Enlarge(var_exp2, 10) + enlvarobs <- Enlarge(var_obs, 10) + posaperm <- 1:10 + posaperm[1] <- posRMS + posaperm[posRMS] <- 1 + permvarexp1 <- aperm(enlvarexp1, posaperm) + permvarexp2 <- aperm(enlvarexp2, posaperm) + permvarobs <- aperm(enlvarobs, posaperm) + dimsaperm <- dim(permvarexp1) + # + # RMS ratio and its pvalue computation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + enlratiorms <- array(dim = c(nvals, dimsaperm[2:10])) + dif1 <- permvarexp1 - permvarobs + dif2 <- permvarexp2 - permvarobs + rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 + rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max( + abs(rms2), na.rm = TRUE) / 1000 + enlratiorms[1, , , , , , , , , ] <- (rms1 / rms2) + if (pval) { + eno1 <- Eno(dif1, 1) + eno2 <- Eno(dif2, 1) + F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) + F[which(F < 1)] <- 1 / F[which(F < 1)] + for (j2 in 1:dimsaperm[2]) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + l1 <- eno1[j2, j3, j4, j5, j6, j7, j8, j9, j10] + l2 <- eno2[j2, j3, j4, j5, j6, j7, j8, j9, j10] + if (!is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { + enlratiorms[2, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- (1 - pf(F[j2, j3, j4, j5, j6, j7, j8, j9, j10], + l1 - 1, l2 - 1)) * 2 + } else { + enlratiorms[1, j2, j3, j4, j5, j6, j7, j8, j9, j10] <- NA + } + } + } + } + } + } + } + } + } + } + } + + enlratiorms <- aperm(enlratiorms, posaperm) + if (pval) { + dimsvar[posRMS] <- 2 + } else { + dimsvar[posRMS] <- 1 + } + dim(enlratiorms) <- dimsvar + # + # Output + # ~~~~~~~~ + # + enlratiorms +} + +.RatioRMS <- function(ens, ens.ref, obs, pval = TRUE) { # # RMS ratio and its pvalue computation diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 1edbd700..dd30eed6 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -1,4 +1,90 @@ -RatioSDRMS <- function(ens, obs, pval = TRUE) { +RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { + # + # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimexp <- dim(var_exp) + dimobs <- dim(var_obs) + if (length(dimexp) < 4 | length(dimobs) < 4) { + stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)") + } + for (jn in 3:max(length(dimexp), length(dimobs))) { + if (dimexp[jn] != dimobs[jn]) { + stop("Wrong input dimensions") + } + } + var_exp <- Enlarge(var_exp, 7) + var_obs <- Enlarge(var_obs, 7) + + # + # Ratio RMSE / SD and its significance level + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + ensmeanexp <- Mean1Dim(var_exp, 2) + ensmeanobs <- Mean1Dim(var_obs, 2) + dimrms <- c(dimexp[1], dimobs[1], dimexp[4:length(dimexp)]) + if (pval) { + nvals <- 2 + } else { + nvals <- 1 + } + dimratiormssd <- c(dimexp[1], dimobs[1], nvals, dimexp[4:length(dimexp)]) + if (length(dimrms) < 6) { + dimrms <- c(dimrms, array(1, dim = (6 - length(dimrms)))) + } + if (length(dimratiormssd) < 7) { + dimenlratiormssd <- c(dimratiormssd, + array(1, dim = (7 - length(dimratiormssd)))) + } else { + dimenlratiormssd <- dimratiormssd + } + dif <- var_exp - InsertDim(ensmeanexp, 2, dimexp[2]) + std <- apply(array(dif, dim = c(dimexp[1], dimexp[2] * dimexp[3], + dimrms[3:6])), c(1, 3, 4, 5, 6), sd, na.rm = TRUE) + enosd <- apply(Eno(dif, 3), c(1, 3, 4, 5, 6), sum, na.rm = TRUE) + rms <- array(dim = dimrms) + enlratiormssd <- array(dim = dimenlratiormssd) + for (jexp in 1:dimexp[1]) { + for (jobs in 1:dimobs[1]) { + dif <- ensmeanexp[jexp, , , , , ] - ensmeanobs[jobs, , , , , ] + rms[jexp,jobs, , , , ] <- Mean1Dim(dif ** 2, 1, narm = TRUE) ** 0.5 + enorms <- array(Eno(dif, 1), dim = dimrms[3:6]) + enlratiormssd[jexp, jobs, 1, , , , ] <- std[jexp, , , , ] / rms[jexp, + jobs, , , , ] + if (pval) { + for (jltime in 1:dimrms[3]) { + for (jlev in 1:dimrms[4]) { + for (jlat in 1:dimrms[5]) { + for (jlon in 1:dimrms[6]) { + l1 <- enosd[jexp, jltime, jlev, jlat, jlon] + l2 <- enorms[jltime, jlev, jlat, jlon] + F <- (enosd[jexp, jltime, jlev, jlat, jlon] * (std[jexp, jltime, + jlev, jlat, jlon]) ** 2 / (enosd[jexp, jltime, jlev, jlat, + jlon] - 1)) / (enorms[jltime, jlev, jlat, jlon] * (rms[jexp, + jobs, jltime, jlev, jlat, jlon]) ** 2 / (enorms[jltime, + jlev, jlat, jlon] - 1)) + if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { + enlratiormssd[jexp, jobs, 2, jltime, jlev, jlat, + jlon] <- 1 - pf(F, l1 - 1, l2 - 1) + } else { + enlratiormssd[jexp, jobs, 1, jltime, jlev, jlat, jlon] <- NA + } + } + } + } + } + } + } + } + dim(enlratiormssd) <- dimratiormssd + # + # Output + # ~~~~~~~~ + # + enlratiormssd +} + +.RatioSDRMS <- function(ens, obs, pval = TRUE) { # # Ratio RMSE / SD and its significance level diff --git a/R/Trend.R b/R/Trend.R index f427f283..b8ef4d0c 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -1,4 +1,85 @@ -Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { +Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { + # + # Enlarge the size of var to 10 and move posTR to first position + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var) + if (is.null(dimsvar)) { + dimsvar <- length(var) + } + enlvar <- Enlarge(var, 10) + outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) + if (conf) { + nvals <- 4 + poscoef2 <- 2 + poscoef1 <- 4 + } else { + nvals <- 2 + poscoef2 <- 1 + poscoef1 <- 2 + } + outdim[posTR] <- nvals + posaperm <- 1:10 + posaperm[posTR] <- 1 + posaperm[1] <- posTR + enlvar <- aperm(enlvar, posaperm) + dimsaperm <- outdim[posaperm] + # + # Loop on all dimensions to compute trends + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + enltrend <- array(dim = dimsaperm) + enldetrend <- array(dim = dim(enlvar)) + for (j2 in 1:dimsaperm[2]) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + tmp <- enlvar[, j2, j3, j4, j5, j6, j7, j8, j9, j10] + if (any(!is.na(tmp))) { + mon <- seq(tmp) * interval + lm.out <- lm(tmp ~ mon, na.action = na.omit) + enltrend[poscoef2, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- lm.out$coefficients[2] + enltrend[poscoef1, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- lm.out$coefficients[1] + if (conf) { + enltrend[c(1, 3), j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- confint(lm.out, level = siglev)[2, 1:2] + } + enldetrend[is.na(tmp) == FALSE, j2, j3, j4, j5, j6, j7, j8, + j9, j10] <- tmp[is.na(tmp) == FALSE] - lm.out$fitted.values + } + } + } + } + } + } + } + } + } + } + # + # Back to the original dimensions + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + enldetrend <- aperm(enldetrend, posaperm) + dim(enldetrend) <- dimsvar + enltrend <- aperm(enltrend, posaperm) + dimsvar[posTR] <- nvals + dim(enltrend) <- dimsvar + # + # Outputs + # ~~~~~~~~~ + # + invisible(list(trend = enltrend, detrended = enldetrend)) +} + +.Trend <- function(ens, interval = 1, siglev = 0.95, conf = TRUE) { ensmean <- rowMeans(ens, na.rm = TRUE) diff --git a/man/RMS.Rd b/man/RMS.Rd index 8d276c57..fab80a03 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -1,10 +1,11 @@ \name{RMS} \alias{RMS} +\alias{.RMS} \title{ -Computes the root mean square error of the ensemble mean +Computes the root mean square error for a set of forecasts and observations } \description{ -Computes the RMSE of an ensemble mean forecast against a vector of observations.\cr +Computes the RMSE of an array of forecasts against an array of corresponding observations.\cr The confidence interval of the RMSE is obtained with a chi2 distribution. } \usage{ -- GitLab From d7873e1f7c61a27bf772b5c0ae6b3950eeae2c47 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 14:17:02 +0100 Subject: [PATCH 12/20] Small update of manuals to include hidden functions --- man/RMS.Rd | 61 +++++++++++++++++++++++++++++++++++++---------- man/RMSSS.Rd | 9 ++++++- man/RatioRMS.Rd | 25 ++++++++++++++----- man/RatioSDRMS.Rd | 43 ++++++++++++++++++++++++--------- 4 files changed, 107 insertions(+), 31 deletions(-) diff --git a/man/RMS.Rd b/man/RMS.Rd index fab80a03..09b9d0a8 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -2,23 +2,41 @@ \alias{RMS} \alias{.RMS} \title{ -Computes the root mean square error for a set of forecasts and observations +Computes Root Mean Square Error } \description{ -Computes the RMSE of an array of forecasts against an array of corresponding observations.\cr -The confidence interval of the RMSE is obtained with a chi2 distribution. +Computes the root mean square error for an array of forecasts, var_exp and an array of observations, var_obs, which should have the same dimensions except along the posloop dimension where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr +The RMSE is computed along the posRMS dimension which should correspond to the startdate dimension.\cr +If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr +Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr +The confidence interval relies on a chi2 distribution. } \usage{ -RMS(ens, obs, siglev = 0.95, conf = TRUE) +RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) } \arguments{ - \item{ens}{ + \item{var_exp}{ +Matrix of experimental data. + } + \item{var_obs}{ +Matrix of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. + } + \item{ens}{ N by M matrix of N forecasts from M ensemble members. } \item{obs}{ Vector of the corresponding observations of length N. } - + \item{posloop}{ +Dimension nobs and nexp. + } + \item{posRMS}{ +Dimension along which RMSE are to be computed (the dimension of the start dates). + } + \item{compROW}{ +Data taken into account only if (compROW)th row is complete.\cr +Default = NULL. + } \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } @@ -30,15 +48,32 @@ Whether to compute confidence interval or not. TRUE by default. } } \value{ -A list containing the following components :\cr -rms - the root mean square error \cr -conf_low - the lower limit of the confidence interval \cr -conf_high - the upper limit of the confidence interval \cr +Matrix with dimensions:\cr + c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr +The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). } \examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -RMS(ens, obs) +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +runmean_months <- 12 +dim_to_smooth <- 4 # Smooth along lead-times +smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) +smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) +dim_to_mean <- 2 # Mean along members +required_complete_row <- 3 # Discard start-dates for which some leadtimes are missing +leadtimes_per_startdate <- 60 +rms <- RMS(Mean1Dim(smooth_ano_exp, dim_to_mean), + Mean1Dim(smooth_ano_obs, dim_to_mean), + compROW = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) +PlotVsLTime(rms, toptitle = "Root Mean Square Error", ytitle = "K", + monini = 11, limits = NULL, listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = c(0), + fileout = 'tos_rms.eps') } \author{ History:\cr diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 3d77230d..2547a9e5 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -1,10 +1,11 @@ \name{RMSSS} \alias{RMSSS} +\alias{.RMSSS} \title{ Computes Root Mean Square Skill Score } \description{ -Arrays var_exp & var_obs should have the same dimensions except along posloop where the length can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr +Computes the root mean square error skill score between an array of forecasts, var_exp and an array of observations, var_obs, which should have the same dimensions except along posloop where the lengths can be different, with the number of experiments/models for var_exp (nexp) and the number of obserational datasets for var_obs (nobs).\cr RMSSS computes the Root Mean Square Skill Score of each jexp in 1:nexp against each jobs in 1:nobs which gives nexp x nobs RMSSS for each other grid point of the matrix (each latitude/longitude/level/leadtime).\cr The RMSSS are computed along the posRMS dimension which should correspond to the startdate dimension.\cr The p-value is optionally provided by a one-sided Fisher test. @@ -18,6 +19,12 @@ Array of experimental data. } \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. + } + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of N observations. } \item{posloop}{ Dimension nobs and nexp. diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 49bc5cbc..7cd5bac1 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -1,5 +1,6 @@ \name{RatioRMS} \alias{RatioRMS} +\alias{.RatioRMS} \title{ Computes the Ratio Between The RMSE of Two Experiments } @@ -13,10 +14,10 @@ RatioRMS(ens, ens.ref, obs, pval = TRUE) } \arguments{ \item{ens}{ -Matrix of experimental data 1. +Array of experimental data 1. } \item{ens.ref}{ -Matrix of experimental data 2. +Array of experimental data 2. } \item{ens.ref}{ Vector of observations. @@ -32,10 +33,22 @@ p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. } \examples{ - ens <- matrix(rnorm(500), c(100, 5)) - ens.ref <- matrix(rnorm(500), c(100, 5)) - obs <- rnorm(100) - Ratio <- RatioRMS(ens, ens.ref, obs) + leadtimes_dimension <- 4 +initial_month <- 11 +mean_start_month <- 12 +mean_stop_month <- 2 +sampleData$mod <- Season(sampleData$mod, leadtimes_dimension, initial_month, + mean_start_month, mean_stop_month) +sampleData$obs <- Season(sampleData$obs, leadtimes_dimension, initial_month, + mean_start_month, mean_stop_month) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +rrms <- RatioRMS(Mean1Dim(ano_exp[ , 1:2, , , , ], 1)[, 1, , ], + ano_exp[ , 3, , , , ][, 1, , ], + Mean1Dim(ano_obs, 2)[1, , 1, , ], 1) +PlotEquiMap(rrms[1, , ], sampleData$lon, sampleData$lat, + toptitle = 'Ratio RMSE') } \author{ History:\cr diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 28d01b49..227591ee 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -1,18 +1,32 @@ \name{RatioSDRMS} \alias{RatioSDRMS} +\alias{RatioSDRMS} \title{ -Computes the ratio between the ensemble spread and the RMSE of the ensemble mean +Computes the ratio between the ensemble spread and RMSE } \description{ -Calculates the ratio of spread and the RMSE from a matrix of N ensemble members for M forecasts and the corresponding vector of observations of length M. \cr - -The p-value is provided by a one-sided Fisher test. +Arrays var_exp & var_obs should have dimensions between\cr + c(nmod/nexp, nmemb/nparam, nsdates, nltime)\cr +and\cr + c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr +The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr +The p-value is provided by a one-sided Fischer test. } \usage{ RatioSDRMS(ens, obs, pval = TRUE) } \arguments{ -\item{ens}{ + \item{var_exp}{ +Model data:\cr + c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to\cr + c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon) + } + \item{var_obs}{ +Observational data:\cr + c(nobs, nmemb, nsdates, nltime) up to\cr + c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon) + } + \item{ens}{ N by M matrix of N forecasts from M ensemble members. } \item{obs}{ @@ -23,14 +37,21 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. } } \value{ -A vector containing the following components :\cr -ratio - the ratio of the spread and RMSE \cr -p.val - the p.value (if pval ='TRUE') \cr +Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to + c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). +The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. } \examples{ -obs <- rnorm(100) -ens <- matrix(rnorm(500), c(100,5)) -RatioSDRMS(ens,obs) +# Load sample data as in Load() example: +example(Load) +rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) +rsdrms2 <- array(dim = c(dim(rsdrms)[1:2], 4, dim(rsdrms)[4])) +rsdrms2[, , 2, ] <- rsdrms[, , 1, ] +rsdrms2[, , 4, ] <- rsdrms[, , 2, ] +PlotVsLTime(rsdrms2, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", + monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, + fileout = 'tos_rsdrms.eps') } \author{ History:\cr -- GitLab From 0d13def7a9b9f085728bf15270e3991e671b8386 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Thu, 24 Nov 2016 16:03:22 +0100 Subject: [PATCH 13/20] Minor bugfixes in documentation --- R/BrierScore.R | 78 +++++++++++++++++++++++++++++++++++++++- R/RMSSS.R | 90 ----------------------------------------------- man/BrierScore.Rd | 21 ++++++----- man/Corr.Rd | 5 +-- man/RMS.Rd | 2 ++ man/RMSSS.Rd | 8 +---- man/RatioRMS.Rd | 46 +++++++++++++++++++++--- man/RatioSDRMS.Rd | 6 ++-- man/Trend.Rd | 11 +++++- 9 files changed, 152 insertions(+), 115 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index ff4fc2c1..50b38730 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -1,4 +1,80 @@ -BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { +BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { + if (max(pred) > 1 | min(pred) < 0) { + stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") + } else if (max(obs) != 1 & min(obs) != 0) { + .message("Binary events must be either 0 or 1. Are you certain this is a binary event? ") + } else { + nbins <- length(thresholds) - 1 # Number of bins + n <- length(pred) + bins <- as.list(paste("bin", 1:nbins,sep = "")) + for (i in 1:nbins) { + if (i == nbins) { + bins[[i]] <- list(which(pred >= thresholds[i] & pred <= thresholds[i + 1])) + } else { + bins[[i]] <- list(which(pred >= thresholds[i] & pred < thresholds[i + 1])) + } + } + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(pred[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] + } + + obar <- sum(obs) / length(obs) + relsum <- ressum <- term1 <- term2 <- 0 + for (i in 1:nbins) { + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 + for (j in 1:nk[i]) { + term1 <- term1 + (pred[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (pred[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + } + } + } + rel <- relsum / n + res <- ressum / n + unc <- obar * (1 - obar) + bs <- sum((pred - obs)^2) / n + bs_check_res <- rel - res + unc + bss_res <- (res - rel) / unc + gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution + bs_check_gres <- rel - gres + unc # BS using GRES + bss_gres <- (gres - rel) / unc # BSS using GRES + + # + # Estimating the bias-corrected components of the BS + # + term3 <- array(0, nbins) + for (i in 1:nbins) { + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) + } + term_a <- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, pred = pred, obs = obs)) + } +} + + +.BrierScore <- function(ens, obs, thresholds = seq(0, 1, 0.1)) { if (max(ens) > 1 | min(ens) < 0) { stop("Predictions outside [0,1] range. Are you certain this is a probability forecast? \n") } else if (max(obs) != 1 & min(obs) != 0) { diff --git a/R/RMSSS.R b/R/RMSSS.R index 9a33c8a6..74a2a387 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -87,93 +87,3 @@ RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { # enlRMSSS } - -.RMSSS <- function(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) { - # - # Enlarge var_exp & var_obs & clim to 10 dim + move posloop & posRMS - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var_exp) - for (iind in 1:length(dimsvar)) { - if (iind !=posloop & dim(var_obs)[iind] != dimsvar[iind]) { - stop("var_exp & var_obs must have same dimensions except along posloop") - } - } - if (dimsvar[posRMS] < 2 ) { - stop("At least 2 values required to compute RMSE") - } - enlvarexp <- Enlarge(var_exp, 10) - enlvarobs <- Enlarge(var_obs, 10) - nexp <- dimsvar[posloop] - nobs <- dim(var_obs)[posloop] - posaperm <- numeric(10) - posaperm[1] <- posloop - posaperm[2] <- posRMS - posaperm[3:10] <- seq(1, 10)[-c(posloop, posRMS)] - permvarexp <- aperm(enlvarexp, posaperm) - permvarobs <- aperm(enlvarobs, posaperm) - dimsaperm <- dim(permvarexp) - # - # RMSSS and its pvalue computation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - if (pval) { - nvals <- 2 - } else { - nvals <- 1 - } - enlRMSSS <- array(dim = c(nexp, nobs, nvals, dimsaperm[3:10])) - - for (jexp in 1:nexp) { - for (jobs in 1:nobs) { - dif1 <- array(dim = dimsaperm[-1]) - dif2 <- array(dim = dimsaperm[-1]) - dif1[, , , , , , , , ] <- permvarexp[jexp, , , , , , , , - , ] - permvarobs[jobs, , , , , , , , , ] - dif2[, , , , , , , , ] <- permvarobs[jobs, , , , , , , , , ] - rms1 <- Mean1Dim(dif1 ** 2, 1, narm = TRUE) ** 0.5 - rms2 <- Mean1Dim(dif2 ** 2, 1, narm = TRUE) ** 0.5 - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - enlRMSSS[jexp, jobs, 1, , , , , , , , ] <- 1 - (rms1 / rms2) - if (pval) { - eno1 <- Eno(dif1, 1) - eno2 <- Eno(dif2, 1) - F <- (eno2 * (rms2) ** 2 / (eno2 - 1)) / (eno1 * (rms1) ** 2 / (eno1 - 1)) - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - l1 <- eno1[j3, j4, j5, j6, j7, j8, j9, j10] - l2 <- eno2[j3, j4, j5, j6, j7, j8, j9, j10] - if (is.na(l1) == FALSE & is.na(l2) == FALSE & l1 > 2 & l2 > 2) { - enlRMSSS[jexp, jobs, 2, j3, j4, j5, j6, j7, j8, j9, - j10] <- 1 - pf(F[j3, j4, j5, j6, j7, j8, j9, - j10], l1 - 1, l2 - 1) - } else { - enlRMSSS[jexp, jobs, 1, j3, j4, j5, j6, j7, j8, j9, - j10] <- NA - } - } - } - } - } - } - } - } - } - } - } - } - - dim(enlRMSSS) <- c(nexp, nobs, nvals, dimsvar[-c(posloop, posRMS)]) - # - # Output - # ~~~~~~~~ - # - enlRMSSS -} diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index e6639f82..e4cec048 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -1,7 +1,8 @@ \name{BrierScore} \alias{BrierScore} +\alias{.BrierScore} \title{ -Compute Brier Score and its decomposition and the Brier Skill Score +Compute Brier Score And Its Decomposition And Brier Skill Score } \description{ Returns the values of the BS and its standard decomposition as well as the addition of the two winthin-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference forecast. @@ -10,16 +11,20 @@ Stephenson et al. (2008). Two extra components in the Brier score decomposition. Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr } \usage{ -BrierScore(ens,obs, thresholds = seq(0, 1, 0.1)) +BrierScore(obs, pred, thresholds = seq(0, 1, 0.1)) + +.BrierScore(ens, obs, thresholds = seq(0, 1, 0.1)) } -\arguments{ - \item{ens}{ -Vector of probablistic predictions with values in the range [0,1] - } +\arguments{ \item{obs}{ Vector of binary observations (1 or 0) } - + \item{pred}{ +Vector of probablistic predictions with values in the range [0,1] + } + \item{ens}{ +Matrix of predictions with values in the range [0,1] for the .BrierScore function + } \item{thresholds}{ Values used to bin the forecasts. By default the bins are {[0,0.1), [0.1, 0.2), ... [0.9, 1]} } @@ -48,7 +53,7 @@ $obs: probability forecasts of the event\cr \examples{ a <- runif(10) b <- round(a) -x <- BrierScore(a,b) +x <- BrierScore(b, a) x$bs - x$bs_check_res x$bs - x$bs_check_gres x$rel_bias_corrected - x$gres_bias_corrected + x$unc_bias_corrected diff --git a/man/Corr.Rd b/man/Corr.Rd index 311d6c78..96041b46 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -13,7 +13,7 @@ Calculates the correlation coefficient (Pearson, Kendall or Spearman) between fo \usage{ Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, limits = NULL, siglev = 0.95, method = 'pearson', - conf = TRUE, pval = TRUE) \cr + conf = TRUE, pval = TRUE) .Corr(ens, obs, siglev = 0.95, method = 'pearson', conf = TRUE, pval = TRUE) @@ -71,7 +71,8 @@ example(Load) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) - runmean_months <- 12 dim_to_smooth <- 4 + runmean_months <- 12 + dim_to_smooth <- 4 # Smooth along lead-times smooth_ano_exp <- Smoothing(ano_exp, runmean_months, dim_to_smooth) smooth_ano_obs <- Smoothing(ano_obs, runmean_months, dim_to_smooth) diff --git a/man/RMS.Rd b/man/RMS.Rd index 09b9d0a8..946c20f9 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -13,6 +13,8 @@ The confidence interval relies on a chi2 distribution. } \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) + +.RMS(ens, obs, limits = NULL, siglev = 0.95, conf = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 2547a9e5..7e618836 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -1,6 +1,5 @@ \name{RMSSS} \alias{RMSSS} -\alias{.RMSSS} \title{ Computes Root Mean Square Skill Score } @@ -12,6 +11,7 @@ The p-value is optionally provided by a one-sided Fisher test. } \usage{ RMSSS(var_exp, var_obs, posloop = 1, posRMS = 2, pval = TRUE) + } \arguments{ \item{var_exp}{ @@ -19,12 +19,6 @@ Array of experimental data. } \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. - } - \item{ens}{ -N by M matrix of N forecasts from M ensemble members. - } - \item{obs}{ -Vector of N observations. } \item{posloop}{ Dimension nobs and nexp. diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 7cd5bac1..5982b713 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -10,17 +10,31 @@ The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr The p-value is provided by a two-sided Fischer test. } \usage{ -RatioRMS(ens, ens.ref, obs, pval = TRUE) +RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) + +.RatioRMS(ens, ens.ref, obs, pval = TRUE) } \arguments{ - \item{ens}{ + \item{var_exp1}{ Array of experimental data 1. } - \item{ens.ref}{ + \item{var_exp2}{ Array of experimental data 2. + } + \item{var_obs}{ +Array of observations. + } + \item{ens}{ +Matrix of experimental data 1. } \item{ens.ref}{ +Matrix of experimental data 2. + } + \item{obs}{ Vector of observations. + } + \item{posRMS}{ +Dimension along which the RMSE are to be computed = the position of the start dates. } \item{pval}{ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. @@ -33,7 +47,31 @@ p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. } \examples{ - leadtimes_dimension <- 4 +# See examples on Load() to understand the first lines in this example + \dontrun{ +data_path <- system.file('sample_data', package = 's2dverification') +expA <- list(name = 'experiment', path = file.path(data_path, + 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', + '$VAR_NAME$_$START_DATE$.nc')) +obsX <- list(name = 'observation', path = file.path(data_path, + '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', + '$VAR_NAME$_$YEAR$$MONTH$.nc')) + +# Now we are ready to use Load(). +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- Load('tos', list(expA), list(obsX), startDates, + output = 'lonlat', latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } +leadtimes_dimension <- 4 initial_month <- 11 mean_start_month <- 12 mean_stop_month <- 2 diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 227591ee..2f0c5084 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -1,6 +1,6 @@ \name{RatioSDRMS} \alias{RatioSDRMS} -\alias{RatioSDRMS} +\alias{.RatioSDRMS} \title{ Computes the ratio between the ensemble spread and RMSE } @@ -13,7 +13,9 @@ The ratio between the standard deviation of the members around the ensemble mean The p-value is provided by a one-sided Fischer test. } \usage{ -RatioSDRMS(ens, obs, pval = TRUE) +RatioSDRMS(var_exp, var_obs, pval = TRUE) + +.RatioSDRMS(ens, obs, pval = TRUE) } \arguments{ \item{var_exp}{ diff --git a/man/Trend.Rd b/man/Trend.Rd index 0a3a930b..dc2d7813 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,5 +1,6 @@ \name{Trend} \alias{Trend} +\alias{.Trend} \title{ Computes the Trend of the Ensemble Mean } @@ -9,9 +10,14 @@ Trend() also provides the time series of the detrended ensemble mean forecasts.\ The confidence interval relies on a student-T distribution. } \usage{ -Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) +Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) + +.Trend(ens, interval = 1, siglev = 0.95, conf = TRUE) } \arguments{ + \item{var}{ +Array of any number of dimensions up to 10. + } \item{ens}{ M by N matrix of M forecasts from N ensemble members. } @@ -26,6 +32,9 @@ Confidence level for the computation of confidence interval. 0.95 by default. \item{conf}{ Whether to compute the confidence levels or not. TRUE by default. } + \item{posTR}{ +Position along which to compute the trend. + } } \value{ \item{$trend}{ -- GitLab From 16eb9ff7ae32c33a4dd8d3f08eb8aa2f2f3642ad Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 25 Nov 2016 14:19:34 +0100 Subject: [PATCH 14/20] Adding descriptions to documentation of hidden functions --- man/BrierScore.Rd | 2 ++ man/Corr.Rd | 24 +++++++++--------------- man/RMS.Rd | 2 ++ man/RatioRMS.Rd | 2 ++ man/RatioSDRMS.Rd | 2 ++ man/Trend.Rd | 2 ++ 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd index e4cec048..d31998c7 100644 --- a/man/BrierScore.Rd +++ b/man/BrierScore.Rd @@ -9,6 +9,8 @@ Returns the values of the BS and its standard decomposition as well as the addit Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr Stephenson et al. (2008). Two extra components in the Brier score decomposition. Weather and Forecasting, 23: 752-757.\cr Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.\cr + +.BrierScore provides the same functionality, but taking a matrix of ensemble members (ens) as input. } \usage{ BrierScore(obs, pred, thresholds = seq(0, 1, 0.1)) diff --git a/man/Corr.Rd b/man/Corr.Rd index 96041b46..d826a5e4 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -6,9 +6,9 @@ Computes the correlation coefficient between an array of forecasts and their cor } \description{ -Calculates the correlation coefficient (Pearson, Kendall or Spearman) between forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr +Calculates the correlation coefficient (Pearson, Kendall or Spearman) for an array of forecasts and observations. The input should be an array with dimensions c(no. of datasets, no. of start dates, no. of forecast times, no. of lons, no. of lats.), where the longitude and latitude dimensions are optional. The correlations are computed along the poscor dimension which should correspond to the startdate dimension. If compROW is given, the correlations are computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes. \cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval is computed by a Fisher transformation.\cr The significance level relies on a one-sided student-T distribution.\cr We can modifiy the treshold of the test modifying siglev (default value=0.95). \cr \cr -.Corr is the same function but with a matrix of experiments and a vector of observations as input. +.Corr calculates the correlation between the ensemble mean and the observations, using an N by M matrix (ens) of forecasts and a vector of observations (obs) as input. } \usage{ Corr(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, @@ -26,30 +26,24 @@ Array of experimental data. \item{var_obs}{ Array of observational data, same dimensions as var_exp except along posloop dimension, where the length can be nobs instead of nexp. } - + \item{ens}{ +N by M matrix of N forecasts from M ensemble members. + } + \item{obs}{ +Vector of the corresponding observations of length N. + } \item{posloop}{ Dimension nobs and nexp. } - \item{poscor}{ Dimension along which correlation are to be computed (the dimension of the start dates). } - \item{compROW}{ Data taken into account only if (compROW)th row is complete.\cr Default = NULL. } - \item{limits}{ Complete between limits[1] & limits[2]. Default = NULL. } - - \item{ens}{ -N by M matrix of N forecasts from M ensemble members. - } - \item{obs}{ -Vector of the corresponding observations of length N. - } - \item{siglev}{ Significance level. Default = 0.95. } @@ -64,7 +58,7 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } } - \value{ Matrix with dimensions :\cr c(# of datasets alogn posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr + \value{ Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } \examples{ # Load sample data as in Load() example: example(Load) diff --git a/man/RMS.Rd b/man/RMS.Rd index 946c20f9..80ae569c 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -10,6 +10,8 @@ The RMSE is computed along the posRMS dimension which should correspond to the s If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr The confidence interval relies on a chi2 distribution. + +.RMS provides the same functionality but taking a matrix of ensemble members as input (ens). } \usage{ RMS(var_exp, var_obs, posloop = 1, posRMS = 2, compROW = NULL, limits = NULL, siglev = 0.95, conf = TRUE) diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 5982b713..e2d2d5a4 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -8,6 +8,8 @@ Computes the Ratio Between The RMSE of Two Experiments Calculates the ratio of the RMSE for two forecasts of the same observations.\cr The ratio RMSE(ens, obs) / RMSE(ens.ref, obs) is output.\cr The p-value is provided by a two-sided Fischer test. + +.RatioRMS provides the same functionality but taking two matrices of ensemble members (ens and ens.ref) as input. } \usage{ RatioRMS(var_exp1, var_exp2, var_obs, posRMS = 1, pval = TRUE) diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 2f0c5084..ef6b55da 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -11,6 +11,8 @@ and\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr The p-value is provided by a one-sided Fischer test. + +.RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (ens). } \usage{ RatioSDRMS(var_exp, var_obs, pval = TRUE) diff --git a/man/Trend.Rd b/man/Trend.Rd index dc2d7813..428d5f70 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -8,6 +8,8 @@ Computes the Trend of the Ensemble Mean Computes the trend along the forecast time of the ensemble mean by least square fitting, and the associated error interval.\cr Trend() also provides the time series of the detrended ensemble mean forecasts.\cr The confidence interval relies on a student-T distribution. + +.Trend provides the same functionality but taking a matrix ensemble members as input (ens). } \usage{ Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) -- GitLab From 0950613653995c5d7eb721453954909fbd03cb54 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Fri, 25 Nov 2016 14:36:41 +0100 Subject: [PATCH 15/20] Minor bugfix in Trend example --- man/Trend.Rd | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/man/Trend.Rd b/man/Trend.Rd index 428d5f70..c2981e0f 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -50,10 +50,17 @@ Same dimensions as var with linearly detrended var along the posTR dimension. } } \examples{ -ens <- matrix(rnorm(500), c(100, 5)) -#Add a trend to the first ensemble member -ens[,1] <- ens[, 1] + (seq(1, 100, 1) * 5) -ens.trend <- Trend(ens) +# Load sample data as in Load() example: +example(Load) +months_between_startdates <- 60 +trend <- Trend(sampleData$obs, 3, months_between_startdates) +PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", + monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = 0, + fileout = 'tos_obs_trend.eps') +PlotAno(trend$detrended, NULL, startDates, + toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', + legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') } \author{ History:\cr -- GitLab From bca0686b0d49549027aea7545b203fbabcb89a83 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 28 Nov 2016 09:30:30 +0100 Subject: [PATCH 16/20] Add output values for hidden functions to manual --- man/Corr.Rd | 11 +++++++++-- man/RMS.Rd | 8 ++++++-- man/RatioRMS.Rd | 10 ++++++++-- man/RatioSDRMS.Rd | 5 ++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index d826a5e4..696ce01a 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -58,8 +58,15 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } } - \value{ Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. } + \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr +The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. +.Corr: A list with elements 'corr', 'p.val', 'conf_low', 'conf_high' is output, corresponding to the correlation test statistic, the p value and the upper and lower confidence limits, respectively. +.Corr: +$corr: the correlation statistic\cr +$p.val: the p value\cr +$conf_low: the lower confidence limit\cr +$conf_high: the upper confidence limit\cr +} \examples{ # Load sample data as in Load() example: example(Load) clim <- Clim(sampleData$mod, sampleData$obs) diff --git a/man/RMS.Rd b/man/RMS.Rd index 80ae569c..bf972a17 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -52,9 +52,13 @@ Whether to compute confidence interval or not. TRUE by default. } } \value{ -Matrix with dimensions:\cr +RMS: Array with dimensions:\cr c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr -The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). +The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). \cr +.RMS: +$rms: root mean square error\cr +$conf_low: lower confidence interval\cr +$conf_high: upper confidence interval\cr } \examples{ # Load sample data as in Load() example: diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index e2d2d5a4..4e074d10 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -43,10 +43,16 @@ Whether to compute the p-value of Ho : RMSE1/RMSE2 = 1 or not. TRUE by default. } } \value{ -A list containing the following components :\cr +RatioRMS: A list containing the following components :\cr ratiorms - the ratio of the rms of the two ensembles \cr p.val - the p value \cr -The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. +The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr +.RatioRMS: +$ratiorms: the ratio of the root mean square errors\cr +$p.val: the p value\cr + + + } \examples{ # See examples on Load() to understand the first lines in this example diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index ef6b55da..0adcd5c2 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -41,9 +41,12 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. } } \value{ -Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to +RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. +.RatioSDRMS: +$ratio: ratio of the ensemble spread and RMSE\cr +$p.val: the p value\cr } \examples{ # Load sample data as in Load() example: -- GitLab From 3785381c95ac7fb1c6b71568ad4d3286e0133b6e Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 28 Nov 2016 09:51:17 +0100 Subject: [PATCH 17/20] More small changes to documentation --- man/Corr.Rd | 20 ++++++++++++++------ man/RMS.Rd | 14 ++++++++++---- man/RatioRMS.Rd | 10 ++++++---- man/RatioSDRMS.Rd | 14 +++++++++----- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/man/Corr.Rd b/man/Corr.Rd index 696ce01a..3b2cae75 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -59,13 +59,21 @@ Whether to compute statistical significance p-value (default = 'TRUE') or not (F } \value{ Corr: Array with dimensions :\cr c(# of datasets along posloop in var_exp, # of datasets along posloop in var_obs, 4, all other dimensions of var_exp & var_obs except poscor).\cr -The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. -.Corr: A list with elements 'corr', 'p.val', 'conf_low', 'conf_high' is output, corresponding to the correlation test statistic, the p value and the upper and lower confidence limits, respectively. +The third dimension, of length 4 maximum, contains to the lower limit of the 95\% confidence interval, the correlation, the upper limit of the 95\% confidence interval and the 95\% significance level given by a one-sided T-test. If the p-value is disabled via \code{pval = FALSE}, this dimension will be of length 3. If the confidence intervals are disabled via \code{conf = FALSE}, this dimension will be of length 2. If both are disabled, this will be of length 2. \cr + .Corr: -$corr: the correlation statistic\cr -$p.val: the p value\cr -$conf_low: the lower confidence limit\cr -$conf_high: the upper confidence limit\cr + \item{$corr}{ +The correlation statistic. + } + \item{$p.val}{ +Corresponds to the p values for the \code{siglev}\% (only present if \code{pval = TRUE}) for the correlation. + } + \item{$conf_low}{ +Corresponds to the upper limit of the \code{siglev}\% (only present if \code{conf = TRUE}) for the correlation. + } + \item{$conf_high}{ +Corresponds to the lower limit of the \code{siglev}\% (only present if \code{conf = TRUE}) for the correlation. + } } \examples{ # Load sample data as in Load() example: example(Load) diff --git a/man/RMS.Rd b/man/RMS.Rd index bf972a17..663694d5 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -9,7 +9,7 @@ Computes the root mean square error for an array of forecasts, var_exp and an ar The RMSE is computed along the posRMS dimension which should correspond to the startdate dimension.\cr If compROW is given, the RMSE is computed only if rows along the compROW dimension are complete between limits[1] and limits[2], i.e. there are no NAs between limits[1] and limits[2]. This option can be activated if the user wishes to account only for the forecasts for which observations are available at all leadtimes.\cr Default: limits[1] = 1 and limits[2] = length(compROW dimension).\cr -The confidence interval relies on a chi2 distribution. +The confidence interval relies on a chi2 distribution. \cr .RMS provides the same functionality but taking a matrix of ensemble members as input (ens). } @@ -56,9 +56,15 @@ RMS: Array with dimensions:\cr c(length(posloop) in var_exp, length(posloop) in var_obs, 1 or 3, all other dimensions of var_exp & var_obs except posRMS).\cr The 3rd dimension corresponds to the lower limit of the 95\% confidence interval (only present if \code{conf = TRUE}), the RMSE, and the upper limit of the 95\% confidence interval (only present if \code{conf = TRUE}). \cr .RMS: -$rms: root mean square error\cr -$conf_low: lower confidence interval\cr -$conf_high: upper confidence interval\cr + \item{$rms}{ +The root mean square error, + } + \item{$conf_low}{ +Corresponding to the lower limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms. + } + \item{$conf_high}{ +Corresponding to the upper limit of the \code{siglev}\% confidence interval (only present if \code{conf = TRUE}) for the rms. + } } \examples{ # Load sample data as in Load() example: diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 4e074d10..b8a7486d 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -48,10 +48,12 @@ ratiorms - the ratio of the rms of the two ensembles \cr p.val - the p value \cr The dimension corresponds to the ratio between the RMSE (RMSE1/RMSE2) and the p.value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1.\cr .RatioRMS: -$ratiorms: the ratio of the root mean square errors\cr -$p.val: the p value\cr - - +\item{$ratiorms}{ +The ratio of the root mean square errors, + } + \item{$conf.int}{ +Corresponding to the p values of the ratio of the rmse statistics (only present if \code{pval = TRUE}). + } } \examples{ diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index 0adcd5c2..e38bd2c7 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -10,8 +10,7 @@ Arrays var_exp & var_obs should have dimensions between\cr and\cr c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)\cr The ratio between the standard deviation of the members around the ensemble mean in var_exp and the RMSE between var_exp and var_obs is output for each experiment and each observational dataset.\cr -The p-value is provided by a one-sided Fischer test. - +The p-value is provided by a one-sided Fischer test.\cr .RatioSDRMS provides the same functionality but taking a matrix of ensemble members as input (ens). } \usage{ @@ -43,10 +42,15 @@ Whether to compute the p-value of Ho : SD/RMSE = 1 or not. \value{ RatioSDRMS: Array with dimensions c(nexp/nmod, nobs, 1 or 2, nltime) up to c(nexp/nmod, nobs, 1 or 2, nltime, nlevel, nlat, nlon). -The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1. +The 3rd dimension corresponds to the ratio (SD/RMSE) and the p.value (only present if \code{pval = TRUE}) of the one-sided Fisher test with Ho: SD/RMSE = 1.\cr .RatioSDRMS: -$ratio: ratio of the ensemble spread and RMSE\cr -$p.val: the p value\cr +\item{$ratio}{ +The ratio of the ensemble spread and RMSE, + } + \item{$p.val}{ +Corresponds to the p values of the ratio (only present if \code{pval = TRUE}). + } + } \examples{ # Load sample data as in Load() example: -- GitLab From 559e22b313655e88ff9d34b839cbd2a9c9a70c84 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Mon, 5 Dec 2016 14:31:46 +0100 Subject: [PATCH 18/20] Minor bugfixes for BrierScore --- R/BrierScore.R | 68 ++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/R/BrierScore.R b/R/BrierScore.R index 50b38730..cce75377 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -82,51 +82,49 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { } else { nbins <- length(thresholds) - 1 # Number of bins n <- dim(ens)[1] # Number of observations + ens.mean <- rowMeans(ens, na.rm = TRUE) n.ens <- seq(1,dim(ens)[2],1) # Number of ensemble members - bins <- array(as.list(paste("bin", 1:nbins,sep = "")), c(nbins,dim(ens)[2])) + bins <- as.list(paste("bin", 1:nbins,sep = "")) for (i in 1:nbins) { if (i == nbins) { - bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x <= thresholds[i + 1]))) + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean <= thresholds[i + 1])) } else { - bins[i,] <- apply(ens, MARGIN = 2, FUN = function(x) list(which(x >= thresholds[i] & x < thresholds[i + 1]))) + bins[[i]] <- list(which(ens.mean >= thresholds[i] & ens.mean < thresholds[i + 1])) } } - - - fkbar <- okbar <- nk <- array(0, dim = c(nbins,dim(ens)[2])) - for (k in 1:dim(ens)[2]) { - for (i in 1:nbins) { - nk[i,k] <- length(bins[[i,k]][[1]]) - fkbar[i,k] <- sum(ens[,k][bins[[i,k]][[1]]])/nk[i,k] - okbar[i,k] <- sum(obs[bins[[i,k]][[1]]]) / nk[i,k] + + fkbar <- okbar <- nk <- array(0, dim = nbins) + for (i in 1:nbins) { + nk[i] <- length(bins[[i]][[1]]) + fkbar[i] <- sum(ens.mean[bins[[i]][[1]]]) / nk[i] + okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i] } - } fkbar[fkbar == Inf] <- 0 okbar[is.nan(okbar)] <- 0 obar <- sum(obs) / length(obs) - relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- rep(0,dim(ens)[2]) + relsum <- ressum <- relsum1 <- ressum1 <- term1 <- term1a <- term2 <- term2a <- 0 - for (k in 1:dim(ens)[2]) { + for (i in 1:nbins) { - if (nk[i,k] > 0) { - relsum[k] <- relsum[k] + nk[i,k] * (fkbar[i,k] - okbar[i,k])^2 - ressum[k] <- ressum[k] + nk[i,k] * (okbar[i,k] - obar)^2 + if (nk[i] > 0) { + relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2 + ressum <- ressum + nk[i] * (okbar[i] - obar)^2 - for (j in 1:nk[i,k]) { - term1[k] <- term1[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k])^2 - term2[k] <- term2[k] + (ens[,k][bins[[i,k]][[1]][j]] - fkbar[i,k]) * (obs[bins[[i,k]][[1]][j]] - okbar[i,k]) + for (j in 1:nk[i]) { + term1 <- term1 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i])^2 + term2 <- term2 + (ens.mean[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) } } } } - + rel <- relsum / n res <- ressum / n unc <- obar * (1 - obar) - bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) - + #bs <- apply(ens, MARGIN = 2, FUN = function(x) sum((x - obs)^2) / n) + bs <- sum((rowMeans(ens, na.rm = T) - obs)^2) / n bs_check_res <- rel - res + unc bss_res <- (res - rel) / unc gres <- res - term1 * (1 / n) + term2 * (2 / n) # Generalized resolution @@ -136,24 +134,19 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { # # Estimating the bias-corrected components of the BS # - term3 <- array(0, dim = c(nbins,dim(ens)[2])) + term3 <- array(0, nbins) for (i in 1:nbins) { - for (k in 1:dim(ens)[2]) { - term3[i,k] <- (nk[i,k] / (nk[i,k] - 1)) * okbar[i,k] * (1 - okbar[i,k]) - } + term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i]) } - term_a <- apply(term3, MARGIN = 2, FUN = function(x) sum(x, na.rm = T) / n) + term_a <- sum(term3, na.rm = T) / n term_b <- (obar * (1 - obar)) / (n - 1) rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b - rel_bias_corrected2 <- gres_bias_corrected2 <- rep(0, dim(ens)[2]) - for(j in 1:dim(ens)[2]) { - if (rel_bias_corrected[j] < 0 || gres_bias_corrected[j] < 0) { - rel_bias_corrected2[j] <- max(rel_bias_corrected[j], rel_bias_corrected[j] - gres_bias_corrected[j], 0) - gres_bias_corrected2[j] <- max(gres_bias_corrected[j], gres_bias_corrected[j] - rel_bias_corrected[j], 0) - rel_bias_corrected[j] <- rel_bias_corrected2[j] - gres_bias_corrected[j] <- gres_bias_corrected2[j] - } + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected @@ -163,6 +156,5 @@ BrierScore <- function(obs, pred, thresholds = seq(0, 1, 0.1)) { # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") #} - invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, okbar = okbar, bins = bins, ens = ens, obs = obs)) - } + invisible(list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, gres_bias_corrected = gres_bias_corrected, unc_bias_corrected = unc_bias_corrected, bss_bias_corrected = bss_bias_corrected)) } -- GitLab From 16ca0822a7ac8db087b435c326d9f0e21def91ad Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Wed, 21 Dec 2016 10:48:55 +0100 Subject: [PATCH 19/20] Minor bugfix --- R/RatioSDRMS.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index dd30eed6..543d6e05 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -100,6 +100,7 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { enorms <- Eno(dif,1) enlratiormssd <- std /rms + p.val <- 0 if (pval) { l1 <- enosd @@ -110,7 +111,10 @@ RatioSDRMS <- function(var_exp, var_obs, pval = TRUE) { if (!is.na(F) && !is.na(l1) && !is.na(l2) && l1 > 2 && l2 > 2) { p.val <- 1 - pf(F, l1 - 1, l2 - 1) - } + } + else { + p.val <- NA + } } -- GitLab From c7f41dfbdb2bcf6c5145e675c32c3a91de5000e5 Mon Sep 17 00:00:00 2001 From: "alasdair.hunter" Date: Tue, 17 Jan 2017 14:35:55 +0100 Subject: [PATCH 20/20] Minor changes --- R/Corr.R | 5 +++-- man/ACC.Rd | 17 ++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 40e89684..ce576b84 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -138,8 +138,7 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, } } - conf_low <- (1 - siglev) / 2 - conf_high <- 1 - conf_low + p <- c() conflow <- c() confhigh <- c() @@ -162,6 +161,8 @@ Corr <- function(var_exp, var_obs, posloop = 1, poscor = 2, compROW = NULL, } if (conf & method == "pearson") { + conf_low <- (1 - siglev) / 2 + conf_high <- 1 - conf_low conf.int <- c(tanh(atanh(CORR) + qnorm(conf_low) / sqrt( eno - 3)), tanh(atanh(CORR) + qnorm(conf_high) / sqrt( eno - 3))) diff --git a/man/ACC.Rd b/man/ACC.Rd index ccb69e71..2c92fe3e 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -1,13 +1,12 @@ \name{ACC} \alias{ACC} \title{ -Computes Anomaly Correlation Coefficient (Spatial Correlation) +Computes Anomaly Correlation Coefficient (Centred) } \description{ -Matrix var_exp & var_obs should have dimensions (nexp/nobs, nsdates, nltimes, nlat, nlon) or (nexp/nobs, nsdates, nmember, nltimes, nlat, nlon).\cr -ACC computes the Anomaly Correlation Coefficient for the ensemble mean of each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACC for each startdate and each leadtime.\cr -A domain can be selected by providing the list of longitudes/latitudes (lon/lat) of the grid together with the corner of the domain:\cr - lonlatbox = c(lonmin, lonmax, latmin, latmax) +Calculates the Anomaly Correlation Coefficient for the ensemble mean of each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACsC for each startdate and each leadtime.\cr +The domain of interest can be specified by providing the list of longitudes/latitudes (lon/lat) of the grid together with the corners of the domain:\cr +lonlatbox = c(lonmin, lonmax, latmin, latmax) } \usage{ ACC(var_exp, var_obs, lon = NULL, lat = NULL, lonlatbox = NULL, @@ -15,13 +14,13 @@ ACC(var_exp, var_obs, lon = NULL, lat = NULL, lonlatbox = NULL, } \arguments{ \item{var_exp}{ -Matrix of experimental anomalies with dimensions:\cr +Array of experimental anomalies with dimensions:\cr c(nexp, nsdates, nltimes, nlat, nlon)\cr or\cr c(nexp, nsdates, nmembers, nltimes, nlat, nlon)\cr } \item{var_obs}{ -Matrix of observational anomalies, same dimensions as var_exp except along the first dimension and the second if it corresponds to the member dimension. +Array of observational anomalies, same dimensions as var_exp except along the first dimension and the second if it corresponds to the member dimension. } \item{lon}{ Array of longitudes of the var_exp/var_obs grids, optional. @@ -57,6 +56,7 @@ Mean Anomaly Correlation Coefficient with dimensions:\cr c(nexp, nobs, nleadtimes) } } + \examples{ # See ?Load for explanations on the first part of this example. \dontrun{ @@ -92,6 +92,9 @@ ano_obs <- Ano(sampleData$obs, clim$clim_obs) acc <- ACC(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2)) PlotACC(acc$ACC, startDates) } +\references{ +Joliffe and Stephenson (2012). Forecast Verification: A Practitioner's Guide in Atmospheric Science. Wiley-Blackwell. +} \author{ History:\cr 0.1 - 2013-08 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -- GitLab