Commit dddec28b authored by aho's avatar aho
Browse files

Merge branch 'develop-Spread' into 'master'

Develop Spread

See merge request !55
parents 36b9fcd5 8adc6f1b
Pipeline #5797 passed with stage
in 3 minutes and 33 seconds
......@@ -38,6 +38,7 @@ export(Load)
export(MeanDims)
export(NAO)
export(Persistence)
export(Plot2VarsVsLTime)
export(PlotACC)
export(PlotAno)
export(PlotBoxWhisker)
......@@ -62,6 +63,7 @@ export(SPOD)
export(Season)
export(Smoothing)
export(Spectrum)
export(Spread)
export(StatSeasAtlHurr)
export(TPI)
export(ToyModel)
......@@ -103,12 +105,14 @@ importFrom(grDevices,rainbow)
importFrom(grDevices,rgb)
importFrom(grDevices,svg)
importFrom(grDevices,tiff)
importFrom(stats,IQR)
importFrom(stats,acf)
importFrom(stats,anova)
importFrom(stats,confint)
importFrom(stats,cor)
importFrom(stats,kmeans)
importFrom(stats,lm)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,na.fail)
importFrom(stats,na.omit)
......@@ -121,6 +125,7 @@ importFrom(stats,qnorm)
importFrom(stats,qt)
importFrom(stats,quantile)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,spectrum)
......
#'Plot two scores with confidence intervals in a common plot
#'
#'Plot two input variables that have the same dimensions in a common plot.
#'One plot for all experiments.
#'The input variables should have dimensions (nexp/nmod, nltime).
#'
#'@param var1 Matrix of dimensions (nexp/nmod, nltime).
#'@param var2 Matrix of dimensions (nexp/nmod, nltime).
#'@param toptitle Main title, optional.
#'@param ytitle Title of Y-axis, optional.
#'@param monini Starting month between 1 and 12. Default = 1.
#'@param freq 1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12.
#'@param nticks Number of ticks and labels on the x-axis, optional.
#'@param limits c(lower limit, upper limit): limits of the Y-axis, optional.
#'@param listexp List of experiment names, up to three, optional.
#'@param listvars List of names of input variables, optional.
#'@param biglab TRUE/FALSE for presentation/paper plot. Default = FALSE.
#'@param hlines c(a, b, ...) Add horizontal black lines at Y-positions a, b,
#' ... The default value is NULL.
#'@param leg TRUE/FALSE if legend should be added or not to the plot.
#' Default = TRUE.
#'@param siglev TRUE/FALSE if significance level should replace confidence
#' interval.\cr
#' Default = FALSE.
#'@param sizetit Multiplicative factor to change title size, optional.
#'@param show_conf TRUE/FALSE to show/not confidence intervals for input
#' variables.
#'@param fileout Name of output file. Extensions allowed: eps/ps, jpeg, png,
#' pdf, bmp and tiff. The default value is NULL.
#'@param width File width, in the units specified in the parameter size_units
#' (inches by default). Takes 8 by default.
#'@param height File height, in the units specified in the parameter
#' size_units (inches by default). Takes 5 by default.
#'@param size_units Units of the size of the device (file or window) to plot
#' in. Inches ('in') by default. See ?Devices and the creator function of the
#' corresponding device.
#'@param res Resolution of the device (file or window) to plot in. See
#' ?Devices and the creator function of the corresponding device.
#'@param ... Arguments to be passed to the method. Only accepts the following
#' graphical parameters:\cr
#' adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt
#' csi cxy err family fg fig font font.axis font.lab font.main font.sub lend
#' lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt
#' smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr
#' For more information about the parameters see `par`.
#'
#'@details
#'Examples of input:\cr
#'------------------\cr\cr
#'RMSE error for a number of experiments and along lead-time: (nexp, nltime)
#'
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'runmean_months <- 12
#'smooth_ano_exp <- Smoothing(data = ano_exp, runmeanlen = runmean_months)
#'smooth_ano_obs <- Smoothing(data = ano_obs, runmeanlen = runmean_months)
#'dim_to_mean <- 'member' # mean along members
#'required_complete_row <- 'ftime' # discard startdates for which there are NA leadtimes
#'leadtimes_per_startdate <- 60
#'rms <- RMS(MeanDims(smooth_ano_exp, dim_to_mean),
#' MeanDims(smooth_ano_obs, dim_to_mean),
#' comp_dim = required_complete_row,
#' limits = c(ceiling((runmean_months + 1) / 2),
#' leadtimes_per_startdate - floor(runmean_months / 2)))
#'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member',
#' na.rm = TRUE),
#' posdim = 3,
#' lendim = dim(smooth_ano_exp)['member'],
#' name = 'member')
#'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate'))
#'#Combine rms outputs into one array
#'rms_combine <- abind::abind(rms$conf.lower, rms$rms, rms$conf.upper, along = 0)
#'rms_combine <- Reorder(rms_combine, c(2, 3, 1, 4))
#' \donttest{
#'Plot2VarsVsLTime(InsertDim(rms_combine[, , , ], 1, 1), Reorder(spread$sd, c(1, 3, 2)),
#' toptitle = 'RMSE and spread', monini = 11, freq = 12,
#' listexp = c('CMIP5 IC3'), listvar = c('RMSE', 'spread'))
#' }
#'
#'@importFrom grDevices png jpeg postscript pdf svg bmp tiff postscript dev.cur dev.new dev.off
#'@importFrom stats ts
#'@export
Plot2VarsVsLTime <- function(var1, var2, toptitle = '', ytitle = '', monini = 1,
freq = 12, nticks = NULL, limits = NULL, listexp =
c('exp1', 'exp2', 'exp3'), listvars = c('var1',
'var2'), biglab = FALSE, hlines = NULL, leg = TRUE,
siglev = FALSE, sizetit = 1, show_conf = TRUE,
fileout = NULL,
width = 8, height = 5, size_units = 'in', res = 100, ...) {
# Process the user graphical parameters that may be passed in the call
## Graphical parameters to exclude
excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "fin", "lab", "las", "lty", "lwd", "mai", "mgp", "new", "pin", "ps", "pty")
userArgs <- .FilterUserGraphicArgs(excludedArgs, ...)
# If there is any filenames to store the graphics, process them
# to select the right device
if (!is.null(fileout)) {
deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res)
saveToFile <- deviceInfo$fun
fileout <- deviceInfo$files
}
#
nvars <- 2
if (length(dim(var1)) != length(dim(var2))) {
print("the two input variables should have the same dimensions")
stop()
}
if (length(dim(var1)) >= 4) {
print("dimensions of input variables should be 3")
stop()
}
nleadtime <- dim(var1)[3]
nexp <- dim(var1)[1]
var <- array(dim = c(nvars, nexp, 3, nleadtime))
for (jvar in 1:nvars) {
varname <- paste("var", as.character(jvar), sep = "")
var[jvar, , , ] <- get(varname)
rm(varname)
}
if (is.null(limits) == TRUE) {
ll <- min(var1, na.rm = TRUE)
ul <- max(var1, na.rm = TRUE)
if (biglab) {
ul <- ul + 0.4 * (ul - ll)
} else {
ul <- ul + 0.3 * (ul - ll)
}
} else {
ll <- limits[1]
ul <- limits[2]
}
lastyear <- (monini + (nleadtime - 1) * 12 / freq - 1) %/% 12
lastmonth <- (monini + (nleadtime - 1) * 12 / freq - 1) %% 12 + 1
empty_ts <- ts(start = c(0000, (monini - 1) %/% (12 / freq) + 1),
end = c(lastyear, (lastmonth - 1) %/% (12 / freq) + 1),
frequency = freq)
empty <- array(dim = length(empty_ts))
#
# Define some plot parameters
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
if (is.null(nticks)) {
if (biglab) {
nticks <- 5
} else {
nticks <- 10
}
}
labind <- seq(1, nleadtime, max(nleadtime %/% nticks, 1))
months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec")
labyear <- ((labind - 1) * 12 / freq + monini - 1) %/% 12
labmonth <- months[((labind - 1) * 12 / freq + monini - 1) %% 12 + 1]
for (jx in 1:length(labmonth)) {
y2o3dig <- paste("0", as.character(labyear[jx]), sep = "")
labmonth[jx] <- paste(labmonth[jx], "\nYr ", substr(y2o3dig, nchar(y2o3dig)
- 1, nchar(y2o3dig)), sep = "")
}
color <- c("red1", "dodgerblue1", "green1", "orange1", "lightblue1",
"deeppink1", "mediumpurple1", "lightgoldenrod1", "olivedrab1",
"mediumorchid1")
type <- c(1, 3)
if (siglev == TRUE) {
lines <- c("n", "l", "n")
}
else{
lines <- c("l", "l", "l")
}
thickness <- array(dim = c(3))
thickness[1] <- c(1)
thickness[2] <- c(8)
thickness[3] <- thickness[1]
#
# Define plot layout
# ~~~~~~~~~~~~~~~~~~~~
#
# Open connection to graphical device
if (!is.null(fileout)) {
saveToFile(fileout)
} else if (names(dev.cur()) == 'null device') {
dev.new(units = size_units, res = res, width = width, height = height)
}
# Load the user parameters
par(userArgs)
if (biglab) {
par(mai = c(1.25, 1.4, 0.5, 1), mgp = c(4, 2.5, 0))
par(cex = 1.3, cex.lab = 2, cex.axis = 1.8)
cexmain <- 2.2
legsize <- 1.5
} else {
par(mai = c(1, 1.1, 0.5, 0), mgp = c(3, 1.8, 0))
par(cex = 1.3, cex.lab = 1.5, cex.axis = 1.1)
cexmain <- 1.5
legsize <- 1
}
plot(empty, ylim = c(ll, ul), xlab = "Time (months)", ylab = ytitle,
main = toptitle, cex.main = cexmain * sizetit, axes = FALSE)
axis(1, at = labind, labels = labmonth)
axis(2)
box()
if (is.null(hlines) != TRUE) {
for (jy in 1:length(hlines)) {
par(new = TRUE)
abline(h = hlines[jy])
}
}
#
# Loop on experimental data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
legendnames <- array(dim = nexp * nvars)
legendthick <- array(dim = nexp * nvars)
legendsty <- array(dim = nexp * nvars)
legendcol <- array(dim = nexp * nvars)
if (show_conf == TRUE) {
start_line <- 3
end_line <- 1
} else {
start_line <- 2
end_line <- 2
}
for (jint in seq(start_line, end_line, -1)) {
ind <- 1
for (jexp in 1:nexp) {
for (jvar in 1:nvars) {
par(new = TRUE)
plot(var[jvar, jexp, jint, ], type = lines[jint], ylim = c(ll, ul),
col = color[jexp], lty = type[jvar], lwd = thickness[jint],
ylab = "", xlab = "", axes = FALSE)
legendnames[ind] <- paste(listexp[jexp], listvars[jvar])
legendthick[ind] <- 2
legendsty[ind] <- type[jvar]
legendcol[ind] <- color[jexp]
ind <- ind + 1
}
}
}
if (leg) {
legend(1, ul, legendnames, lty = legendsty, lwd = legendthick,
col = legendcol, cex = legsize)
}
# If the graphic was saved to file, close the connection with the device
if(!is.null(fileout)) dev.off()
}
#'Compute interquartile range, maximum-minimum, standard deviation and median
#'absolute deviation
#'
#'Compute interquartile range, maximum-minimum, standard deviation and median
#'absolute deviation along the list of dimensions provided by the compute_dim
#'argument (typically along the ensemble member and start date dimension).
#'The confidence interval is computed by bootstrapping by 100 times. The input
#'data can be the output of \code{Load()}, \code{Ano()}, or
#'\code{Ano_CrossValid()}, for example.
#'
#'@param data A numeric vector or array with named dimensions to compute the
#' statistics. The dimensions should at least include 'compute_dim'.
#'@param compute_dim A vector of character strings of the dimension names along
#' which to compute the statistics. The default value is 'member'.
#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
#' kept (FALSE) for computation. The default value is TRUE.
#'@param conf A logical value indicating whether to compute the confidence
#' intervals or not. The default value is TRUE.
#'@param conf.lev A numeric value of the confidence level for the computation.
#' The default value is 0.95.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'A list of numeric arrays with the same dimensions as 'data' but without
#''compute_dim' and with the first dimension 'stats'. If 'conf' is TRUE, the
#'length of 'stats' is 3 corresponding to the lower limit of the confidence
#'interval, the spread, and the upper limit of the confidence interval. If
#''conf' is FALSE, the length of 'stats' is 1 corresponding to the spread.
#'\item{$iqr}{
#' InterQuartile Range.
#'}
#'\item{$maxmin}{
#' Maximum - Minimum.
#'}
#'\item{$sd}{
#' Standard Deviation.
#'}
#'\item{$mad}{
#' Median Absolute Deviation.
#'}
#'
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'runmean_months <- 12
#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
#'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member',
#' na.rm = TRUE),
#' posdim = 3,
#' lendim = dim(smooth_ano_exp)['member'],
#' name = 'member')
#'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate'))
#'
#'\donttest{
#'PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')),
#' toptitle = "Inter-Quartile Range between ensemble members",
#' ytitle = "K", monini = 11, limits = NULL,
#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE,
#' hlines = c(0), fileout = 'tos_iqr.png')
#'PlotVsLTime(Reorder(spread$maxmin, c('dataset', 'stats', 'ftime')),
#' toptitle = "Maximum minus minimum of the members",
#' ytitle = "K", monini = 11, limits = NULL,
#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE,
#' hlines = c(0), fileout = 'tos_maxmin.png')
#'PlotVsLTime(Reorder(spread$sd, c('dataset', 'stats', 'ftime')),
#' toptitle = "Standard deviation of the members",
#' ytitle = "K", monini = 11, limits = NULL,
#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE,
#' hlines = c(0), fileout = 'tos_sd.png')
#'PlotVsLTime(Reorder(spread$mad, c('dataset', 'stats', 'ftime')),
#' toptitle = "Median Absolute Deviation of the members",
#' ytitle = "K", monini = 11, limits = NULL,
#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE,
#' hlines = c(0), fileout = 'tos_mad.png')
#'}
#'
#'@import multiApply
#'@importFrom stats IQR sd mad runif quantile
#'@export
Spread <- function(data, compute_dim = 'member', na.rm = TRUE,
conf = TRUE, conf.lev = 0.95, ncores = NULL) {
# Check inputs
## data
if (is.null(data)) {
stop("Parameter 'data' cannot be NULL.")
}
if (!is.numeric(data)) {
stop("Parameter 'data' must be a numeric array.")
}
if (is.null(dim(data))) { #is vector
dim(data) <- c(length(data))
names(dim(data)) <- compute_dim[1]
}
if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) {
stop("Parameter 'data' must have dimension names.")
}
## compute_dim
if (!is.character(compute_dim)) {
stop("Parameter 'compute_dim' must be a character vector.")
}
if (any(!compute_dim %in% names(dim(data)))) {
stop("Parameter 'compute_dim' has some element not in 'data' dimension names.")
}
## na.rm
if (!is.logical(na.rm) | length(na.rm) > 1) {
stop("Parameter 'na.rm' must be one logical value.")
}
## conf
if (!is.logical(conf) | length(conf) > 1) {
stop("Parameter 'conf' must be one logical value.")
}
## conf.lev
if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
###############################
# Calculate Spread
output <- Apply(list(data),
target_dims = compute_dim,
fun = .Spread,
output_dims = list(iqr = 'stats', maxmin = 'stats',
sd = 'stats', mad = 'stats'),
na.rm = na.rm,
conf = conf, conf.lev = conf.lev,
ncores = ncores)
return(output)
}
.Spread <- function(data, compute_dim = 'member', na.rm = TRUE,
conf = TRUE, conf.lev = 0.95) {
# data: compute_dim. [member] or [member, sdate] for example
# Compute spread
res_iqr <- IQR(data, na.rm = na.rm)
res_maxmin <- max(data, na.rm = na.rm) - min(data, na.rm = na.rm)
res_sd <- sd(data, na.rm = na.rm)
res_mad <- mad(data, na.rm = na.rm)
# Compute conf (bootstrapping)
if (conf) {
# The output length is 3, [conf.low, spread, conf.high]
res_iqr <- rep(res_iqr, 3)
res_maxmin <- rep(res_maxmin, 3)
res_sd <- rep(res_sd, 3)
res_mad <- rep(res_mad, 3)
conf_low <- (1 - conf.lev) / 2
conf_high <- 1 - conf_low
# Create vector for saving bootstrap result
iqr_bs <- c()
maxmin_bs <- c()
sd_bs <- c()
mad_bs <- c()
# bootstrapping for 100 times
num <- length(data)
for (jmix in 1:100) {
drawings <- round(runif(num, 0.5, num + 0.5))
iqr_bs <- c(iqr_bs, IQR(data[drawings], na.rm = na.rm))
maxmin_bs <- c(maxmin_bs, max(data[drawings], na.rm = na.rm) -
min(data[drawings], na.rm = na.rm))
sd_bs <- c(sd_bs, sd(data[drawings], na.rm = na.rm))
mad_bs <- c(mad_bs, mad(data[drawings], na.rm = na.rm))
}
# Calculate confidence interval with the bootstrapping results
res_iqr[1] <- quantile(iqr_bs, conf_low, na.rm = na.rm)
res_iqr[3] <- quantile(iqr_bs, conf_high, na.rm = na.rm)
res_maxmin[1] <- res_maxmin[2] + (quantile(maxmin_bs, conf_low, na.rm = na.rm) -
quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2
res_maxmin[3] <- res_maxmin[2] - (quantile(maxmin_bs, conf_low, na.rm = na.rm) -
quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2
res_sd[1] <- quantile(sd_bs, conf_low, na.rm = na.rm)
res_sd[3] <- quantile(sd_bs, conf_high, na.rm = na.rm)
res_mad[1] <- res_mad[2] + (quantile(mad_bs, conf_low, na.rm = na.rm) -
quantile(mad_bs, conf_high, na.rm = na.rm))
res_mad[3] <- res_mad[2] - (quantile(mad_bs, conf_low, na.rm = na.rm) -
quantile(mad_bs, conf_high, na.rm = na.rm))
}
# Turn infinite to NA
res_maxmin[which(is.infinite(res_maxmin))] <- NA
return(invisible(list(iqr = as.array(res_iqr), maxmin = as.array(res_maxmin),
sd = as.array(res_sd), mad = as.array(res_mad))))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Plot2VarsVsLTime.R
\name{Plot2VarsVsLTime}
\alias{Plot2VarsVsLTime}
\title{Plot two scores with confidence intervals in a common plot}
\usage{
Plot2VarsVsLTime(
var1,
var2,
toptitle = "",
ytitle = "",
monini = 1,
freq = 12,
nticks = NULL,
limits = NULL,
listexp = c("exp1", "exp2", "exp3"),
listvars = c("var1", "var2"),
biglab = FALSE,
hlines = NULL,
leg = TRUE,
siglev = FALSE,
sizetit = 1,
show_conf = TRUE,
fileout = NULL,
width = 8,
height = 5,
size_units = "in",
res = 100,
...
)
}
\arguments{
\item{var1}{Matrix of dimensions (nexp/nmod, nltime).}
\item{var2}{Matrix of dimensions (nexp/nmod, nltime).}
\item{toptitle}{Main title, optional.}
\item{ytitle}{Title of Y-axis, optional.}
\item{monini}{Starting month between 1 and 12. Default = 1.}
\item{freq}{1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12.}
\item{nticks}{Number of ticks and labels on the x-axis, optional.}
\item{limits}{c(lower limit, upper limit): limits of the Y-axis, optional.}
\item{listexp}{List of experiment names, up to three, optional.}
\item{listvars}{List of names of input variables, optional.}
\item{biglab}{TRUE/FALSE for presentation/paper plot. Default = FALSE.}
\item{hlines}{c(a, b, ...) Add horizontal black lines at Y-positions a, b,
... The default value is NULL.}
\item{leg}{TRUE/FALSE if legend should be added or not to the plot.
Default = TRUE.}
\item{siglev}{TRUE/FALSE if significance level should replace confidence
interval.\cr
Default = FALSE.}
\item{sizetit}{Multiplicative factor to change title size, optional.}
\item{show_conf}{TRUE/FALSE to show/not confidence intervals for input
variables.}
\item{fileout}{Name of output file. Extensions allowed: eps/ps, jpeg, png,
pdf, bmp and tiff. The default value is NULL.}
\item{width}{File width, in the units specified in the parameter size_units
(inches by default). Takes 8 by default.}
\item{height}{File height, in the units specified in the parameter
size_units (inches by default). Takes 5 by default.}
\item{size_units}{Units of the size of the device (file or window) to plot
in. Inches ('in') by default. See ?Devices and the creator function of the
corresponding device.}
\item{res}{Resolution of the device (file or window) to plot in. See
?Devices and the creator function of the corresponding device.}
\item{...}{Arguments to be passed to the method. Only accepts the following
graphical parameters:\cr
adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt
csi cxy err family fg fig font font.axis font.lab font.main font.sub lend
lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt
smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr
For more information about the parameters see `par`.}
}
\description{
Plot two input variables that have the same dimensions in a common plot.
One plot for all experiments.