Newer
Older
#'Plot one or multiple ensemble forecast pdfs for the same event
#'
Llorenç Lledó
committed
#'@author Llorenç Lledó \email{llledo@bsc.es}
#'@description This function plots the probability distribution function of
#'several ensemble forecasts. Separate panels are used to plot forecasts valid
#'or initialized at different times or by different models or even at different
#'locations. Probabilities for tercile categories are computed, plotted in
#'colors and annotated. An asterisk marks the tercile with higher probabilities.
#'Probabilities for extreme categories (above P90 and below P10) can also be
#'included as hatched areas. Individual ensemble members can be plotted as
#'jittered points. The observed value is optionally shown as a diamond.
#'@param fcst A dataframe or array containing all the ensember members for each
#' forecast. If \code{'fcst'} is an array, it should have two labelled
#' dimensions, and one of them should be \code{'members'}. If \code{'fcsts'} is
#' a data.frame, each column shoul be a separate forecast, with the rows beeing
#' the different ensemble members.
#'@param tercile.limits An array or vector with P33 and P66 values that define
#' the tercile categories for each panel. Use an array of dimensions
#' (nforecasts,2) to define different terciles for each forecast panel, or a
#' vector with two elements to reuse the same tercile limits for all forecast
#' panels.
#'@param extreme.limits (optional) An array or vector with P10 and P90 values
#' that define the extreme categories for each panel. Use an array of
#' (nforecasts,2) to define different extreme limits for each forecast panel,
#' or a vector with two elements to reuse the same tercile limits for all
#' forecast panels. (Default: extreme categories are not shown).
#'@param obs (optional) A vector providing the observed values for each forecast
#' panel or a single value that will be reused for all forecast panels.
#' (Default: observation is not shown).
#'@param plotfile (optional) A filename (pdf, png...) where the plot will be
#' saved. (Default: the plot is not saved).
#'@param title A string with the plot title.
#'@param var.name A string with the variable name and units.
#'@param fcst.names (optional) An array of strings with the titles of each
#' individual forecast.
#'@param add.ensmemb Either to add the ensemble members \code{'above'} (default)
#' or \code{'below'} the pdf, or not (\code{'no'}).
#'@param color.set A selection of predefined color sets: use \code{'ggplot'}
#' (default) for blue/green/red, \code{'s2s4e'} for blue/grey/orange,
#' \code{'hydro'} for yellow/gray/blue (suitable for precipitation and
#'@param memb_dim A character string indicating the name of the member
#' dimension.
#'@importFrom data.table data.table
#'@importFrom data.table CJ
#'@importFrom data.table setkey
#'@importFrom reshape2 melt
#'@importFrom plyr .
#'@importFrom plyr dlply
#'fcsts <- data.frame(fcst1 = rnorm(10), fcst2 = rnorm(10, 0.5, 1.2))
PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = NULL,
plotfile = NULL, title = "Set a title", var.name = "Varname (units)",
fcst.names = NULL, add.ensmemb = c("above", "below", "no"),
color.set = c("ggplot", "s2s4e", "hydro", "vitigeoss"),
memb_dim = 'member') {
value <- init <- extremes <- x <- ymin <- ymax <- tercile <- NULL
y <- xend <- yend <- yjitter <- MLT <- lab.pos <- NULL
ggColorHue <- function(n) {
hues <- seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
#------------------------
# Define color sets
#------------------------
Llorenç Lledó
committed
colorFill <- c("#FF764D", "#b5b5b5", "#33BFD1") # AN, N, BN fill colors
colorHatch <- c("indianred3", "deepskyblue3") # AP90, BP10 line colors
colorMember <- c("#ffff7f")
colorObs <- "purple"
Llorenç Lledó
committed
colorLab <- c("red", "blue") # AP90, BP10 text colors
Llorenç Lledó
committed
colorFill <- c("#41CBC9", "#b5b5b5", "#FFAB38")
Llorenç Lledó
committed
colorHatch <- c("deepskyblue3", "darkorange1")
colorMember <- c("#ffff7f")
colorObs <- "purple"
Llorenç Lledó
committed
colorLab <- c("blue", "darkorange3")
Llorenç Lledó
committed
colorFill <- ggColorHue(3)
Llorenç Lledó
committed
colorHatch <- c("indianred3", "deepskyblue3")
colorMember <- c("#ffff7f")
colorObs <- "purple"
Llorenç Lledó
committed
colorLab <- c("red", "blue")
} else if (color.set == "vitigeoss") {
colorFill <- rev(c("#007be2", "#acb2b5", "#f40000"))
colorHatch <- rev(c("#211b79", "#ae0003"))
colorMember <- c("#ffff7f")
colorObs <- "purple"
} else {
stop("Parameter 'color.set' should be one of ggplot/s2s4e/hydro")
}
#------------------------
# Check input arguments
#------------------------
#------------------------
# Check fcst type and convert to data.frame if needed
#------------------------
if (is.array(fcst)) {
if (!memb_dim %in% names(dim(fcst)) | length(dim(fcst)) != 2) {
stop("Parameter 'fcst' should be a two-dimensional array with labelled dimensions and one of them should be for member. The name of this dimension can be adjusted with 'memb_dim'.")
dim.members <- which(names(dim(fcst)) == memb_dim)
if (dim.members == 1) {
fcst.df <- data.frame(fcst)
} else {
fcst.df <- data.frame(t(fcst))
}
} else if (is.data.frame(fcst)) {
fcst.df <- fcst
} else {
stop("Parameter 'fcst' should be an array or a data.frame")
Francesc Roura Adserias
committed
npanels <- ncol(fcst.df)
#------------------------
# Check observations
#------------------------
if (!is.null(obs)) {
if (!is.vector(obs)){
stop("Observations should be a vector")
} else if (!length(obs) %in% c(1, npanels)) {
stop("The number of observations should equal one or the number of forecasts")
Francesc Roura Adserias
committed
}
}
#------------------------
Francesc Roura Adserias
committed
# Check tercile limits
#------------------------
FRANCESC ROURA ADSERIAS
committed
if (is.vector(tercile.limits)) {
if (length(tercile.limits) != 2) {
FRANCESC ROURA ADSERIAS
committed
}
tercile.limits <- InsertDim(tercile.limits, 1, npanels, name = "new")
FRANCESC ROURA ADSERIAS
committed
} else if (is.array(tercile.limits)) {
if (length(dim(tercile.limits)) == 2) {
if (dim(tercile.limits)[2] != 2) {
stop("Provide two tercile limits for each panel")
}
if (dim(tercile.limits)[1] != npanels) {
stop("The number of tercile limits does not match the number of forecasts provided")
}
FRANCESC ROURA ADSERIAS
committed
} else {
stop("Tercile limits should have two dimensions")
Francesc Roura Adserias
committed
} else {
FRANCESC ROURA ADSERIAS
committed
stop("Tercile limits should be a vector of length two or an array of dimension (nfcsts,2)")
Francesc Roura Adserias
committed
}
# check consistency of tercile limits
if (any(tercile.limits[, 1] >= tercile.limits[, 2])) {
Francesc Roura Adserias
committed
stop("Inconsistent tercile limits")
}
#------------------------
# Check extreme limits
#------------------------
if (!is.null(extreme.limits)) {
FRANCESC ROURA ADSERIAS
committed
if (is.vector(extreme.limits)) {
if (length(extreme.limits) != 2) {
stop("Provide two extreme limits")
FRANCESC ROURA ADSERIAS
committed
}
extreme.limits <- InsertDim(extreme.limits, 1, npanels, name = "new")
FRANCESC ROURA ADSERIAS
committed
} else if (is.array(extreme.limits)) {
if (length(dim(extreme.limits)) == 2) {
if (dim(extreme.limits)[2] != 2) {
stop("Provide two extreme limits for each panel")
}
if (dim(extreme.limits)[1] != npanels) {
stop("The number of extreme limits does not match the number of forecasts provided")
}
} else {
stop("extreme limits should have two dimensions")
}
} else {
stop("Extreme limits should be a vector of length two or an array of dimensions (nfcsts,2)")
}
# Check that extreme limits are consistent with tercile limits
if (any(extreme.limits[, 1] >= tercile.limits[, 1])) {
FRANCESC ROURA ADSERIAS
committed
stop("Inconsistent lower extreme limits")
}
if (any(extreme.limits[, 2] <= tercile.limits[, 2])) {
Francesc Roura Adserias
committed
stop("Inconsistent higher extreme limits")
}
}
#------------------------
# Set proper fcst names
#------------------------
if (!is.null(fcst.names)) {
if (length(fcst.names) != dim(fcst.df)[2]) {
stop("Parameter 'fcst.names' should be an array with as many names as distinct forecasts provided")
}
colnames(fcst.df) <- factor(fcst.names, levels = fcst.names)
#------------------------
# Produce a first plot with the pdf for each init in a panel
#------------------------
Llorenç Lledó
committed
melt.df <- reshape2::melt(fcst.df, variable.name = "init", id.vars = NULL)
Llorenç Lledó
committed
plot <- ggplot(melt.df, aes(x = value)) +
geom_density(alpha = 1, na.rm = T) +
coord_flip() + facet_wrap(~init, strip.position = "top", nrow = 1) +
xlim(range(c(obs, density(melt.df$value, na.rm = T)$x)))
ggp <- ggplot_build(plot)
#------------------------
# Gather the coordinates of the plots together with init and corresponding
# terciles
#------------------------
tmp.df <- ggp$data[[1]][, c("x", "ymin", "ymax", "PANEL")]
if (!is.null(ggp$layout$layout)) {
tmp.df$init <- ggp$layout$layout$init[as.numeric(tmp.df$PANEL)]
} else if (!is.null(ggp$layout$panel_layout)) {
tmp.df$init <- ggp$layout$panel_layout$init[as.numeric(tmp.df$PANEL)]
stop("Cannot find PANELS in ggp object")
}
Llorenç Lledó
committed
tmp.df$tercile <- factor(ifelse(tmp.df$x < tercile.limits[tmp.df$PANEL, 1],
"Below normal", ifelse(tmp.df$x < tercile.limits[tmp.df$PANEL, 2],
"Normal", "Above normal")),
levels = c("Above normal", "Normal", "Below normal"))
#------------------------
# Get the height and width of a panel
#------------------------
pan.width <- diff(range(tmp.df$x))
pan.height <- max(tmp.df$ymax)
magic.ratio <- 9 * pan.height/pan.width
#------------------------
# Compute hatch coordinates for extremes
#------------------------
Francesc Roura Adserias
committed
if (!is.null(extreme.limits)) {
Llorenç Lledó
committed
tmp.df$extremes <- factor(ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 1],
"Below P10", ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 2],
"Normal", "Above P90")),
levels = c("Above P90", "Normal", "Below P10"))
hatch.ls <- dlply(tmp.df, .(init, extremes), function(x) {
# close the polygon
tmp.df2 <- data.frame(x = c(x$x, max(x$x), min(x$x)), y = c(x$ymax, 0,
0))
# compute the hatches for this polygon
hatches <- .polygon.fullhatch(tmp.df2$x, tmp.df2$y, angle = 60, density = 10,
width.units = pan.width, height.units = pan.height)
# add bottom segment
end1 <- data.frame(x = x$x[1], y = x$ymax[1], xend = x$x[1], yend = 0)
# add top segment
end2 <- data.frame(x = x$x[length(x$x)], y = x$ymax[length(x$x)], xend = x$x[length(x$x)],
yend = 0)
return(rbind(hatches, end1, end2))
})
attr <- attr(hatch.ls, "split_labels")
for (i in 1:length(hatch.ls)) {
Llorenç Lledó
committed
hatch.ls[[i]] <- cbind(hatch.ls[[i]], attr[i, ], row.names = NULL)
}
hatch.df <- do.call("rbind", hatch.ls)
# Compute max y for each extreme category
max.ls <- dlply(tmp.df, .(init, extremes), function(x) {
data.frame(y = min(0.85 * pan.height, max(x$ymax)))
})
attr <- attr(max.ls, "split_labels")
for (i in 1:length(max.ls)) {
Llorenç Lledó
committed
max.ls[[i]] <- cbind(max.ls[[i]], attr[i, ], row.names = NULL)
}
max.df <- do.call("rbind", max.ls)
}
#------------------------
# Compute jitter space for ensemble members
#------------------------
if (add.ensmemb != "no") {
Llorenç Lledó
committed
jitter.df <- reshape2::melt(data.frame(dlply(melt.df, .(init), function(x) {
Llorenç Lledó
committed
.jitter.ensmemb(sort(x$value, na.last = T), pan.width / 100)
}), check.names = F), value.name = "yjitter", variable.name = "init", id.vars = NULL)
Llorenç Lledó
committed
jitter.df$x <- reshape2::melt(data.frame(dlply(melt.df, .(init), function(x) {
sort(x$value, na.last = T)
})), value.name = "x", id.vars = NULL)$x
}
#------------------------
# Get y coordinates for observed x values, using a cool data.table feature: merge
# to nearest value
#------------------------
if (!is.null(obs)) {
tmp.dt <- data.table(tmp.df, key = c("init", "x"))
obs.dt <- data.table(init = factor(colnames(fcst.df), levels = colnames(fcst.df)),
value = rep(obs, dim(fcst.df)[2]))
setkey(obs.dt, init, value)
obs.xy <- tmp.dt[obs.dt, roll = "nearest"]
}
#------------------------
# Fill each pdf with different colors for the terciles
#------------------------
Llorenç Lledó
committed
plot <- plot +
geom_ribbon(data = tmp.df,
aes(x = x, ymin = ymin, ymax = ymax, fill = tercile),
alpha = 0.7)
#------------------------
# Add hatches for extremes
#------------------------
if (!is.null(extreme.limits)) {
if (nrow(hatch.df[hatch.df$extremes != "Normal", ]) == 0) {
warning("The provided extreme categories are outside the plot bounds. The extremes will not be drawn.")
extreme.limits <- NULL
} else {
Llorenç Lledó
committed
plot <- plot +
geom_segment(data = hatch.df[hatch.df$extremes != "Normal", ],
aes(x = x, y = y,
xend = xend, yend = yend, color = extremes))
#------------------------
# Add obs line
#------------------------
if (!is.null(obs)) {
Llorenç Lledó
committed
plot <- plot +
geom_vline(data = obs.dt,
aes(xintercept = value),
linetype = "dashed", color = colorObs)
Llorenç Lledó
committed
}
#------------------------
# Add ensemble members
#------------------------
if (add.ensmemb == "below") {
Llorenç Lledó
committed
plot <- plot +
# this adds a grey box for ensmembers
geom_rect(aes(xmin = -Inf, xmax = Inf,
ymin = -Inf, ymax = -pan.height / 10),
fill = "gray95", color = "black", width = 0.2) +
# this adds the ensemble members
geom_point(data = jitter.df,
aes(x = x,
Llorenç Lledó
committed
y = -pan.height / 10 - magic.ratio * yjitter,
Llorenç Lledó
committed
shape = "Ensemble members"),
color = "black", fill = colorMember, alpha = 1)
} else if (add.ensmemb == "above") {
Llorenç Lledó
committed
plot <- plot +
geom_point(data = jitter.df,
aes(x = x,
y = 0.7 * magic.ratio * yjitter,
shape = "Ensemble members"),
color = "black", fill = colorMember, alpha = 1)
#------------------------
# Add obs diamond
#------------------------
if (!is.null(obs)) {
Llorenç Lledó
committed
plot <- plot +
# this adds the obs diamond
geom_point(data = obs.xy,
aes(x = x, y = ymax, size = "Observation"),
shape = 23, color = "black", fill = colorObs)
#------------------------
# Compute probability for each tercile and identify MLT
#------------------------
tmp.dt <- data.table(tmp.df)
pct <- tmp.dt[, .(pct = integrate(approxfun(x, ymax), lower = min(x), upper = max(x))$value),
by = .(init, tercile)]
Llorenç Lledó
committed
# include potentially missing groups
pct <- merge(pct, CJ(init = factor(levels(pct$init), levels = levels(pct$init)),
tercile = factor(c("Below normal", "Normal", "Above normal"),
Llorenç Lledó
committed
levels = c("Above normal", "Normal", "Below normal"))),
Llorenç Lledó
committed
by = c("init", "tercile"), all.y = T)
pct[is.na(pct),"pct"] <- 0
tot <- pct[, .(tot = sum(pct)), by = init]
pct <- merge(pct, tot, by = "init")
Llorenç Lledó
committed
pct$pct <- round(100 * pct$pct / pct$tot, 0)
pct$MLT <- pct[, .(MLT = pct == max(pct)), by = init]$MLT
Llorenç Lledó
committed
pct <- pct[order(init, tercile)]
pct$lab.pos <- as.vector(apply(tercile.limits, 1, function(x) {c(max(x), mean(x), min(x))}))
#------------------------
# Compute probability for extremes
#------------------------
if (!is.null(extreme.limits)) {
pct2 <- tmp.dt[, .(pct = integrate(approxfun(x, ymax), lower = min(x), upper = max(x))$value),
by = .(init, extremes)]
Llorenç Lledó
committed
# include potentially missing groups
pct2 <- merge(pct2, CJ(init = factor(levels(pct2$init), levels = levels(pct2$init)),
extremes = factor(c("Below P10", "Normal", "Above P90"),
Llorenç Lledó
committed
levels = c("Above P90", "Normal", "Below P10"))),
Llorenç Lledó
committed
by = c("init", "extremes"), all.y=T)
pct2[is.na(pct),"pct"] <- 0
tot2 <- pct2[, .(tot = sum(pct)), by = init]
pct2 <- merge(pct2, tot2, by = "init")
Llorenç Lledó
committed
pct2$pct <- round(100 * pct2$pct / pct2$tot, 0)
Llorenç Lledó
committed
pct2 <- pct2[order(init, extremes)]
pct2$lab.pos <- as.vector(apply(extreme.limits, 1, function(x) {c(x[2], NA, x[1])}))
pct2 <- merge(pct2, max.df, by = c("init", "extremes"), all.x = T)
}
#------------------------
# Add probability labels for terciles
#------------------------
if (add.ensmemb == "above") {
labpos <- -0.2 * pan.height
vjust <- 0
labpos <- 0
vjust <- -0.5
}
Llorenç Lledó
committed
plot <- plot +
geom_text(data = pct,
aes(x = lab.pos, y = labpos, label = paste0(pct, "%"),
hjust = as.integer(tercile) * -1.5 + 3.5),
vjust = vjust, angle = -90, size = 3.2) +
geom_text(data = pct[MLT == T, ],
aes(x = lab.pos, y = labpos, label = "*",
hjust = as.integer(tercile) * -3.5 + 9),
vjust = 0.1, angle = -90, size = 7, color = "black")
#------------------------
# Add probability labels for extremes
#------------------------
if (!is.null(extreme.limits)) {
Llorenç Lledó
committed
plot <- plot +
geom_text(data = pct2[extremes != "Normal", ],
aes(x = lab.pos, y = 0.9 * y, label = paste0(pct, "%"),
hjust = as.integer(extremes) * -1.5 + 3.5),
vjust = -0.5, angle = -90, size = 3.2,
color = rep(colorLab, dim(fcst.df)[2]))
#------------------------
# Finish all theme and legend details
#------------------------
Llorenç Lledó
committed
plot <- plot +
theme_minimal() +
scale_fill_manual(name = "Probability of\nterciles",
values = colorFill, drop = F) +
scale_color_manual(name = "Probability of\nextremes",
values = colorHatch) +
scale_shape_manual(name = "Ensemble\nmembers",
values = c(21)) +
scale_size_manual(name = "Observation",
values = c(3)) +
labs(x = var.name,
y = "Probability density\n(total area=1)",
title = title) +
theme(axis.text.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.key.size = unit(0.3, "in"),
panel.border = element_rect(fill = NA, color = "black"),
strip.background = element_rect(colour = "black", fill = "gray80"),
panel.spacing = unit(0.2, "in"),
panel.grid.major.x = element_line(color = "grey93")) +
guides(fill = guide_legend(order = 1),
color = guide_legend(order = 2),
shape = guide_legend(order = 3, label = F),
size = guide_legend(order = 4, label = F))
#------------------------
# Save to plotfile if needed, and return plot
#------------------------
if (!is.null(plotfile)) {
ggsave(plotfile, plot)
}
return(plot)
.jitter.ensmemb <- function(x, thr = 0.1) {
# Idea: start with first level. Loop all points, and if distance to last point in
# the level is more than a threshold, include the point to the level. Otherwise
# keep the point for another round. Do one round in each direction to avoid
# uggly patterns.
if (is.unsorted(x, na.rm = T)) {
stop("Provide a sorted array!")
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
lev <- x * 0
lev[is.na(lev)] <- -1
level <- 1
while (any(lev == 0)) {
last <- -1/0
for (i in 1:length(x)) {
if (lev[i] != 0) {
next
}
if (x[i] - last > thr) {
lev[i] <- level
last <- x[i]
}
}
level <- level + 1
last <- 1/0
for (i in seq(length(x), 1, -1)) {
if (lev[i] != 0) {
next
}
if (last - x[i] > thr) {
lev[i] <- level
last <- x[i]
}
}
level <- level + 1
lev[lev == -1] <- NA
return(lev * thr * sqrt(3)/2)
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
.polygon.onehatch <- function(x, y, x0, y0, xd, yd, fillOddEven = F) {
halfplane <- as.integer(xd * (y - y0) - yd * (x - x0) <= 0)
cross <- halfplane[-1L] - halfplane[-length(halfplane)]
does.cross <- cross != 0
if (!any(does.cross)) {
return()
}
x1 <- x[-length(x)][does.cross]
y1 <- y[-length(y)][does.cross]
x2 <- x[-1L][does.cross]
y2 <- y[-1L][does.cross]
t <- (((x1 - x0) * (y2 - y1) - (y1 - y0) * (x2 - x1))/(xd * (y2 - y1) - yd *
(x2 - x1)))
o <- order(t)
tsort <- t[o]
crossings <- cumsum(cross[does.cross][o])
if (fillOddEven) {
crossings <- crossings%%2
}
drawline <- crossings != 0
lx <- x0 + xd * tsort
ly <- y0 + yd * tsort
lx1 <- lx[-length(lx)][drawline]
ly1 <- ly[-length(ly)][drawline]
lx2 <- lx[-1L][drawline]
ly2 <- ly[-1L][drawline]
return(data.frame(x = lx1, y = ly1, xend = lx2, yend = ly2))
.polygon.fullhatch <- function(x, y, density, angle, width.units, height.units, inches = c(5,
1)) {
x <- c(x, x[1L])
y <- c(y, y[1L])
angle <- angle%%180
upi <- c(width.units, height.units)/inches
if (upi[1L] < 0) {
angle <- 180 - angle
if (upi[2L] < 0) {
angle <- 180 - angle
upi <- abs(upi)
xd <- cos(angle/180 * pi) * upi[1L]
yd <- sin(angle/180 * pi) * upi[2L]
hatch.ls <- list()
i <- 1
if (angle < 45 || angle > 135) {
if (angle < 45) {
first.x <- max(x)
last.x <- min(x)
} else {
first.x <- min(x)
last.x <- max(x)
}
y.shift <- upi[2L]/density/abs(cos(angle/180 * pi))
x0 <- 0
y0 <- floor((min(y) - first.x * yd/xd)/y.shift) * y.shift
y.end <- max(y) - last.x * yd/xd
while (y0 < y.end) {
hatch.ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd)
i <- i + 1
y0 <- y0 + y.shift
}
if (angle < 90) {
first.y <- max(y)
last.y <- min(y)
} else {
first.y <- min(y)
last.y <- max(y)
}
x.shift <- upi[1L]/density/abs(sin(angle/180 * pi))
x0 <- floor((min(x) - first.y * xd/yd)/x.shift) * x.shift
y0 <- 0
x.end <- max(x) - last.y * xd/yd
while (x0 < x.end) {
hatch.ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd)
i <- i + 1
x0 <- x0 + x.shift
}
return(do.call("rbind", hatch.ls))