Newer
Older
#'Plot one or multiple ensemble forecast pdfs for the same event
#'
#'@author Llorenç Lledó \email{llledo@bsc.es}
#'@description This function plots the probability distribution function of several ensemble forecasts for the same event, either initialized at different moments or by different models. Probabilities for tercile categories are computed, plotted in colors and annotated. Probabilities for extreme categories (above P90 and below P10) can also be included as hatched areas. Individual ensemble members can be plotted as jittered points. The observed value is optionally shown as a diamond.
#'
#'@param fcst_df a dataframe blabla TODO
#'@param tercile_limits an array with P33 and P66 values that define the tercile categories.
#'@param extreme_limits (optional) an array with P10 and P90 values that define the extreme categories. (Default: extreme categories are not shown).
#'@param obs (optional) a number with the observed value. (Default: observation is not shown).
#'@param plotfile (optional) a filename (pdf, png...) where the plot will be saved. (Default: the plot is not saved).
#'@param title a string with the plot title.
#'@param varname a string with the variable name and units.
#'@param fcst_names (optional) an array of strings with the titles of each individual forecast.
#'@param add_ensmemb either to add the ensemble members (above or below the pdf) or not.
#'@param colors a selection of predefined color sets: ggplot for blue/green/red, s2s4e for , or hydro for a suitable scale for precipitation/inflows.
#'
#'@return a ggplot object containing the plot.
#'
#'@import data.table
#'@import ggplot2
#'@import reshape2
#'@import plyr
#'
#'@examples
#' Blabla
#'
#'@export
PlotForecastPDF <- function(fcst_df, tercile_limits, extreme_limits=NULL,
obs=NULL, plotfile=NULL, title="Set a title",
varname="Varname (units)", fcst_names=NULL,
add_ensmemb=c("above", "below", "no"),
colors=c("ggplot", "s2s4e", "hydro")) {
#------------------------
colors <- match.arg(colors)
if (colors == "s2s4e") {
colorFill <- rev(c("#FF764D", "#b5b5b5", "#33BFD1"))
colorHatch <- c("deepskyblue3", "indianred3")
colorMember <- c("#ffff7f")
colorObs <- "purple"
colorLab <- c("blue", "red")
} else if (colors == "hydro") {
colorFill <- rev(c("#41CBC9", "#b5b5b5", "#FFAB38"))
colorHatch <- c("darkorange1", "deepskyblue3")
colorMember <- c("#ffff7f")
colorObs <- "purple"
colorLab <- c("darkorange3", "blue")
} else if (colors == "ggplot") {
gg_color_hue <- function(n) {
hues=seq(15, 375, length=n+1)
hcl(h=hues, l=65, c=100)[1:n]
}
colorFill <- rev(gg_color_hue(3))
colorHatch <- c("deepskyblue3", "indianred1")
colorMember <- c("#ffff7f")
colorObs <- "purple"
colorLab <- c("blue", "red")
} else {
stop("Unknown color set")
}
#------------------------
# Check input arguments
#------------------------
add_ensmemb <- match.arg(add_ensmemb)
if (length(tercile_limits) != 2) {
stop("Provide two limits for delimiting tercile categories")
}
if (tercile_limits[1] >= tercile_limits[2]) {
stop("The provided tercile limits are in the wrong order")
}
if (!is.null(extreme_limits)) {
if (length(extreme_limits) != 2) {
stop("Provide two limits for delimiting extreme categories")
}
if (extreme_limits[1] >= tercile_limits[1] |
extreme_limits[2] <= tercile_limits[2]) {
stop("The provided extreme limits are not consistent with tercile limits")
}
}
#------------------------
# Set proper fcst names
#------------------------
if (!is.null(fcst_names)) {
colnames(fcst_df) <- factor(fcst_names, levels=fcst_names)
}
#------------------------
# Produce a first plot with the pdf for each init in a panel
#------------------------
melt_df <- melt(fcst_df, variable.name="init")
plot <- ggplot(melt_df, aes(x=value)) +
geom_density(alpha=1, na.rm=T) +
coord_flip() +
facet_wrap(~init, strip.position="top", nrow=1) +
xlim(range(c(obs, density(melt_df$value, na.rm=T)$x)))
#------------------------
# Gather the coordinates of the plots
# together with init and corresponding terciles
#------------------------
tmp.df <- ggp$data[[1]][, c("x", "ymin", "ymax", "PANEL")]
tmp.df$init <- ggp$layout$layout$init[as.numeric(tmp.df$PANEL)]
tmp.df$tercile <- factor(ifelse(tmp.df$x<tercile_limits[1], "Below normal",
ifelse(tmp.df$x<tercile_limits[2], "Normal", "Above normal")),
levels=c("Below normal", "Normal", "Above normal"))
#------------------------
# Get the height and width of a panel
#------------------------
pan_width <- diff(range(tmp.df$x))
pan_height <- max(tmp.df$ymax)
magicratio <- 9 * pan_height / pan_width
#------------------------
# Compute hatch coordinates for extremes
#------------------------
if (!is.null(extreme_limits)) {
tmp.df$extremes <- factor(ifelse(tmp.df$x<extreme_limits[1], "Below P10",
ifelse(tmp.df$x<extreme_limits[2], "Normal", "Above P90")),
levels=c("Below P10", "Normal", "Above P90"))
hatch.ls <- dlply(tmp.df, .(init, extremes), function(x) {
# close the polygon
tmp.df2 <- data.frame(x=c(x$x, max(x$x), min(x$x)), y=c(x$ymax, 0, 0))
# compute the hatches for this polygon
hatches <- .polygon.fullhatch(tmp.df2$x, tmp.df2$y, angle=60, density=10,
width_units=pan_width, height_units=pan_height)
# add bottom segment
end1 <- data.frame(x=x$x[1], y=x$ymax[1], xend=x$x[1], yend=0)
# add top segment
end2 <- data.frame(x=x$x[length(x$x)], y=x$ymax[length(x$x)],
xend=x$x[length(x$x)], yend=0)
return(rbind(hatches, end1, end2))
})
attr <- attr(hatch.ls,"split_labels")
for (i in 1:length(hatch.ls)) {
hatch.ls[[i]] <- cbind(hatch.ls[[i]], attr[i,])
}
hatch.df <- do.call("rbind", hatch.ls)
# Compute max y for each extreme category
max.ls <- dlply(tmp.df, .(init,extremes), function(x) {
data.frame(y=min(0.85 * pan_height, max(x$ymax)))
})
attr <- attr(max.ls, "split_labels")
for (i in 1:length(max.ls)) {
max.ls[[i]] <- cbind(max.ls[[i]], attr[i,])
}
max.df <- do.call("rbind", max.ls)
}
#------------------------
# Compute jitter space for ensemble members
#------------------------
jitter_df <- melt(data.frame(dlply(melt_df, .(init), function(x) {
.jitter_ensmemb(sort(x$value), pan_width / 100) }), check.names=F),
value.name="yjitter", variable.name="init")
jitter_df$x <- melt(data.frame(dlply(melt_df, .(init), function(x) {
sort(x$value)}) ), value.name="x")$x
#------------------------
# Get y coordinates for observed x values,
# using a cool data.table feature: merge to nearest value
#------------------------
if (!is.null(obs)) {
tmp.dt <- data.table(tmp.df, key=c("init", "x"))
obs_dt <- data.table(init=factor(colnames(fcst_df), levels=colnames(fcst_df)),
value=rep(obs, dim(fcst_df)[2]))
setkey(obs_dt, init, value)
obs_xy <- tmp.dt[obs_dt, roll="nearest"]
}
#------------------------
# Fill each pdf with different colors for the terciles
#------------------------
plot <- plot +
geom_ribbon(data=tmp.df, aes(x=x, ymin=ymin, ymax=ymax, fill=tercile), alpha=0.7)
#------------------------
# Add hatches for extremes
#------------------------
if (!is.null(extreme_limits)) {
if (nrow(hatch.df[hatch.df$extremes!="Normal", ])==0) {
warning("The provided extreme categories are outside the plot bounds. The extremes will not be drawn.")
extreme_limits <- NULL
} else {
plot <- plot +
geom_segment(data=hatch.df[hatch.df$extremes!="Normal", ], aes(x=x, y=y,
xend=xend, yend=yend, color=extremes))
#------------------------
# Add obs line
#------------------------
if (!is.null(obs)) {
plot <- plot +
geom_vline(data=obs_dt, aes(xintercept=value), linetype="dashed", color=colorObs)
}
#------------------------
# Add ensemble members
#------------------------
if (add_ensmemb == "below") {
plot <- plot +
# this adds a grey box for ensmembers
geom_rect(aes(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=-pan_height/10),
fill="gray95", color="black", width=0.2) +
# this adds the ensemble members
geom_point(data=jitter_df, color="black", fill=colorMember, alpha=1,
aes(x=x, y=-pan_height/10-magicratio*yjitter, shape="Ensemble members"))
} else if (add_ensmemb == "above") {
plot <- plot +
geom_point(data=jitter_df, color="black", fill=colorMember, alpha=1,
aes(x=x, y=0.7*magicratio*yjitter, shape="Ensemble members"))
#------------------------
# Add obs diamond
#------------------------
if (!is.null(obs)) {
plot <- plot +
geom_point(data=obs_xy, aes(x=x, y=ymax, size="Observation"),
shape=23, color="black", fill=colorObs)
#------------------------
# Compute probability for each tercile and identify MLT
#------------------------
tmp.dt <- data.table(tmp.df)
pct <- tmp.dt[, .(pct=integrate(approxfun(x, ymax), lower=min(x), upper=max(x))$value),
by=.(init, tercile)]
tot <- pct[, .(tot=sum(pct)), by=init]
pct <- merge(pct, tot, by="init")
pct$pct <- round(100 * pct$pct / pct$tot, 0)
pct$MLT <- pct[, .(MLT=pct==max(pct)), by=init]$MLT
lab_pos <- c(tercile_limits[1], mean(tercile_limits), tercile_limits[2])
#------------------------
# Compute probability for extremes
#------------------------
if (!is.null(extreme_limits)) {
pct2 <- tmp.dt[, .(pct=integrate(approxfun(x, ymax), lower=min(x), upper=max(x))$value),
by=.(init, extremes)]
tot2 <- pct2[, .(tot=sum(pct)), by=init]
pct2 <- merge(pct2, tot2, by="init")
pct2$pct <- round(100 * pct2$pct / pct2$tot, 0)
lab_pos2 <- c(extreme_limits[1], NA, extreme_limits[2])
pct2 <- merge(pct2, max.df, by=c("init", "extremes"))
# include potentially missing groups
pct2 <- pct2[CJ(levels(pct2$init), factor(c("Below P10", "Normal", "Above P90"),
levels=c("Below P10", "Normal", "Above P90"))), ]
#------------------------
# Add probability labels for terciles
#------------------------
if (add_ensmemb == "above") {
labpos <- -0.2 * pan_height
vjust <- 0
} else {
labpos <- 0
vjust <- -0.5
}
plot <- plot +
geom_text(data=pct, aes(x=lab_pos[as.integer(tercile)], y=labpos, label=paste0(pct, "%"),
hjust=as.integer(tercile) * 1.5 - 2.5), vjust=vjust, angle=-90, size=3.2) +
geom_text(data=pct[MLT==T, ], aes(x=lab_pos[as.integer(tercile)], y=labpos,
label="*", hjust=as.integer(tercile) * 3.5 - 5.0), vjust=0.1, angle=-90,
size=7, color="black")
#------------------------
# Add probability labels for extremes
#------------------------
if (!is.null(extreme_limits)) {
plot <- plot +
geom_text(data=pct2[extremes!="Normal", ], aes(x=lab_pos2[as.integer(extremes)],
y=0.9*y, label=paste0(pct, "%"), hjust=as.integer(extremes) * 1.5 - 2.5),
vjust=-0.5, angle=-90, size=3.2, color=rep(colorLab, dim(fcst_df)[2]))
#------------------------
# Finish all theme and legend details
#------------------------
plot <- plot +
theme_minimal() +
scale_fill_manual(name="Probability of\nterciles",
breaks=c("Above normal", "Normal", "Below normal"),
values=colorFill, drop=F) +
scale_color_manual(name="Probability of\nextremes", values=colorHatch) +
scale_shape_manual(name="Ensemble\nmembers", values=c(21)) +
scale_size_manual(name="Observation", values=c(3)) +
labs(x=varname, y="Probabilty density\n(total area=1)", title=title) +
theme(axis.text.x=element_blank(), panel.grid.minor.x=element_blank(),
legend.key.size=unit(0.3, "in"),
panel.border=element_rect(fill=NA, color="black"),
strip.background=element_rect(colour="black", fill="gray80"),
panel.spacing=unit(0.2, "in"),
panel.grid.major.x=element_line(color="grey93")) +
guides(fill=guide_legend(order=1), color=guide_legend(order=2, reverse=T),
shape=guide_legend(order=3, label=F), size=guide_legend(order=4, label=F))
#------------------------
# Save to plotfile if needed, and return plot
#------------------------
if (!is.null(plotfile)) {
ggsave(plotfile, plot)
}
}
#==================
# A function to distribute ensemble members so not to overlap
# Requires a sorted array of values.
#==================
.jitter_ensmemb <- function(x, thr=0.1) {
# Idea: start with first level. Loop all points,
# and if distance to last point in the level is more than a threshold,
# include the point to the level.
# Otherwise keep the point for another round.
# Do one round in each direction to avoid uggly patterns.
if (is.unsorted(x)) {
stop("Provide a sorted array!")
}
lev <- x * 0
level <- 1
while (any(lev == 0)) {
last <- -1/0
if (lev[i] != 0) {
next
}
if (x[i] - last > thr) {
lev[i] <- level
last <- x[i]
}
}
level <- level + 1
last <- 1 / 0
for (i in seq(length(x), 1, -1)) {
if (lev[i] != 0) {
next
}
if (last - x[i] > thr) {
lev[i] <- level
last <- x[i]
}
}
level <- level + 1
}
return(lev * thr * sqrt(3) / 2)
}
#==================
# Hatch one polygon
# Based on base polygon function
#==================
.polygon.onehatch <- function(x, y, x0, y0, xd, yd, fillOddEven=F) {
halfplane <- as.integer(xd * (y - y0) - yd * (x - x0) <= 0)
cross <- halfplane[-1L] - halfplane[-length(halfplane)]
does.cross <- cross != 0
if (!any(does.cross)) {
return()
}
x1 <- x[-length(x)][does.cross]
y1 <- y[-length(y)][does.cross]
x2 <- x[-1L][does.cross]
y2 <- y[-1L][does.cross]
t <- (((x1 - x0) * (y2 - y1) - (y1 - y0) * (x2 - x1))/(xd * (y2 - y1) - yd * (x2 - x1)))
o <- order(t)
tsort <- t[o]
crossings <- cumsum(cross[does.cross][o])
if (fillOddEven) {
crossings <- crossings%%2
}
drawline <- crossings != 0
lx <- x0 + xd * tsort
ly <- y0 + yd * tsort
lx1 <- lx[-length(lx)][drawline]
ly1 <- ly[-length(ly)][drawline]
lx2 <- lx[-1L][drawline]
ly2 <- ly[-1L][drawline]
return(data.frame(x=lx1, y=ly1, xend=lx2, yend=ly2))
}
#==================
# Hatch one polygon
# Based on base polygon function
#==================
.polygon.fullhatch <- function(x, y, density, angle, width_units, height_units, inches=c(5, 1)) {
x <- c(x, x[1L])
y <- c(y, y[1L])
angle <- angle%%180
upi <- c(width_units, height_units) / inches
if (upi[1L] < 0) {
angle <- 180 - angle
}
if (upi[2L] < 0) {
angle <- 180 - angle
}
upi <- abs(upi)
xd <- cos(angle / 180 * pi) * upi[1L]
yd <- sin(angle / 180 * pi) * upi[2L]
hatch_ls <- list()
i <- 1
if (angle < 45 || angle > 135) {
if (angle < 45) {
first.x <- max(x)
last.x <- min(x)
} else {
first.x <- min(x)
last.x <- max(x)
}
y.shift <- upi[2L] / density / abs(cos(angle / 180 * pi))
x0 <- 0
y0 <- floor((min(y) - first.x * yd/xd) / y.shift) * y.shift
y.end <- max(y) - last.x * yd/xd
while (y0 < y.end) {
hatch_ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd)
y0 <- y0 + y.shift
}
} else {
if (angle < 90) {
first.y <- max(y)
last.y <- min(y)
} else {
first.y <- min(y)
last.y <- max(y)
x.shift <- upi[1L] / density / abs(sin(angle / 180 * pi))
x0 <- floor((min(x) - first.y * xd / yd) / x.shift) * x.shift
hatch_ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd)
}
return(do.call("rbind", hatch_ls))