#'Plot one or multiple ensemble forecast pdfs for the same event #' #'@author Llorenç Lledó \email{llledo@bsc.es} #'@description This function plots the probability distribution function of several ensemble forecasts for the same event, either initialized at different moments or by different models. Probabilities for tercile categories are computed, plotted in colors and annotated. Probabilities for extreme categories (above P90 and below P10) can also be included as hatched areas. Individual ensemble members can be plotted as jittered points. The observed value is optionally shown as a diamond. #' #'@param fcst a dataframe or array containing all the ensember members for each frecast. If \code{"fcst"} is an array, it should have two labelled dimensions, and one of them should be \code{"members"}. If \code{"fcsts"} is a data.frame, each column shoul be a separate forecast, with the rows beeing the different ensemble members. #'@param tercile.limits an array with P33 and P66 values that define the tercile categories. #'@param extreme.limits (optional) an array with P10 and P90 values that define the extreme categories. (Default: extreme categories are not shown). #'@param obs (optional) a number with the observed value. (Default: observation is not shown). #'@param plotfile (optional) a filename (pdf, png...) where the plot will be saved. (Default: the plot is not saved). #'@param title a string with the plot title. #'@param var.name a string with the variable name and units. #'@param fcst.names (optional) an array of strings with the titles of each individual forecast. #'@param add.ensmemb either to add the ensemble members (above or below the pdf) or not. #'@param color.set a selection of predefined color sets: use \code{"ggplot"} (default) for blue/green/red, \code{"s2s4e"} for blue/grey/orange, or \code{"hydro"} for yellow/gray/blue (suitable for precipitation and inflows). #' #'@return a ggplot object containing the plot. #' #'@import data.table #'@import ggplot2 #'@import reshape2 #'@import plyr #' #'@examples #'fcsts <- data.frame(fcst1=rnorm(50),fcst2=rnorm(50,0.5,1.2),fcst3=rnorm(50,-0.5,0.9)) #'PlotForecastPDF(fcsts,c(-1,1)) #'fcsts2 <- array(rnorm(100),dim=c(members=20,fcst=5)) #'PlotForecastPDF(fcsts2,c(-0.66,0.66),extreme.limits=c(-1.2,1.2),fcst.names=paste0("random fcst ",1:5),obs=0.7) #' #' #'@export PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits=NULL, obs=NULL, plotfile=NULL, title="Set a title", var.name="Varname (units)", fcst.names=NULL, add.ensmemb=c("above", "below", "no"), color.set=c("ggplot", "s2s4e", "hydro")) { #------------------------ # Define color sets #------------------------ color.set <- match.arg(color.set) if (color.set == "s2s4e") { colorFill <- rev(c("#FF764D", "#b5b5b5", "#33BFD1")) colorHatch <- c("deepskyblue3", "indianred3") colorMember <- c("#ffff7f") colorObs <- "purple" colorLab <- c("blue", "red") } else if (color.set == "hydro") { colorFill <- rev(c("#41CBC9", "#b5b5b5", "#FFAB38")) colorHatch <- c("darkorange1", "deepskyblue3") colorMember <- c("#ffff7f") colorObs <- "purple" colorLab <- c("darkorange3", "blue") } else if (color.set == "ggplot") { gg.color.hue <- function(n) { hues=seq(15, 375, length=n+1) hcl(h=hues, l=65, c=100)[1:n] } colorFill <- rev(gg.color.hue(3)) colorHatch <- c("deepskyblue3", "indianred1") colorMember <- c("#ffff7f") colorObs <- "purple" colorLab <- c("blue", "red") } else { stop("Parameter 'color.set' should be one of ggplot/s2s4e/hydro") } #------------------------ # Check input arguments #------------------------ add.ensmemb <- match.arg(add.ensmemb) if (length(tercile.limits) != 2) { stop("Parameter 'tercile.limits' should be an array with two limits for delimiting tercile categories") } if (tercile.limits[1] >= tercile.limits[2]) { stop("The provided tercile limits are in the wrong order") } if (!is.null(extreme.limits)) { if (length(extreme.limits) != 2) { stop("Parameter 'extreme.limits' should be an array with two limits for delimiting extreme categories") } if (extreme.limits[1] >= tercile.limits[1] | extreme.limits[2] <= tercile.limits[2]) { stop("The provided extreme limits are not consistent with tercile limits") } } #------------------------ # Check fcst type and convert to data.frame if needed #------------------------ if (is.array(fcst)) { if (!"members" %in% names(dim(fcst)) | length(dim(fcst)) != 2) { stop("Parameter 'fcst' should be a two-dimensional array with labelled dimensions and one of them should be 'members'") } dim.members <- which(names(dim(fcst)) == "members") if (dim.members == 1) { fcst.df <- data.frame(fcst) } else { fcst.df <- data.frame(t(fcst)) } } else if (is.data.frame(fcst)) { fcst.df <- fcst } else { stop("Parameter 'fcst' should be an array or a data.frame") } #------------------------ # Set proper fcst names #------------------------ if (!is.null(fcst.names)) { if (length(fcst.names) != dim(fcst.df)[2]) { stop("Parameter 'fcst.names' should be an array with as many names as distinct forecasts provided") } colnames(fcst.df) <- factor(fcst.names, levels=fcst.names) } #------------------------ # Produce a first plot with the pdf for each init in a panel #------------------------ melt.df <- melt(fcst.df, variable.name="init", id.vars=NULL) plot <- ggplot(melt.df, aes(x=value)) + geom_density(alpha=1, na.rm=T) + coord_flip() + facet_wrap(~init, strip.position="top", nrow=1) + xlim(range(c(obs, density(melt.df$value, na.rm=T)$x))) ggp <- ggplot_build(plot) #------------------------ # Gather the coordinates of the plots # together with init and corresponding terciles #------------------------ tmp.df <- ggp$data[[1]][, c("x", "ymin", "ymax", "PANEL")] if (!is.null(ggp$layout$layout)) { tmp.df$init <- ggp$layout$layout$init[as.numeric(tmp.df$PANEL)] } else if (!is.null(ggp$layout$panel_layout)) { tmp.df$init <- ggp$layout$panel_layout$init[as.numeric(tmp.df$PANEL)] } else { stop("Cannot find PANELS in ggp object") } tmp.df$tercile <- factor(ifelse(tmp.df$x < tercile.limits[1], "Below normal", ifelse(tmp.df$x < tercile.limits[2], "Normal", "Above normal")), levels=c("Below normal", "Normal", "Above normal")) #------------------------ # Get the height and width of a panel #------------------------ pan.width <- diff(range(tmp.df$x)) pan.height <- max(tmp.df$ymax) magic.ratio <- 9 * pan.height / pan.width #------------------------ # Compute hatch coordinates for extremes #------------------------ if (!is.null(extreme.limits)) { tmp.df$extremes <- factor(ifelse(tmp.df$x thr) { lev[i] <- level last <- x[i] } } level <- level + 1 last <- 1 / 0 for (i in seq(length(x), 1, -1)) { if (lev[i] != 0) { next } if (last - x[i] > thr) { lev[i] <- level last <- x[i] } } level <- level + 1 } lev[lev==-1] <- NA return(lev * thr * sqrt(3) / 2) } #================== # Hatch one polygon # Based on base polygon function #================== .polygon.onehatch <- function(x, y, x0, y0, xd, yd, fillOddEven=F) { halfplane <- as.integer(xd * (y - y0) - yd * (x - x0) <= 0) cross <- halfplane[-1L] - halfplane[-length(halfplane)] does.cross <- cross != 0 if (!any(does.cross)) { return() } x1 <- x[-length(x)][does.cross] y1 <- y[-length(y)][does.cross] x2 <- x[-1L][does.cross] y2 <- y[-1L][does.cross] t <- (((x1 - x0) * (y2 - y1) - (y1 - y0) * (x2 - x1))/(xd * (y2 - y1) - yd * (x2 - x1))) o <- order(t) tsort <- t[o] crossings <- cumsum(cross[does.cross][o]) if (fillOddEven) { crossings <- crossings%%2 } drawline <- crossings != 0 lx <- x0 + xd * tsort ly <- y0 + yd * tsort lx1 <- lx[-length(lx)][drawline] ly1 <- ly[-length(ly)][drawline] lx2 <- lx[-1L][drawline] ly2 <- ly[-1L][drawline] return(data.frame(x=lx1, y=ly1, xend=lx2, yend=ly2)) } #================== # Hatch one polygon # Based on base polygon function #================== .polygon.fullhatch <- function(x, y, density, angle, width.units, height.units, inches=c(5, 1)) { x <- c(x, x[1L]) y <- c(y, y[1L]) angle <- angle%%180 upi <- c(width.units, height.units) / inches if (upi[1L] < 0) { angle <- 180 - angle } if (upi[2L] < 0) { angle <- 180 - angle } upi <- abs(upi) xd <- cos(angle / 180 * pi) * upi[1L] yd <- sin(angle / 180 * pi) * upi[2L] hatch.ls <- list() i <- 1 if (angle < 45 || angle > 135) { if (angle < 45) { first.x <- max(x) last.x <- min(x) } else { first.x <- min(x) last.x <- max(x) } y.shift <- upi[2L] / density / abs(cos(angle / 180 * pi)) x0 <- 0 y0 <- floor((min(y) - first.x * yd/xd) / y.shift) * y.shift y.end <- max(y) - last.x * yd/xd while (y0 < y.end) { hatch.ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd) i <- i + 1 y0 <- y0 + y.shift } } else { if (angle < 90) { first.y <- max(y) last.y <- min(y) } else { first.y <- min(y) last.y <- max(y) } x.shift <- upi[1L] / density / abs(sin(angle / 180 * pi)) x0 <- floor((min(x) - first.y * xd / yd) / x.shift) * x.shift y0 <- 0 x.end <- max(x) - last.y * xd / yd while (x0 < x.end) { hatch.ls[[i]] <- .polygon.onehatch(x, y, x0, y0, xd, yd) i <- i + 1 x0 <- x0 + x.shift } } return(do.call("rbind", hatch.ls)) }