diff --git a/DESCRIPTION b/DESCRIPTION index 09b9cf06450e0e4b680739590b7975f895565646..640884728ab9f13d33bc33a14610120e7d7a28ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,6 +58,7 @@ Imports: data.table, reshape2, ggplot2, + RColorBrewer, graphics, grDevices, stats, diff --git a/NAMESPACE b/NAMESPACE index e070ff78e4d57862b7f3fa934184e1c1d9d2ffea..8ba1c4cd936c405bdc16425b237afb2b73f648bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ import(s2dverification) import(stats) importFrom(ClimProjDiags,SelBox) importFrom(ClimProjDiags,Subset) +importFrom(RColorBrewer,brewer.pal) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) @@ -68,12 +69,18 @@ importFrom(grDevices,png) importFrom(grDevices,postscript) importFrom(grDevices,svg) importFrom(grDevices,tiff) +importFrom(graphics,axis) importFrom(graphics,box) importFrom(graphics,image) importFrom(graphics,layout) importFrom(graphics,mtext) importFrom(graphics,par) +importFrom(graphics,plot) importFrom(graphics,plot.new) +importFrom(graphics,points) +importFrom(graphics,polygon) +importFrom(graphics,text) +importFrom(graphics,title) importFrom(maps,map) importFrom(plyr,.) importFrom(plyr,dlply) diff --git a/NEWS.md b/NEWS.md index 1a9069e5925f3a2869dc8c18f823b2a5e0a5baa7..fab70c708389c95d71195da561f6b346cfed7d2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,7 @@ + CST_WeatherRegimes/WeatherRegimes and CST_RegimeAssign/RegimeAssign + PlotPDFsOLE plots two probability density gaussian functions and the optimal linear estimation + CST_RFTemp/RF_Temp functions available for downscaling temperature + + Weather Regimes vignette - Fixes + CST_Anomaly handles exp, obs or both diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 0885f82d515b54776cc3c4c444014833933dec46..b0817a71e46a10051eba46eaa53bd5e9fbe78650 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -15,9 +15,11 @@ #'@param data a 's2dv_cube' object. #'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. -#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or -#' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). +#'@param method whether the matching will be performed in terms of minimum distance (default = ’distance’) or +#' the maximum spatial correlation (method = ’ACC’) between the maps. +#'@param composite a logical parameter indicating if the composite maps are computed or not (default = FALSE). +#'@param memb a logical value indicating whether to compute composites for separate members (default FALSE) or as unique ensemble (TRUE). +#'This option is only available for when parameter 'composite' is set to TRUE and the data object has a dimension named 'member'. #'@param ncores the number of multicore threads to use for parallel computation. #'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes @@ -29,16 +31,16 @@ #'@import multiApply #'@importFrom ClimProjDiags Subset #'@examples -#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) -#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) -#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) +#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = FALSE, ncenters = 4) +#'res1 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps = regimes, composite = FALSE) +#'res2 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps = regimes, composite = TRUE) #'@export #' CST_RegimesAssign <- function(data, ref_maps, - method = "distance", - composite = FALSE, - ncores=NULL) { + method = "distance", + composite = FALSE, + memb = FALSE, ncores = NULL) { if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") @@ -51,17 +53,18 @@ CST_RegimesAssign <- function(data, ref_maps, if ('lat' %in% names(data)){ lat <- data$lat - }else { + } else { lat <- NULL } - result <- Apply(data=list(data=data$data, ref_maps=ref_maps$data), lat= lat, fun=RegimesAssign, - target_dims=list(names(dim(data$data)),c('lat','lon','cluster')), - method = method, composite = composite, ncores=ncores) + result <- Apply(data = list(data = data$data, ref_maps = ref_maps$data), + lat = lat, fun = RegimesAssign, + target_dims = list(names(dim(data$data)), c('lat', 'lon', 'cluster')), + method = method, memb = memb, composite = composite, ncores = ncores) if (composite){ data$data <- result$composite data$statistics <- result[-1] - }else{ + } else { data <- NULL data$statistics <- result } @@ -83,28 +86,33 @@ CST_RegimesAssign <- function(data, ref_maps, #'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. -#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or +#'@param ref_maps array with 3-dimensions ('lon', 'lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. +#'@param method whether the matching will be performed in terms of minimum distance (default = ’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param lat a vector of latitudes corresponding to the positions provided in data and ref_maps. #'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). +#'@param memb a logical value indicating whether to compute composites for separate members (default FALSE) or as unique ensemble (TRUE). +#'This option is only available for when parameter 'composite' is set to TRUE and the data object has a dimension named 'member'. #'@param ncores the number of multicore threads to use for parallel computation. #'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), #' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test #' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), #' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , -#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' \code{$frequency} (A vector of integers (from k = 1, ... k n reference maps) indicating the percentage of assignations corresponding to each map.), #' #'@import s2dverification #'@import multiApply #'@importFrom ClimProjDiags Subset #'@examples -#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), -#'lat=lonlat_data$exp$lat,composite=FALSE) +#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat = lonlat_data$obs$lat, +#' EOFs = FALSE, ncenters = 4)$composite +#'res1 <- RegimesAssign(data = lonlat_data$exp$data, ref_maps = drop(regimes), +#' lat = lonlat_data$exp$lat, composite = FALSE) #'@export -RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) { +RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, + memb = FALSE, ncores = NULL) { if (is.null(names(dim(data)))) { stop("Parameter 'data' must be an array with named dimensions.") @@ -116,11 +124,15 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = if (is.null(lat)) { stop("Parameter 'lat' must be specified.") } - if (is.null(names(dim(ref_maps)))) { stop("Parameter 'ref_maps' must be an array with named dimensions.") } - + if (!is.logical(memb)) { + stop("Parameter 'memb' must be logical.") + } + if (!is.logical(composite)) { + stop("Parameter 'memb' must be logical.") + } dimData <- names(dim(data)) if (!all( c('lat', 'lon') %in% dimData)) { @@ -154,13 +166,12 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = stop("Parameter 'data' must have temporal dimensions.") } } - - index <- Apply( data = list(target = data), - target_dims = c('lat','lon'), - fun = .RegimesAssign, - ref = ref_maps, - lat = lat, method = method, - ncores=ncores)[[1]] + ref_maps <- drop(ref_maps) + index <- Apply(data = list(ref = ref_maps, target = data), + target_dims = list(c('lat', 'lon', 'cluster'), c('lat', 'lon')), + fun = .RegimesAssign, + lat = lat, method = method, + ncores = ncores)[[1]] nclust <- dim(ref_maps)['cluster'] freqs <- rep(NA, nclust) @@ -182,11 +193,15 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), postime, dim(ref_maps)['composite.cluster'])) } else { + if (memb) { + dataComp <- MergeDims(dataComp, merge_dims = c('time', 'member'), rename_dim = 'time') + index <- MergeDims(index, merge_dims = c('time', 'member'), rename_dim = 'time') + } recon <- - Apply(data = list(var = dataComp, occ = index), - target_dims = list(c('lon', 'lat', 'time'), c('time')), - fun = Composite, - K = dim(ref_maps)['cluster']) + Apply(data = list(var = dataComp, occ = index), + target_dims = list(c('lon', 'lat', 'time'), c('time')), + fun = Composite, + K = dim(ref_maps)['cluster']) } output <- list(composite = recon$composite, @@ -208,7 +223,6 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = poslon <- which(names(dim(ref)) == 'lon') nclust <- dim(ref)[posdim] - if (all(dim(ref)[-posdim] != dim(target))) { stop('The target should have the same dimensions [lat,lon] that the reference ') @@ -229,17 +243,14 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = if (is.na(max(target))){ assign <- NA - }else{ + } else{ # This dimensions are reorganized ref <- aperm(ref, c(posdim, poslat, poslon)) - target <- - aperm(target, c(which(names(dim( - target - )) == 'lat'), which(names(dim( - target - )) == 'lon'))) + target <- aperm(target, + c(which(names(dim(target)) == 'lat'), + which(names(dim(target)) == 'lon'))) # weights are defined latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 209aa1b67e3df6a6d611d5667428360e0f745f28..f4495a29303e2768818ed6c2752a38b162a8d385 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -26,7 +26,7 @@ #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param nstart Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param ncores The number of multicore threads to use for parallel computation. #'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes @@ -37,15 +37,15 @@ #'@import s2dverification #'@import multiApply #'@examples -#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) -#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = FALSE, ncenters = 4) +#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = TRUE, ncenters = 3) #'@export #' CST_WeatherRegimes <- function(data, ncenters = NULL, - EOFS = TRUE,neofs = 30, + EOFs = TRUE, neofs = 30, varThreshold = NULL, method = "kmeans", - iter.max=100, nstart = 30, + iter.max = 100, nstart = 30, ncores = NULL) { if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube', ", @@ -57,7 +57,7 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, lon <- NULL } result <- WeatherRegime(data$data,ncenters = ncenters, - EOFS = EOFS, neofs = neofs, + EOFs = EOFs, neofs = neofs, varThreshold = varThreshold, lon = lon, lat = data$lat, method = method, iter.max=iter.max, nstart = nstart, @@ -98,7 +98,8 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param nstart Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param ncores The number of multicore threads to use for parallel computation. #'@return A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), #' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial @@ -109,11 +110,12 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, #'@import s2dverification #'@import multiApply #'@examples -#'res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +#'res <- WeatherRegime(data = lonlat_data$obs$data, lat = lonlat_data$obs$lat, +#' EOFs = FALSE, ncenters = 4) #'@export WeatherRegime <- function(data, ncenters = NULL, - EOFS = TRUE,neofs = 30, + EOFs = TRUE,neofs = 30, varThreshold = NULL, lon = NULL, lat = NULL, method = "kmeans", iter.max=100, nstart = 30, @@ -133,7 +135,7 @@ WeatherRegime <- function(data, ncenters = NULL, nsdates <- dim(data)['sdate'] nftimes <- dim(data)['ftime'] data <- MergeDims(data, - merge_dims = c('ftime','sdate'), + merge_dims = c('ftime', 'sdate'), rename_dim = 'time') } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' @@ -147,37 +149,37 @@ WeatherRegime <- function(data, ncenters = NULL, output <- Apply(data = list(data), target_dims = c('time','lat','lon'), fun = .WeatherRegime, - EOFS = EOFS, neofs = neofs, + EOFs = EOFs, neofs = neofs, varThreshold = varThreshold, lon = lon, lat = lat, ncenters = ncenters, method = method, ncores = ncores) - if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { + if (method == 'kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { # The frequency and the persistency are computed as they are useful # parameters in the cluster analysis extra_output <- Apply(data = output$cluster, - target_dims = 'time', - fun = .freqPer, - nsdates = nsdates, - nftimes = nftimes , - ncenters = ncenters) + target_dims = 'time', + fun = .freqPer, + nsdates = nsdates, + nftimes = nftimes , + ncenters = ncenters) - output$cluster <- t(array(output$cluster,dim=c(nftimes,nsdates))) - names(dim(output$cluster)) <- c('sdate','ftime') + output$cluster <- t(array(output$cluster, dim = c(nftimes, nsdates))) + names(dim(output$cluster)) <- c('sdate', 'ftime') - output <- list(composite=output$composite, - pvalue=output$pvalue, - cluster=output$cluster, - frequency=extra_output$frequency, - persistence=extra_output$persistence) + output <- list(composite = output$composite, + pvalue = output$pvalue, + cluster = output$cluster, + frequency = extra_output$frequency, + persistence = extra_output$persistence) } return(output) } -.WeatherRegime <- function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, +.WeatherRegime <- function(data, ncenters = NULL, EOFs = TRUE,neofs = 30, varThreshold = NULL, lon = NULL, lat = NULL, method = "kmeans", iter.max=100, nstart = 30) { @@ -193,7 +195,7 @@ WeatherRegime <- function(data, ncenters = NULL, if (is.null(ncenters)) { stop("Parameter 'ncenters' must be specified.") } - if (EOFS == TRUE && is.null(lon)) { + if (EOFs == TRUE && is.null(lon)) { stop("Parameter 'lon' must be specified.") } if (is.null(lat)) { @@ -211,7 +213,7 @@ WeatherRegime <- function(data, ncenters = NULL, } } - if (EOFS == TRUE) { + if (EOFs == TRUE) { if (is.null(varThreshold)) { dataPC <- EOF(data, lat = as.vector(lat), diff --git a/R/PlotPDFsOLE.R b/R/PlotPDFsOLE.R index c7239c73a3089ab8389c91f677b18767cd97371f..fc4ad76e230b535456f6c6e5aa5930dc42761424 100644 --- a/R/PlotPDFsOLE.R +++ b/R/PlotPDFsOLE.R @@ -44,7 +44,7 @@ PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, plotfile = NULL, width = 30, height = 15, units = "cm", dpi = 300) { - + y <- type <- NULL if(!is.null(plotfile)){ if (!is.numeric(dpi)) { @@ -168,30 +168,30 @@ PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, plotfile = NULL, #----------------------------------------------------------------------------- nameBest <- paste0(name1, " + ", name2) graphicTitle <- "OPTIMAL LINEAR ESTIMATION" - xlimSup <- max(nsigma*sigmaBest+meanBest, nsigma*sigma1+mean1, - nsigma*sigma2+mean2) - xlimInf <- min(-nsigma*sigmaBest+meanBest, -nsigma*sigma1+mean1, - -nsigma*sigma2+mean2) + xlimSup <- max(nsigma * sigmaBest + meanBest, nsigma * sigma1 + mean1, + nsigma * sigma2 + mean2) + xlimInf <- min(-nsigma * sigmaBest+meanBest, - nsigma * sigma1 + mean1, + -nsigma * sigma2 + mean2) # deltax <- 0.02 - deltax <- (xlimSup - xlimInf)/10000 + deltax <- (xlimSup - xlimInf) / 10000 - x <- seq(xlimInf,xlimSup,deltax) - df1 <- data.frame(x=x,y=dnorm(x,mean=mean1, sd=sigma1), - type=name1) - df2 <- data.frame(x=x,y=dnorm(x,mean=mean2, sd=sigma2), - type=name2) - df3 <- data.frame(x=x,y=dnorm(x,mean=meanBest, sd=sigmaBest), - type=nameBest) - df123 <- rbind(df1,df2,df3) - label1 <- paste0(name1, ": N(mean=",round(mean1,2),", sd=", round(sigma1,2), + x <- seq(xlimInf, xlimSup, deltax) + df1 <- data.frame(x = x, y = dnorm(x, mean = mean1, sd = sigma1), + type = name1) + df2 <- data.frame(x = x, y = dnorm(x, mean = mean2, sd = sigma2), + type = name2) + df3 <- data.frame(x = x, y = dnorm(x, mean = meanBest, sd = sigmaBest), + type = nameBest) + df123 <- rbind(df1, df2, df3) + label1 <- paste0(name1, ": N(mean=",round(mean1, 2), ", sd=", round(sigma1, 2), ")") - label2 <- paste0(name2, ": N(mean=",round(mean2,2),", sd=", round(sigma2,2), + label2 <- paste0(name2, ": N(mean=",round(mean2, 2), ", sd=", round(sigma2, 2), ")") - labelBest <- paste0(nameBest, ": N(mean=",round(meanBest,2),", sd=", - round(sigmaBest,2), ")") + labelBest <- paste0(nameBest, ": N(mean=",round(meanBest,2), ", sd=", + round(sigmaBest, 2), ")") cols <- c("#DC3912", "#13721A", "#1F5094") names(cols) <- c(name1, name2, nameBest) - g <- ggplot(df123)+geom_line(aes(x,y,colour=type), size=rel(1.2)) + g <- ggplot(df123) + geom_line(aes(x, y, colour = type), size = rel(1.2)) g <- g + scale_colour_manual(values = cols, limits = c(name1, name2, nameBest), diff --git a/R/PlotTriangles4Categories.R b/R/PlotTriangles4Categories.R index cda320f9a71b7ac0a32a3dfb3821e8f931f3cb9e..fcfb36bbb68b7b58019634d9417ce1ad0609523a 100644 --- a/R/PlotTriangles4Categories.R +++ b/R/PlotTriangles4Categories.R @@ -47,6 +47,7 @@ #' creator function of the corresponding device. #'@param res A positive number indicating resolution of the device (file or window) #' to plot in. See ?Devices and the creator function of the corresponding device. +#'@param figure.width a numeric value to control the width of the plot. #'@param ... The additional parameters to be passed to function ColorBar() in #' s2dverification for color legend creation. #'@return A figure in popup window by default, or saved to the specified path. @@ -69,24 +70,21 @@ #' xlabels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", #' "Aug", "Sep", "Oct", "Nov", "Dec")) #'@importFrom grDevices dev.new dev.off dev.cur +#'@importFrom graphics plot points polygon text title axis +#'@importFrom RColorBrewer brewer.pal #'@export - -PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, - toptitle=NULL, - sig_data=NULL,col_sig='black',pch_sig=18, - cex_sig=1, - labx=TRUE, - laby=TRUE, - xlabels=NULL, xtitle=NULL, - ylabels=NULL, ytitle=NULL, - legend=TRUE,lab_legend=NULL, - cex_leg=1,col_leg='black', - fileout=NULL, - size_units = 'px', res = 100, - figure.width = 1, - ...){ +PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, + toptitle = NULL, sig_data = NULL, + pch_sig = 18, col_sig = 'black', + cex_sig = 1, xlab = TRUE, ylab = TRUE, + xlabels = NULL, xtitle = NULL, + ylabels = NULL, ytitle = NULL, + legend = TRUE, lab_legend = NULL, + cex_leg = 1, col_leg = 'black', + fileout = NULL, size_units = 'px', + res = 100, figure.width = 1, ...) { # Checking the dimensions - if (length(dim(data))!=3) { + if (length(dim(data)) != 3) { stop("Parameter 'data' must be an array with three dimensions.") } @@ -97,7 +95,8 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, if (is.null(names(dim(data)))) { stop("Parameter 'data' must be an array with named dimensions.") }else{ - if (!any(names(dim(data))=='dimx')|!any(names(dim(data))=='dimy')|!any(names(dim(data))=='dimcat')){ + if (!any(names(dim(data)) == 'dimx') | !any(names(dim(data)) == 'dimy') | + !any(names(dim(data)) == 'dimcat')) { stop("Parameter 'data' should contain 'dimx', 'dimy' and 'dimcat' dimension names. ") } } @@ -105,12 +104,12 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, if (!is.null(sig_data)) { if (!is.logical(sig_data)) { stop("Parameter 'sig_data' array must be logical.")} - else if (length(dim(sig_data)) !=3) { + else if (length(dim(sig_data)) != 3) { stop("Parameter 'sig_data' must be an array with three dimensions.") }else if (any(dim(sig_data) != dim(data))){ stop("Parameter 'sig_data' must be an array with the same dimensions as 'data'.") - }else if(!is.null(names(dim(sig_data)))){ - if (any(names(dim(sig_data)) != names(dim(data)))){ + }else if(!is.null(names(dim(sig_data)))) { + if (any(names(dim(sig_data)) != names(dim(data)))) { stop("Parameter 'sig_data' must be an array with the same named dimensions as 'data'.")} } } @@ -123,7 +122,7 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, # Checking what is available and generating missing information if (!is.null(lab_legend) && - length(lab_legend) != 4 && length(lab_legend) != 2){ + length(lab_legend) != 4 && length(lab_legend) != 2) { stop("Parameter 'lab_legend' should contain two or four names.") } @@ -135,10 +134,10 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, # If there is any filenames to store the graphics, process them # to select the right device if (!is.null(fileout)) { - deviceInfo <- s2dverification:::.SelectDevice(fileout = fileout, - width = 80 * ncol * figure.width, - height = 80 * nrow, - units = size_units, res = res) + deviceInfo <- .SelectDevice(fileout = fileout, + width = 80 * ncol * figure.width, + height = 80 * nrow, + units = size_units, res = res) saveToFile <- deviceInfo$fun fileout <- deviceInfo$files } @@ -148,26 +147,26 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, saveToFile(fileout) } else if (names(dev.cur()) == 'null device') { dev.new(units = size_units, res = res, - width = 8 * figure.width, height =5) + width = 8 * figure.width, height = 5) } if (is.null(xlabels)){ - xlabels=1:ncol + xlabels = 1:ncol } if (is.null(ylabels)){ - ylabels=1:nrow + ylabels = 1:nrow } - if (!is.null(brks) && !is.null(cols)){ - if (length(brks) != length(cols)+1){ + if (!is.null(brks) && !is.null(cols)) { + if (length(brks) != length(cols) + 1) { stop("The length of the parameter 'brks' must be one more than 'cols'.") } } - if (is.null(brks)){ - brks <- seq(min(data, na.rm=T), max(data, na.rm=T),length.out = 9) + if (is.null(brks)) { + brks <- seq(min(data, na.rm = T), max(data, na.rm = T), length.out = 9) } - if (is.null(cols)){ - cols<-rev(brewer.pal(length(brks)-1,'RdBu')) + if (is.null(cols)) { + cols <- rev(brewer.pal(length(brks) - 1, 'RdBu')) } # The colours for each triangle/category are defined @@ -178,41 +177,42 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, } if(legend){ - layout(matrix(c(1,2,1,3),2,2,byrow=T),widths =c(10,3.4),heights=c(10,3.5)) - par(oma=c(1,1,1,1),mar=c(5,4,0,0)) - if(is.null(lab_legend)){ - lab_legend=1:ncat + layout(matrix(c(1, 2, 1, 3), 2, 2, byrow = T), + widths = c(10, 3.4), heights = c(10, 3.5)) + par(oma = c(1, 1, 1, 1), mar = c(5, 4, 0, 0)) + if(is.null(lab_legend)) { + lab_legend = 1:ncat } } - plot(ncol,nrow,xlim = c(0,ncol),ylim=c(0,nrow), xaxs="i",yaxs='i',type="n", - xaxt="n", yaxt="n",ann=F,axes=F) + plot(ncol, nrow, xlim = c(0, ncol), ylim=c(0, nrow), xaxs = "i", yaxs = 'i', type = "n", + xaxt = "n", yaxt = "n", ann = F, axes = F) - box(col='black',lwd=1) + box(col = 'black',lwd = 1) if (! is.null(toptitle)){ - title(toptitle, cex=1.5) + title(toptitle, cex = 1.5) } if (!is.null(xtitle)){ - mtext(side = 1, text = xtitle, line = 4, cex=1.5) + mtext(side = 1, text = xtitle, line = 4, cex = 1.5) } if (!is.null(ytitle)){ - mtext(side = 2, text = ytitle, line = 2.5, cex=1.5) + mtext(side = 2, text = ytitle, line = 2.5, cex = 1.5) } - if (labx){ - axis(1, at=(1:ncol)-0.5, las=2, labels=xlabels, cex.axis=1.5) + if (xlab){ + axis(1, at =(1:ncol) - 0.5, las = 2, labels = xlabels, cex.axis = 1.5) } - if (laby){ - axis(2, at=(1:nrow)-0.5, las=2, labels=ylabels, cex.axis=1.5) + if (ylab){ + axis(2, at = (1:nrow) - 0.5, las = 2, labels = ylabels, cex.axis = 1.5) } #The triangles are plotted for(p in 1:ncol){ for(l in 1:nrow){ - if (ncat==4){ + if (ncat == 4){ coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) @@ -251,21 +251,24 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, if(legend){ # Colorbar par(mar=c(0,0,0,0)) - ColorBar(brks = brks, cols = cols, vert=T,draw_ticks = T, draw_separators = T, + ColorBar(brks = brks, cols = cols, vertical = T, draw_ticks = T, draw_separators = T, # extra_margin = c(0,0,2.5,0),label_scale = 1.5,...) - extra_margin = c(0,0,0,0),label_scale = 1.5,...) + extra_margin = c( 0, 0, 0, 0), label_scale = 1.5, ...) - par(mar=c(0.5,2.5,0.5,2.5)) - plot(1,1,xlim = c(0,1),ylim=c(0,1), xaxs="i",yaxs='i',type="n", - xaxt="n", yaxt="n",ann=F,axes=F) + par(mar = c(0.5, 2.5, 0.5, 2.5)) + plot(1, 1, xlim = c(0, 1), ylim =c(0, 1), xaxs = "i", yaxs = 'i', type = "n", + xaxt = "n", yaxt = "n", ann = F, axes = F) - box(col=col_leg) - p=l=1 - if (ncat==4){ - coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), - ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) + box(col = col_leg) + p = l = 1 + if (ncat == 4){ + coord_triangl <- list(xs = list(c(p -1, p - 0.5, p - 1), c(p - 1, p - 0.5, p), + c(p, p - 0.5, p), c(p - 1, p - 0.5, p)), + ys = list(c(l - 1, - 0.5 + l, l), c(l - 1, - 0.5 + l, l - 1), + c(l - 1, - 0.5 + l, l), c(l, - 0.5 + l, l))) - coord_sig <- list(x=c(p-0.75,p-0.5,p-0.25,p-0.5),y=c(l-0.5,l-0.75,l-0.5,l-0.25)) + coord_sig <- list(x = c(p - 0.75, p - 0.5, p - 0.25, p - 0.5), + y = c(l - 0.5, l - 0.75, l - 0.5, l - 0.25)) } if (ncat==2){ @@ -284,3 +287,143 @@ PlotTriangles4Categories <- function(data,cols=NULL,brks=NULL, # If the graphic was saved to file, close the connection with the device if (!is.null(fileout)) dev.off() } +.SelectDevice <- function(fileout, width, height, units, res) { + # This function is used in the plot functions to check the extension of the + # files where the graphics will be stored and select the right R device to + # save them. + # If the vector of filenames ('fileout') has files with different + # extensions, then it will only accept the first one, changing all the rest + # of the filenames to use that extension. + + # We extract the extension of the filenames: '.png', '.pdf', ... + ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) + + if (length(ext) != 0) { + # If there is an extension specified, select the correct device + ## units of width and height set to accept inches + if (ext[1] == ".png") { + saveToFile <- function(fileout) { + png(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".jpeg") { + saveToFile <- function(fileout) { + jpeg(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] %in% c(".eps", ".ps")) { + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".pdf") { + saveToFile <- function(fileout) { + pdf(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".svg") { + saveToFile <- function(fileout) { + svg(filename = fileout, width = width, height = height) + } + } else if (ext[1] == ".bmp") { + saveToFile <- function(fileout) { + bmp(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".tiff") { + saveToFile <- function(fileout) { + tiff(filename = fileout, width = width, height = height, res = res, units = units) + } + } else { + .warning("file extension not supported, it will be used '.eps' by default.") + ## In case there is only one filename + fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) + ext[1] <- ".eps" + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } + # Change filenames when necessary + if (any(ext != ext[1])) { + .warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". The extensions are being converted to ", ext[1], ".")) + fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) + } + } else { + # Default filenames when there is no specification + .warning("there are no extensions specified in the filenames, default to '.eps'") + fileout <- paste0(fileout, ".eps") + saveToFile <- postscript + } + + # return the correct function with the graphical device, and the correct + # filenames + list(fun = saveToFile, files = fileout) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index f10dc1427ba7f9797c37cdd7e71dda0e08b9ab49..22b762b209b7471bf4a7fc652ac79f863bf42deb 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -10,6 +10,7 @@ CST_RegimesAssign( ref_maps, method = "distance", composite = FALSE, + memb = FALSE, ncores = NULL ) } @@ -18,10 +19,13 @@ CST_RegimesAssign( \item{ref_maps}{a 's2dv_cube' object as the output of CST_WeatherRegimes.} -\item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or -the maximum spatial correlation (method=’ACC’) between the maps.} +\item{method}{whether the matching will be performed in terms of minimum distance (default = ’distance’) or +the maximum spatial correlation (method = ’ACC’) between the maps.} -\item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} +\item{composite}{a logical parameter indicating if the composite maps are computed or not (default = FALSE).} + +\item{memb}{a logical value indicating whether to compute composites for separate members (default FALSE) or as unique ensemble (TRUE). +This option is only available for when parameter 'composite' is set to TRUE and the data object has a dimension named 'member'.} \item{ncores}{the number of multicore threads to use for parallel computation.} } @@ -39,9 +43,9 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat (method=‘ACC’) is obtained. } \examples{ -regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) -res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) -res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) +regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = FALSE, ncenters = 4) +res1 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps = regimes, composite = FALSE) +res2 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps = regimes, composite = TRUE) } \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd index 4ffb3c4cd43ef8d05204f848d048d50f0cfa5b22..a840c59535b0721e296b17134aac588101400ee8 100644 --- a/man/CST_WeatherRegimes.Rd +++ b/man/CST_WeatherRegimes.Rd @@ -7,7 +7,7 @@ CST_WeatherRegimes( data, ncenters = NULL, - EOFS = TRUE, + EOFs = TRUE, neofs = 30, varThreshold = NULL, method = "kmeans", @@ -21,6 +21,8 @@ CST_WeatherRegimes( \item{ncenters}{Number of clusters to be calculated with the clustering function.} +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} + \item{neofs}{number of modes to be kept (default = 30).} \item{varThreshold}{Value with the percentage of variance to be explained by the PCs. @@ -33,11 +35,9 @@ For more details about these methods see the hclust function documentation inclu \item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} -\item{ncores}{The number of multicore threads to use for parallel computation.} - -\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} +\item{nstart}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} -\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} +\item{ncores}{The number of multicore threads to use for parallel computation.} } \value{ A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) @@ -54,8 +54,8 @@ The cluster analysis can be performed with the traditional k-means or those meth included in the hclust (stats package). } \examples{ -res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) -res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = FALSE, ncenters = 4) +res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFs = TRUE, ncenters = 3) } \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). diff --git a/man/PlotTriangles4Categories.Rd b/man/PlotTriangles4Categories.Rd index 14ab51147cbd08770af4f36bf4df41159492be62..6e95df38351cb9f4c7bfcfa749e36af265f0d160 100644 --- a/man/PlotTriangles4Categories.Rd +++ b/man/PlotTriangles4Categories.Rd @@ -6,15 +6,15 @@ \usage{ PlotTriangles4Categories( data, - cols = NULL, brks = NULL, + cols = NULL, toptitle = NULL, sig_data = NULL, - col_sig = "black", pch_sig = 18, + col_sig = "black", cex_sig = 1, - labx = TRUE, - laby = TRUE, + xlab = TRUE, + ylab = TRUE, xlabels = NULL, xtitle = NULL, ylabels = NULL, @@ -34,28 +34,32 @@ PlotTriangles4Categories( \item{data}{array with three named dimensions: 'dimx', 'dimy', 'dimcat', containing the values to be displayed in a coloured image with triangles.} +\item{brks}{A vector of the color bar intervals. The length must be one more +than the parameter 'cols'. Use ColorBar() to generate default values.} + \item{cols}{A vector of valid colour identifiers for color bar. The length must be one less than the parameter 'brks'. Use ColorBar() to generate default values.} -\item{brks}{A vector of the color bar intervals. The length must be one more -than the parameter 'cols'. Use ColorBar() to generate default values.} - \item{toptitle}{A string of the title of the grid. Set NULL as default.} \item{sig_data}{logical array with the same dimensions as 'data' to add layers to the plot. A value of TRUE at a grid cell will draw a dot/symbol on the corresponding triangle of the plot. Set NULL as default.} -\item{col_sig}{colour of the symbol to represent sig_data.} - \item{pch_sig}{symbol to be used to represent sig_data. Takes 18 (diamond) by default. See 'pch' in par() for additional accepted options.} +\item{col_sig}{colour of the symbol to represent sig_data.} + \item{cex_sig}{parameter to increase/reduce the size of the symbols used to represent sig_data.} +\item{xlab}{A logical value (TRUE) indicating if xlabels should be plotted} + +\item{ylab}{A logical value (TRUE) indicating if ylabels should be plotted} + \item{xlabels}{A vector of labels of the x-axis The length must be length of the col of parameter 'data'. Set the sequence from 1 to the length of the row of parameter 'data' as default.} @@ -90,12 +94,10 @@ creator function of the corresponding device.} \item{res}{A positive number indicating resolution of the device (file or window) to plot in. See ?Devices and the creator function of the corresponding device.} +\item{figure.width}{a numeric value to control the width of the plot.} + \item{...}{The additional parameters to be passed to function ColorBar() in s2dverification for color legend creation.} - -\item{xlab}{A logical value (TRUE) indicating if xlabels should be plotted} - -\item{ylab}{A logical value (TRUE) indicating if ylabels should be plotted} } \value{ A figure in popup window by default, or saved to the specified path. diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index 40daf6bea7152c8748d00d6d356d8a09f6996dd8..d578e5b24863216db69830f36374714fa590ff33 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -11,19 +11,25 @@ RegimesAssign( lat, method = "distance", composite = FALSE, + memb = FALSE, ncores = NULL ) } \arguments{ \item{data}{an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.} -\item{ref_maps}{array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.} +\item{ref_maps}{array with 3-dimensions ('lon', 'lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.} -\item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or +\item{lat}{a vector of latitudes corresponding to the positions provided in data and ref_maps.} + +\item{method}{whether the matching will be performed in terms of minimum distance (default = ’distance’) or the maximum spatial correlation (method=’ACC’) between the maps.} \item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} +\item{memb}{a logical value indicating whether to compute composites for separate members (default FALSE) or as unique ensemble (TRUE). +This option is only available for when parameter 'composite' is set to TRUE and the data object has a dimension named 'member'.} + \item{ncores}{the number of multicore threads to use for parallel computation.} } \value{ @@ -31,7 +37,7 @@ A list with elements \code{$composite} (3-d array (lon, lat, k) containing the c \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , - \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), + \code{$frequency} (A vector of integers (from k = 1, ... k n reference maps) indicating the percentage of assignations corresponding to each map.), } \description{ This function performs the matching between a field of anomalies and a set @@ -40,9 +46,10 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat (method=‘ACC’) is obtained. } \examples{ -regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), -lat=lonlat_data$exp$lat,composite=FALSE) +regimes <- WeatherRegime(data = lonlat_data$obs$data, lat = lonlat_data$obs$lat, + EOFs = FALSE, ncenters = 4)$composite +res1 <- RegimesAssign(data = lonlat_data$exp$data, ref_maps = drop(regimes), + lat = lonlat_data$exp$lat, composite = FALSE) } \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd index 22cc4f081b732844c045860064160851d3c35056..812585ce17ff7fac6becb833c29c69bd0bf5883a 100644 --- a/man/WeatherRegimes.Rd +++ b/man/WeatherRegimes.Rd @@ -7,7 +7,7 @@ WeatherRegime( data, ncenters = NULL, - EOFS = TRUE, + EOFs = TRUE, neofs = 30, varThreshold = NULL, lon = NULL, @@ -23,6 +23,8 @@ WeatherRegime( \item{ncenters}{Number of clusters to be calculated with the clustering function.} +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} + \item{neofs}{number of modes to be kept only if EOFs = TRUE has been selected. (default = 30).} \item{varThreshold}{Value with the percentage of variance to be explained by the PCs. @@ -39,9 +41,9 @@ For more details about these methods see the hclust function documentation inclu \item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} -\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} +\item{nstart}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} -\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} +\item{ncores}{The number of multicore threads to use for parallel computation.} } \value{ A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) @@ -59,7 +61,8 @@ The cluster analysis can be performed with the traditional k-means or those meth included in the hclust (stats package). } \examples{ -res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +res <- WeatherRegime(data = lonlat_data$obs$data, lat = lonlat_data$obs$lat, + EOFs = FALSE, ncenters = 4) } \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). diff --git a/tests/testthat/test-CST_WeatherRegimes.R b/tests/testthat/test-CST_WeatherRegimes.R index e2bc6feb989f2c11aa7ecae275950b288e2aaefa..5f2967a172f393659dea3c93f8f1cc06986a9099 100644 --- a/tests/testthat/test-CST_WeatherRegimes.R +++ b/tests/testthat/test-CST_WeatherRegimes.R @@ -41,7 +41,7 @@ test_that("Sanity checks", { paste0("Parameter 'lon' must be specified.")) expect_equal( - names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFS = FALSE)$data)), + names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)), c('lat', 'lon', 'cluster')) data1 <- 1 : 400 @@ -53,9 +53,9 @@ test_that("Sanity checks", { expect_equal( dim(CST_WeatherRegimes(data = data1 , ncenters = nclusters, - EOFS = FALSE)$statistics$frequency), c(2, nclusters)) + EOFs = FALSE)$statistics$frequency), c(2, nclusters)) expect_equal( - names(dim(CST_WeatherRegimes(data = data1, nclusters, EOFS = FALSE)$data)), + names(dim(CST_WeatherRegimes(data = data1, nclusters, EOFs = FALSE)$data)), c('lat', 'lon', 'cluster')) data1 <- 1 : 400 @@ -81,7 +81,7 @@ test_that("Sanity checks", { data1 <- list(data = data1, lat = 1:5 ,lon = 1:4) class(data1) <- 's2dv_cube' expect_error( - CST_WeatherRegimes(data = data1, ncenters = 3, EOFS = FALSE), + CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE), paste0("Parameter 'data' contains NAs in the 'time' dimensions.")) data1 <- 1 : 400 @@ -90,10 +90,10 @@ test_that("Sanity checks", { data1 <- list(data = data1, lat = 1:5 ,lon = 1:4) class(data1) <- 's2dv_cube' expect_equal( - any(is.na(CST_WeatherRegimes(data = data1, ncenters =3, EOFS = FALSE)$data)), + any(is.na(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)), TRUE) expect_equal( - names(dim(CST_WeatherRegimes(data = data1, ncenters =3, EOFS = FALSE)$data)), + names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)), c('lat', 'lon', 'cluster')) }) diff --git a/vignettes/Figures/Obs_Persistence.png b/vignettes/Figures/Obs_Persistence.png new file mode 100644 index 0000000000000000000000000000000000000000..6e4c33058489ed0491441ea7d77179861cbd1ebc Binary files /dev/null and b/vignettes/Figures/Obs_Persistence.png differ diff --git a/vignettes/Figures/observed_regimes.png b/vignettes/Figures/observed_regimes.png new file mode 100644 index 0000000000000000000000000000000000000000..1bb134a108ea158a23e743b3d4d0ca94634ff47e Binary files /dev/null and b/vignettes/Figures/observed_regimes.png differ diff --git a/vignettes/Figures/predicted_regimes.png b/vignettes/Figures/predicted_regimes.png new file mode 100644 index 0000000000000000000000000000000000000000..354afb7db5ff6d9d219ad6445abcd0976070d6f6 Binary files /dev/null and b/vignettes/Figures/predicted_regimes.png differ diff --git a/vignettes/WeatherRegimes_vignette.Rmd b/vignettes/WeatherRegimes_vignette.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..47d78aee2c7055098c7fe530cfe103684f46cb13 --- /dev/null +++ b/vignettes/WeatherRegimes_vignette.Rmd @@ -0,0 +1,162 @@ +--- +author: "Verónica Torralba, Nicola Cortesi and Núria Pérez-Zanón" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteEngine{knitr::knitr} + %\VignetteIndexEntry{Weather Regime Analysis} + %\usepackage[utf8]{inputenc} +--- + + +Weather regime analysis +======================== + +Weather regimes are a set of patterns able to characterize the different circulation structures occurring each month/season. This vignette aims to illustrate a step-by-step example of how to perform a weather regime assessment using CSTools functionalities. + +### 1- Required packages + +The functions to compute the Weather Regimes are part of CSTools while plotting functions are included in s2dverification package: + +```r +library(CSTools) +library(s2dverification) +library(zeallot) +``` + + +### 2- Retrive data from files + +The data employed in this example are described below. +- Sea level pressure (psl): this has been selected as the circulation variable, however other variables such as geopotential at 500 hPa can be also used. +- Region: Euro-Atlantic domain [85.5ºW-45ºE; 27-81ºN]. +- Datasets: seasonal predictions from ECMWF System 4 ([**Molteni et al. 2011**] (https://www.ecmwf.int/sites/default/files/elibrary/2011/11209-new-ecmwf-seasonal-forecast-system-system-4.pdf)) and ERA-Interim reanalysis ([**Dee et al. 2011**] (http://onlinelibrary.wiley.com/doi/10.1002/qj.828/pdf)) as a reference dataset. +- Period: 1991-2010. Only 20 years have been selected for illustrative purposes, but the full hindcast period could be used for the analysis. + + +```r +sdates <- paste0(1991:2010, '1201') +c(exp, obs) %<-% CST_Load(var = 'psl', exp = 'system4_m1', + obs = 'erainterim', + sdates = sdates, + storefreq ='daily', + leadtimemin = 1, leadtimemax = 31, + latmin = 27, latmax = 81, + lonmin = 274.5, lonmax = 45, output = 'lonlat') +``` + +Notice that you need the files to be stored locally in your computer or server with correct configuration file. If you are interested into run this vignette, contact nuria.perez at bsc.es to get a data sample. + +The objects returned by `CST_Load()` are s2v_cube class. They contains among others, the array with the requested data. + +```r +> dim(exp$data) +dataset member sdate ftime lat lon + 1 15 20 31 77 186 +> dim(obs$data) +dataset member sdate ftime lat lon + 1 1 20 31 77 186 +``` + +## #3- Daily anomalies based on a smoothed climatology) + +The weather regimes classification is based on daily anomalies, which have been computed by following these steps: + + +```r +c(ano_exp, ano_obs) %<-% CST_Anomaly(exp = exp, obs = obs, filter_span = 1) +``` + +The LOESS filter has been applied to the climatology to remove the short-term variability and retain the annual cycle. The parameter `loess_span` should be adapted to the length of the period used to compute the climatology (e.g. season, month, week,...). In this example we are using daily data, so we have selected `loess_span = 1`. + + +### 4- Weather regimes in observations + +`CST_WeatherRegimes()` function is used to define the clusters based on the sea level pressure anomalies from ERA-Interim. This function is based on the [*kmeans function*] (http://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html) +from the stats R package. In this example we have made different assumptions: four clusters (`ncenters=4`) will be produced and the Empirical orthogonal functions are not used to filter the data (`EOFS=FALSE`) just to take into account the extreme values. More details about the methodology can be found in Cortesi et al. 2018 (submitted). + + +```r +WR_obs <- CST_WeatherRegimes(data = ano_obs, EOFS = FALSE, ncenters = 4) +``` + + +`CST_WeatherRegime()` provides a s2dv_cube object with several elements. `$data` the 4 weather regimes composites are stored while `$statistics` contains extra information (`$pvalue`, `$cluster`, `$persistence` and `$frequency`) which are the needed parameters for the weather regimes assessment. Further details about the outputs provided by the `CST_WeatherRegime()` function can be found in the package documentation or typing `?CST_WeatherRegimes` in the R session. + + + +### 5- Visualisation of the observed weather regimes + + +To plot the composite maps of each regime and the mean frequencies of each cluster, we have employed the `PlotLayout()` and `PlotEquiMap()` functions available in s2dverifcation. The object `WR_obs$data` is divided by 100 to change from Pa to hPa. As the `WR_obs$statistics$frequency` provides the monthly frequencies, the climatological frequencies are obtained as the average across the 20 years of the monthly frequencies. Note that these frequencies could slightly change as a consequence of the randomness inherent to the iterative processes involved in the k-means. + + ```r +clim_frequencies <- paste0('freq = ', + round(Mean1Dim(WR_obs$statistics$frequency, 1), 1), '%') +PlotLayout(PlotEquiMap, c(1, 2), lon = obs$lon, lat = obs$lat, + var = WR_obs$data / 100, + titles = paste0(paste0('Cluster ', 1:4), ' (', clim_frequencies,' )'), + filled.continents = FALSE, + axelab = FALSE, draw_separators = TRUE, subsampleg = 1, + brks = seq(-16, 16, by = 2), + bar_extra_labels = c(2, 0, 0, 0), fileout = './Figures/observed_regimes.png') +``` + + + +### 6- Visualisation of the observed regime persistence + +Persistence measures the average number of days a regime lasts before evolving to a different regime. To plot regime persistence for each start date, we have employed the `PlotTriangles4Categories()` function available in CSTools. + +```r +freq_obs <- WR_obs$statistics$persistence +freq_obs[is.na(freq_obs)] <- 0 +dim(freq_obs) <- c(dimy = 20, dimcat = 4, dimx = 1) + +PlotTriangles4Categories(freq_obs, toptitle = 'Persistence', + xtitle = 'Start Dates', ytitle = '', xlab = FALSE, + ylabels = substr(sdates, 1, 4), cex_leg = 0.6, + lab_legend = c('AR', 'NAO-', 'BL', 'NAO+'), figure.width = .7) + +``` + + + + + + +### 7- Weather regimes in the predictions + +Predicted anomalies for each day, month, member and lead time are matched with the observed clusters (obtained in step 4). The assignment of the anomalies to a pre-defined set of clusters guarantees that the predicted weather regimes have very similar spatial structures to the observed regimes, which is an essential requirement for the verification of weather regimes. This is an example of how to produce a set of weather regimes based on the predictions that can be verified with the observational dataset, but this approach can be also used in an operational context for which the probability of occurence of each cluster could be estimated. + + +The matching is based on the minimization of Eucledian distance `method='distance'`, but it can also be also done in terms of spatial correlation `method='ACC'`. However the computational efficiency is superior for the distance method. + + +```r +WR_exp <- CST_RegimesAssign(data = ano_exp, ref_maps = WR_obs, method = 'distance', composite = TRUE, memb = TRUE) +``` + + +`CST_RegimesAssign()` provides a object of 's2dv_cube' class which lists several elements. The composites are stored in the $data element which in the case of `memb = TRUE` corresponds to the mean composites for all member. `$pvalue`, `$cluster` and `$frequency` are stored in element `$statistics. This elements contain similar information to that provided by the `WeatherRegime()` function. Further details about the output can be found in the package documentation. + +### 8- Visualisation of the predicted weather regimes + + +The outputs of `RegimesAssign()` have been represented to be compared with those obtained for the observational classification (step 5). + + +```r +PlotLayout(PlotEquiMap, c(1, 2),lon = exp$lon, lat = exp$lat, + var = WR_exp$data/100, + titles = paste0(paste0('Cluster ',1:4), ' (',paste0('freq = ', + round(WR_exp$statistics$frequency,1),'%'),' )'), + filled.continents = FALSE, + axelab = FALSE, draw_separators = TRUE, + subsampleg = 1, brks = seq(-16, 16, by = 2), + bar_extra_labels = c(2, 0, 0, 0), fileout = './Figures/predicted_regimes.png') +``` + + + +Observed and predicted weather regimes are very similar although their frequencies are slightly different. Cluster 1 is the Atlantic Ridge and cluster 3 the Blocking pattern, while cluster 4 and 2 are the positive and negative phases of the NAO. This patterns can change depending on the period analyzed.