From badda4f14ae36785bafbfeb6564150af4b5fb9fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 11 Dec 2019 11:34:02 +0100 Subject: [PATCH 01/45] copying functions from s2dverification to the cstools repository --- R/PlotTableTriangles.R | 262 +++++++++++++++++++++++++++++++++++++++++ R/RegimesAssign.R | 130 ++++++++++++++++++++ R/WeatherRegime.R | 109 +++++++++++++++++ 3 files changed, 501 insertions(+) create mode 100644 R/PlotTableTriangles.R create mode 100644 R/RegimesAssign.R create mode 100644 R/WeatherRegime.R diff --git a/R/PlotTableTriangles.R b/R/PlotTableTriangles.R new file mode 100644 index 00000000..4c986bf4 --- /dev/null +++ b/R/PlotTableTriangles.R @@ -0,0 +1,262 @@ +#'Function to convert any numerical array to a grid of coloured triangles. +#' +#'This function converts a numerical data array into a coloured +#'grid with triangles. It is useful for a slide or article to present tabular results as +#'colors instead of numbers. This can be used to compare the outputs of two or four categories (e.g. modes +#' of variability, clusters, or forecast systems). +#' +#'@param var an array with three dimensions: rows, columns and categories +#' containing the values to be displayed in a colored image with triangles. +#'@param poscols the dimension that will be represented as columns. +#'@param posrows the dimension that will be reprsented as rows. +#'@param brks A vector of the color bar intervals. The length must be one more +#' than the parameter 'cols'. Use ColorBar() to generate default values. +#'@param cols A vector of valid colour identifiers for color bar. The length +#' must be one less than the parameter 'brks'. Use ColorBar() to generate +#' default values. +#'@param toptitle A string of the title of the grid. Set NULL as default. +#'@param sig_var logical array with the same dimensions as 'var' to add layers +#' to the plot. A value of TRUE at a grid cell will draw a dot/symbol on the +#' corresponding triangle of the plot. Set NULL as default. +#'@param pch_sig symbol to be used to represent sig_var. Takes 18 +#' (diamond) by default. See 'pch' in par() for additional +#' accepted options. +#'@param col_sig colour of the symbol to represent sig_var. +#'@param cex_sig parameter to increase/reduce the size of the symbols used +#' to represent sig_var. +#'@param xlabels A vector of labels of the x-axis The length must be +#' length of the col of parameter 'var'. Set the sequence from 1 to the +#' length of the row of parameter 'var' as default. +#'@param xtitle A string of title of the x-axis. Set NULL as default. +#'@param ylabels A vector of labels of the y-axis The length must be +#' length of the row of parameter 'var'. Set the sequence from 1 to the +#' length of the row of parameter 'var' as default. +#'@param ytitle A string of title of the y-axis. Set NULL as default. +#'@param legend A logical value to decide to draw the color bar legend or not. +#' Set TRUE as default. +#'@param lab_legend A vector of labels indicating what is represented in each +#'category (i.e. triangle). Set the sequence from 1 to the length of +#' the categories (2 or 4). +#'@param cex_leg a number to indicate the increase/reductuion of the lab_legend used +#' to represent sig_var. +#'@param col_leg color of the legend (triangles). +#'@param fileout A string of full directory path and file name indicating where +#' to save the plot. If not specified (default), a graphics device will pop up. 
+#'@param size_units A string indicating the units of the size of the device +#' (file or window) to plot in. Set 'px' as default. See ?Devices and the +#' creator function of the corresponding device. +#'@param res A positive number indicating resolution of the device (file or window) +#' to plot in. See ?Devices and the creator function of the corresponding device. +#'@param ... The additional parameters to be passed to function ColorBar() in +#' s2dverification for color legend creation. +#'@return A figure in popup window by default, or saved to the specified path. +#' +#'@author History:\cr +#'1.0 - 2019-10 (V.Torralba, \email{veronica.torralba@bsc.es}) - Original code +#' +#'@examples +#'#Example with random data +#' arr1<- arr1<- array(runif(n = 12 * 7 * 4, min=-1, max=1),dim = c(12,7,4)) +#'arr2<- array(TRUE,dim = dim(arr1)) +#'arr2[which(arr1 < 0.3)] = FALSE +#'PlotTableTriangles(var = arr1, +#' cols = c('white','#fef0d9','#fdd49e','#fdbb84','#fc8d59', +#' '#e34a33','#b30000', '#7f0000'), +#' brks = c(-1, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1), +#' xtitle = "Target month", ytitle = "Lead time", +#' xlabels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", +#' "Aug", "Sep", "Oct", "Nov", "Dec")) +#'@importFrom grDevices dev.new dev.off dev.cur +#'@export + +PlotTableTriangles<-function(var,poscols=1,posrows=2,cols=NULL,brks=NULL, + toptitle=NULL, + sig_var=NULL,col_sig='black',pch_sig=18, + cex_sig=1, + xlabels=NULL, xtitle=NULL, + ylabels=NULL, ytitle=NULL, + legend=TRUE,lab_legend=NULL, + cex_leg=1,col_leg='black', + fileout=NULL, + size_units = 'px', res = 100, + figure.width = 1, + ...){ + # Checking the dimensions + vardim<-dim(var) + nrow<-vardim[posrows] + ncol<-vardim[poscols] + poscat<-vardim[setdiff(1:length(vardim),c(posrows,poscols))] + + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- s2dverification:::.SelectDevice(fileout = fileout, + width = 80 * ncol * figure.width, + height = 80 * nrow, + units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, + width = 8 * figure.width, height = 5) + } + + if (!is.null(sig_var)){ + if (!is.logical(sig_var)){ + stop('sig_var array should be provided as TRUE/FALSE') + } + if (all(dim(sig_var)!=dim(var))){ + stop('sig_var array should have the same dimensions that var') + } + } + + if (length(vardim)!=3){ + stop('var must be an array with three dimensions (4, cols, rows) ') + } + + if (poscat!=4 && poscat!=2){ + stop('var must be an array with one of the dimensions being equal to 4 or 2') + } + + # The dimensions are named + names(dim(var))[poscols]<-'cols' + names(dim(var))[posrows]<-'rows' + names(dim(var))[setdiff(1:length(vardim),c(posrows,poscols))]<-'cat' + + if (!is.null(sig_var)){ + names(dim(sig_var))<-names(dim(var)) + } + + # Checking what is available and generating missing information + if (!is.null(lab_legend)&&length(lab_legend)!=4){ + stop('The legend should contain 4 names') + } + + if (is.null(xlabels)){ + xlabels=1:ncol + } + if (is.null(ylabels)){ + ylabels=1:nrow + } + if (is.null(brks)){ + brks<-seq(min(var),max(var),length.out = 9) + } + if (is.null(cols)){ + cols<-rev(brewer.pal(length(brks)-1,'RdBu')) + } + + # The colours for each triangle/category are defined + var_cat <- array(cols[length(cols)], 
dim = vardim) + names(dim(var_cat)) <- names(dim(var)) + for (i in (length(cols) - 1):1) { + var_cat[var < brks[i + 1]] <- cols[i] + } + + if(legend){ + layout(matrix(c(1,2,1,3),2,2,byrow=T),widths =c(10,2),heights=c(10,2)) + par(oma=c(1,1,1,1),mar=c(5,4,4,0)) + if(is.null(lab_legend)){ + lab_legend=1:4 + } + } + + plot(ncol,nrow,xlim = c(0,ncol),ylim=c(0,nrow), xaxs="i",yaxs='i',type="n", + xaxt="n", yaxt="n",ann=F,axes=F) + + box(col='black',lwd=1) + + if (! is.null(toptitle)){ + title(toptitle, cex=1.5) + } + + if (!is.null(xtitle)){ + mtext(side = 1, text = xtitle, line = 4, cex=1.5) + } + if (!is.null(ytitle)){ + mtext(side = 2, text = ytitle, line = 2.5, cex=1.5) + } + + axis(1, at=(1:ncol)-0.5, las=2, labels=xlabels, cex.axis=1.5) + axis(2, at=(1:nrow)-0.5, las=2, labels=ylabels, cex.axis=1.5) + + + #The triangles are plotted + for(p in 1:ncol){ + for(l in 1:nrow){ + if (poscat==4){ + coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), + ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) + + coord_sig <- list(x=c(p-0.75,p-0.5,p-0.25,p-0.5),y=c(l-0.5,l-0.75,l-0.5,l-0.25)) + } + + if (poscat==2){ + coord_triangl<- list(xs=list(c(p-1, p, p),c(p-1, p, p-1)), + ys=list( c(l-1,l-1, l), c(l-1, l, l))) + coord_sig<- list(x=c(p-(2/3),p-(1/3)),y=c(l-(1/3),l-(2/3))) + } + for (n in 1:poscat) { + polygon(coord_triangl$xs[[n]], + coord_triangl$ys[[n]], + col = Subset( + var_cat, + along = c('cat', 'cols', 'rows'), + indices = list(n, p, l))) + if (!is.null(sig_var) && + Subset(sig_var,along = c('cat', 'cols', 'rows'), + indices = list(n, p, l))) { + points( + x = coord_sig$x[n], + y = coord_sig$y[n], + pch = pch_sig, + cex = cex_sig, + col = col_sig + ) + } + } + } + } + + # legend + + if(legend){ + # Colorbar + par(mar=c(0,0,0,0)) + ColorBar(brks = brks, cols = cols, vert=T,draw_ticks = T, draw_separators = T, + extra_margin = c(0,0,2.5,0),label_scale = 1.5,...) 
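+  # Legend key: draw one empty reference cell split into its triangles and
+  # label each triangle with the corresponding lab_legend entry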
+ + par(mar=c(0.5,2.5,0,2.5)) + plot(1,1,xlim = c(0,1),ylim=c(0,1), xaxs="i",yaxs='i',type="n", + xaxt="n", yaxt="n",ann=F,axes=F) + + box(col=col_leg) + p=l=1 + if (poscat==4){ + coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), + ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) + + coord_sig <- list(x=c(p-0.75,p-0.5,p-0.25,p-0.5),y=c(l-0.5,l-0.75,l-0.5,l-0.25)) + } + + if (poscat==2){ + coord_triangl<- list(xs=list(c(p-1, p, p),c(p-1, p, p-1)), + ys=list( c(l-1,l-1, l), c(l-1, l, l))) + coord_sig<- list(x=c(p-(2/3),p-(1/3)),y=c(l-(1/3),l-(2/3))) + } + for (n in 1:poscat) { + polygon(coord_triangl$xs[[n]], + coord_triangl$ys[[n]],border=col_leg) + text(x=coord_sig$x[[n]],y=coord_sig$y[[n]],labels = lab_legend[n],cex=cex_leg,col=col_leg) + + } + } + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() +} + diff --git a/R/RegimesAssign.R b/R/RegimesAssign.R new file mode 100644 index 00000000..5fa4992b --- /dev/null +++ b/R/RegimesAssign.R @@ -0,0 +1,130 @@ +anom2regime <- function(ref, target, method = 'distance', lat) { + posdim <- which(names(dim(ref)) == 'nclust') + poslat <- which(names(dim(ref)) == 'lat') + poslon <- which(names(dim(ref)) == 'lon') + + nclust <- dim(ref)[posdim] + + if (all(dim(ref)[-posdim] != dim(target))) { + stop('The target should have the same dimensions [lat,lon] that + the reference ') + } + + if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { + stop( + 'The arrays should include dimensions names ref[nclust,lat,lon] + and target [lat,lon]' + ) + } + + if (length(lat) != dim(ref)[poslat]) { + stop('latitudes do not match with the maps') + } + + # This dimensions are reorganized + ref <- aperm(ref, c(posdim, poslat, poslon)) + target <- + aperm(target, c(which(names(dim( + target + )) == 'lat'), which(names(dim( + target + )) == 'lon'))) + + # weights are defined + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) + + + rmsdiff <- function(x, y) { + dims <- dim(x) + ndims <- length(dims) + if (ndims != 2 | ndims != length(dim(y))) { + stop('x and y should be maps') + } + map_diff <- NA * x + for (i in 1:dims[1]) { + for (j in 1:dims[2]) { + map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 + } + } + rmsdiff <- sqrt(mean(map_diff)) + return(rmsdiff) + } + + if (method == 'ACC') { + corr <- rep(NA, nclust) + for (i in 1:nclust) { + corr[i] <- + ACC(InsertDim(InsertDim( + InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 + ), 3, 1), + InsertDim(InsertDim( + InsertDim(target * latWeights, 1, 1), 2, 1 + ), 3, 1))$ACC[2] + } + assign <- which(corr == max(corr)) + } + + if (method == 'distance') { + rms <- rep(NA, nclust) + for (i in 1:nclust) { + rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) + } + assign <- which(rms == min(rms)) + } + + return(assign) + } + +RegimesAssign <- function(var_ano, ref_maps, lats, method = 'distance') { + posdim <- which(names(dim(ref_maps)) == 'nclust') + poslat <- which(names(dim(ref_maps)) == 'lat') + poslon <- which(names(dim(ref_maps)) == 'lon') + poslat_ano <- which(names(dim(var_ano)) == 'lat') + poslon_ano <- which(names(dim(var_ano)) == 'lon') + + nclust <- dim(ref_maps)[posdim] + nlat <- dim(ref_maps)[poslat] + nlon <- dim(ref_maps)[poslon] + + + if (is.null(names(dim(ref_maps))) | is.null(names(dim(var_ano)))) { + stop( + 'The arrays should include dimensions names ref[nclust,lat,lon] + and target [lat,lon]' + ) + } + + if (length(lats) != 
dim(ref_maps)[poslat]) { + stop('latitudes do not match with the maps') + } + + assign <- + Apply( + data = list(target = var_ano), + margins = c((1:length(dim( + var_ano + )))[-c(poslat_ano, poslon_ano)]) , + fun = 'anom2regime', + ref = ref_maps, + lat = lats, + method = method + ) + + anom_array <- + array(var_ano, dim = c(prod(dim(var_ano)[-c(poslat_ano, poslon_ano)]), nlat, nlon)) + rm(var_ano) + index <- as.vector(assign$output1) + recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = index) + freqs <- rep(NA, nclust) + for (n in 1:nclust) { + freqs[n] <- (length(which(index == n)) / length(index)) * 100 + } + output <- + list( + composite = recon$composite, + pvalue = recon$pvalue, + cluster = assign$output1, + frequency = freqs + ) + return(output) + } diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R new file mode 100644 index 00000000..f0167b91 --- /dev/null +++ b/R/WeatherRegime.R @@ -0,0 +1,109 @@ +AtomicWeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", + nstart = 30) { + + names(dim(data)) <- c("sdate", "ftime", "lat", "lon") + sdate <- which(names(dim(data)) == "sdate") + ftime <- which(names(dim(data)) == "ftime") + nftimes <- dim(data)[ftime] + nsdates <- dim(data)[sdate] + lon2 <- which(names(dim(data)) == "lon") + lat2 <- which(names(dim(data)) == "lat") + data <- aperm(data, c(ftime, sdate, lat2, lon2)) + nlon <- dim(data)[lon2] + nlat <- dim(data)[lat2] + dim(data) <- c(nftimes * nsdates, nlat, nlon) + + if (is.null(ncenters)) { + stop("ncenters must be specified") + } + if (EOFS == TRUE && is.null(lon)) { + stop("longitudes must be specified") + } + if (EOFS == TRUE && is.null(lat)) { + stop("latitudes must be specified") + } + + if (EOFS == TRUE) { + #data <- princomp(data[,subs], cor = FALSE)$scores + dataPC <- EOF(data, lat = as.vector(lat), lon = as.vector(lon), neofs = neofs) + if (is.null(threshold)){ + threshold <- sum(dataPC$var) + cluster_input <- dataPC$PC + } else { + threshold <- threshold + minPC <- head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1) + cluster_input <- dataPC$PC[, 1 : minPC] } + } else { + cluster_input <- data + latWeights <- InsertDim(InsertDim(cos(lat*pi/180), 1, nftimes*nsdates), 3, nlon) + cluster_input <- cluster_input * latWeights + dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon) + } + if (method == "kmeans") { + result <- kmeans(cluster_input, centers = ncenters, + iter.max = 100, nstart = nstart, trace = FALSE) + reconstructed <- array(0, c(ncenters, nlat, nlon)) + data <- aperm(data, c(2, 3, 1)) + reconstructed <- Composite(data, result$cluster) + names(dim(reconstructed$composite)) <- c("lon", "lat", "cluster") + cluster_timeseries <- list(lengths = c(), values = c()) + frequency <- persistence <- matrix(NA, nsdates, ncenters) + for (i in 1 : nsdates) { + occurences <- rle(result$cluster[((i * nftimes) + 1 - nftimes) : (i * nftimes)]) + cluster_timeseries <- list(lengths = c(cluster_timeseries$lengths, occurences$lengths), + values = c(cluster_timeseries$values, occurences$values)) + for (j in 1 : ncenters) { + frequency[i,j] <- (sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 + persistence[i,j] <- mean(occurences$lengths[occurences$values == j]) + } + } + # for (i in 1 : ncenters) { + # for (j in 1 : nsdates) { + # timeseries_subset <- cluster_timeseries$lengths#[(1 +((nftimes -1) * j)) : (nftimes * j)] + # cluster_subset <- cluster_timeseries$values[(1 +((nftimes -1) * j)) : (nftimes * j)] + # 
print(timeseries_subset) + # print(cluster_subset) + # print((1 +((nftimes -1) * j)) : (nftimes * j)) + # stop() + # frequency[i, j] <- (sum(timeseries_subset[cluster_subset == i]) / length(nftimes)) * 100 + # persistence[i,j] <- mean(timeseries_subset[cluster_subset == i]) + # } + # #frequency[i] <- (sum(cluster_timeseries$lengths[cluster_timeseries$values == i]) / length(result$cluster)) * 100 + # } + } else { + result <- hclust(dist(cluster_input), method = method) + clusterCut <- cutree(result, ncenters) + data <- aperm(data, c(3, 2, 1)) + result <- Composite(data, clusterCut) + } + if (method == "kmeans") { + return(list(composite = reconstructed$composite, pvalue = reconstructed$pvalue, cluster = as.array(result$cluster), center = as.array(result$center), + cluster_lengths = as.array(cluster_timeseries$lengths), cluster_values = as.array(cluster_timeseries$values), + persistence = as.array(persistence), frequency = frequency)) + } else { + return(list(composite = result$composite, pvalue = result$pvalue, cluster = as.array(clusterCut))) + } +} + + + + +WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", + nstart = 30, iter.max = 100, ncores = NULL) { + if (length(dim(data)) > 4) { + sdate <- which(names(dim(data)) == "sdate") + ftime <- which(names(dim(data)) == "ftime") + lon_dim <- which(names(dim(data)) == "lon") + lat_dim <- which(names(dim(data)) == "lat") + dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] + data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) + margins <- 5 : length(dim(data)) + result <- Apply(data = list(data), margins = list(margins), fun = "AtomicWeatherRegime", EOFS = EOFS, neofs = neofs, + threshold = threshold, lon = lon, lat = lat, ncenters = ncenters, method = method, + ncores = ncores) + } else { + result <- AtomicWeatherRegime(data, EOFS = EOFS, neofs = neofs, threshold = threshold, + lon = lon, lat = lat, ncenters = ncenters, method = method) + } + +} -- GitLab From 8dfbd4460b8d9c0abe17abb6129b722aa6688ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 11 Dec 2019 12:02:08 +0100 Subject: [PATCH 02/45] removing function --- R/PlotTableTriangles.R | 262 ----------------------------------------- 1 file changed, 262 deletions(-) delete mode 100644 R/PlotTableTriangles.R diff --git a/R/PlotTableTriangles.R b/R/PlotTableTriangles.R deleted file mode 100644 index 4c986bf4..00000000 --- a/R/PlotTableTriangles.R +++ /dev/null @@ -1,262 +0,0 @@ -#'Function to convert any numerical array to a grid of coloured triangles. -#' -#'This function converts a numerical data array into a coloured -#'grid with triangles. It is useful for a slide or article to present tabular results as -#'colors instead of numbers. This can be used to compare the outputs of two or four categories (e.g. modes -#' of variability, clusters, or forecast systems). -#' -#'@param var an array with three dimensions: rows, columns and categories -#' containing the values to be displayed in a colored image with triangles. -#'@param poscols the dimension that will be represented as columns. -#'@param posrows the dimension that will be reprsented as rows. -#'@param brks A vector of the color bar intervals. The length must be one more -#' than the parameter 'cols'. Use ColorBar() to generate default values. -#'@param cols A vector of valid colour identifiers for color bar. The length -#' must be one less than the parameter 'brks'. 
Use ColorBar() to generate -#' default values. -#'@param toptitle A string of the title of the grid. Set NULL as default. -#'@param sig_var logical array with the same dimensions as 'var' to add layers -#' to the plot. A value of TRUE at a grid cell will draw a dot/symbol on the -#' corresponding triangle of the plot. Set NULL as default. -#'@param pch_sig symbol to be used to represent sig_var. Takes 18 -#' (diamond) by default. See 'pch' in par() for additional -#' accepted options. -#'@param col_sig colour of the symbol to represent sig_var. -#'@param cex_sig parameter to increase/reduce the size of the symbols used -#' to represent sig_var. -#'@param xlabels A vector of labels of the x-axis The length must be -#' length of the col of parameter 'var'. Set the sequence from 1 to the -#' length of the row of parameter 'var' as default. -#'@param xtitle A string of title of the x-axis. Set NULL as default. -#'@param ylabels A vector of labels of the y-axis The length must be -#' length of the row of parameter 'var'. Set the sequence from 1 to the -#' length of the row of parameter 'var' as default. -#'@param ytitle A string of title of the y-axis. Set NULL as default. -#'@param legend A logical value to decide to draw the color bar legend or not. -#' Set TRUE as default. -#'@param lab_legend A vector of labels indicating what is represented in each -#'category (i.e. triangle). Set the sequence from 1 to the length of -#' the categories (2 or 4). -#'@param cex_leg a number to indicate the increase/reductuion of the lab_legend used -#' to represent sig_var. -#'@param col_leg color of the legend (triangles). -#'@param fileout A string of full directory path and file name indicating where -#' to save the plot. If not specified (default), a graphics device will pop up. -#'@param size_units A string indicating the units of the size of the device -#' (file or window) to plot in. Set 'px' as default. See ?Devices and the -#' creator function of the corresponding device. -#'@param res A positive number indicating resolution of the device (file or window) -#' to plot in. See ?Devices and the creator function of the corresponding device. -#'@param ... The additional parameters to be passed to function ColorBar() in -#' s2dverification for color legend creation. -#'@return A figure in popup window by default, or saved to the specified path. 
-#' -#'@author History:\cr -#'1.0 - 2019-10 (V.Torralba, \email{veronica.torralba@bsc.es}) - Original code -#' -#'@examples -#'#Example with random data -#' arr1<- arr1<- array(runif(n = 12 * 7 * 4, min=-1, max=1),dim = c(12,7,4)) -#'arr2<- array(TRUE,dim = dim(arr1)) -#'arr2[which(arr1 < 0.3)] = FALSE -#'PlotTableTriangles(var = arr1, -#' cols = c('white','#fef0d9','#fdd49e','#fdbb84','#fc8d59', -#' '#e34a33','#b30000', '#7f0000'), -#' brks = c(-1, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1), -#' xtitle = "Target month", ytitle = "Lead time", -#' xlabels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", -#' "Aug", "Sep", "Oct", "Nov", "Dec")) -#'@importFrom grDevices dev.new dev.off dev.cur -#'@export - -PlotTableTriangles<-function(var,poscols=1,posrows=2,cols=NULL,brks=NULL, - toptitle=NULL, - sig_var=NULL,col_sig='black',pch_sig=18, - cex_sig=1, - xlabels=NULL, xtitle=NULL, - ylabels=NULL, ytitle=NULL, - legend=TRUE,lab_legend=NULL, - cex_leg=1,col_leg='black', - fileout=NULL, - size_units = 'px', res = 100, - figure.width = 1, - ...){ - # Checking the dimensions - vardim<-dim(var) - nrow<-vardim[posrows] - ncol<-vardim[poscols] - poscat<-vardim[setdiff(1:length(vardim),c(posrows,poscols))] - - # If there is any filenames to store the graphics, process them - # to select the right device - if (!is.null(fileout)) { - deviceInfo <- s2dverification:::.SelectDevice(fileout = fileout, - width = 80 * ncol * figure.width, - height = 80 * nrow, - units = size_units, res = res) - saveToFile <- deviceInfo$fun - fileout <- deviceInfo$files - } - - # Open connection to graphical device - if (!is.null(fileout)) { - saveToFile(fileout) - } else if (names(dev.cur()) == 'null device') { - dev.new(units = size_units, res = res, - width = 8 * figure.width, height = 5) - } - - if (!is.null(sig_var)){ - if (!is.logical(sig_var)){ - stop('sig_var array should be provided as TRUE/FALSE') - } - if (all(dim(sig_var)!=dim(var))){ - stop('sig_var array should have the same dimensions that var') - } - } - - if (length(vardim)!=3){ - stop('var must be an array with three dimensions (4, cols, rows) ') - } - - if (poscat!=4 && poscat!=2){ - stop('var must be an array with one of the dimensions being equal to 4 or 2') - } - - # The dimensions are named - names(dim(var))[poscols]<-'cols' - names(dim(var))[posrows]<-'rows' - names(dim(var))[setdiff(1:length(vardim),c(posrows,poscols))]<-'cat' - - if (!is.null(sig_var)){ - names(dim(sig_var))<-names(dim(var)) - } - - # Checking what is available and generating missing information - if (!is.null(lab_legend)&&length(lab_legend)!=4){ - stop('The legend should contain 4 names') - } - - if (is.null(xlabels)){ - xlabels=1:ncol - } - if (is.null(ylabels)){ - ylabels=1:nrow - } - if (is.null(brks)){ - brks<-seq(min(var),max(var),length.out = 9) - } - if (is.null(cols)){ - cols<-rev(brewer.pal(length(brks)-1,'RdBu')) - } - - # The colours for each triangle/category are defined - var_cat <- array(cols[length(cols)], dim = vardim) - names(dim(var_cat)) <- names(dim(var)) - for (i in (length(cols) - 1):1) { - var_cat[var < brks[i + 1]] <- cols[i] - } - - if(legend){ - layout(matrix(c(1,2,1,3),2,2,byrow=T),widths =c(10,2),heights=c(10,2)) - par(oma=c(1,1,1,1),mar=c(5,4,4,0)) - if(is.null(lab_legend)){ - lab_legend=1:4 - } - } - - plot(ncol,nrow,xlim = c(0,ncol),ylim=c(0,nrow), xaxs="i",yaxs='i',type="n", - xaxt="n", yaxt="n",ann=F,axes=F) - - box(col='black',lwd=1) - - if (! 
is.null(toptitle)){ - title(toptitle, cex=1.5) - } - - if (!is.null(xtitle)){ - mtext(side = 1, text = xtitle, line = 4, cex=1.5) - } - if (!is.null(ytitle)){ - mtext(side = 2, text = ytitle, line = 2.5, cex=1.5) - } - - axis(1, at=(1:ncol)-0.5, las=2, labels=xlabels, cex.axis=1.5) - axis(2, at=(1:nrow)-0.5, las=2, labels=ylabels, cex.axis=1.5) - - - #The triangles are plotted - for(p in 1:ncol){ - for(l in 1:nrow){ - if (poscat==4){ - coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), - ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) - - coord_sig <- list(x=c(p-0.75,p-0.5,p-0.25,p-0.5),y=c(l-0.5,l-0.75,l-0.5,l-0.25)) - } - - if (poscat==2){ - coord_triangl<- list(xs=list(c(p-1, p, p),c(p-1, p, p-1)), - ys=list( c(l-1,l-1, l), c(l-1, l, l))) - coord_sig<- list(x=c(p-(2/3),p-(1/3)),y=c(l-(1/3),l-(2/3))) - } - for (n in 1:poscat) { - polygon(coord_triangl$xs[[n]], - coord_triangl$ys[[n]], - col = Subset( - var_cat, - along = c('cat', 'cols', 'rows'), - indices = list(n, p, l))) - if (!is.null(sig_var) && - Subset(sig_var,along = c('cat', 'cols', 'rows'), - indices = list(n, p, l))) { - points( - x = coord_sig$x[n], - y = coord_sig$y[n], - pch = pch_sig, - cex = cex_sig, - col = col_sig - ) - } - } - } - } - - # legend - - if(legend){ - # Colorbar - par(mar=c(0,0,0,0)) - ColorBar(brks = brks, cols = cols, vert=T,draw_ticks = T, draw_separators = T, - extra_margin = c(0,0,2.5,0),label_scale = 1.5,...) - - par(mar=c(0.5,2.5,0,2.5)) - plot(1,1,xlim = c(0,1),ylim=c(0,1), xaxs="i",yaxs='i',type="n", - xaxt="n", yaxt="n",ann=F,axes=F) - - box(col=col_leg) - p=l=1 - if (poscat==4){ - coord_triangl <- list(xs=list(c(p-1, p-0.5, p-1),c(p-1, p-0.5, p),c(p, p-0.5, p),c(p-1, p-0.5, p)), - ys=list( c(l-1, -0.5+l, l), c(l-1, -0.5+l, l-1),c(l-1, -0.5+l, l),c(l, -0.5+l, l))) - - coord_sig <- list(x=c(p-0.75,p-0.5,p-0.25,p-0.5),y=c(l-0.5,l-0.75,l-0.5,l-0.25)) - } - - if (poscat==2){ - coord_triangl<- list(xs=list(c(p-1, p, p),c(p-1, p, p-1)), - ys=list( c(l-1,l-1, l), c(l-1, l, l))) - coord_sig<- list(x=c(p-(2/3),p-(1/3)),y=c(l-(1/3),l-(2/3))) - } - for (n in 1:poscat) { - polygon(coord_triangl$xs[[n]], - coord_triangl$ys[[n]],border=col_leg) - text(x=coord_sig$x[[n]],y=coord_sig$y[[n]],labels = lab_legend[n],cex=cex_leg,col=col_leg) - - } - } - - # If the graphic was saved to file, close the connection with the device - if (!is.null(fileout)) dev.off() -} - -- GitLab From 3ea51d1b844012bb64756ab413584c19f22d3413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 11 Dec 2019 12:54:47 +0100 Subject: [PATCH 03/45] header in WeatherRegime for documentation --- R/WeatherRegime.R | 147 ++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 50 deletions(-) diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R index f0167b91..70a750e9 100644 --- a/R/WeatherRegime.R +++ b/R/WeatherRegime.R @@ -1,5 +1,70 @@ -AtomicWeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", - nstart = 30) { +#' @rdname WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. 
+#'It can be applied over a dataset with dimensions
+#'c(year/month, month/day, lon, lat), or the dimensionality can first be reduced by using
+#'the PCs obtained from an EOF analysis to filter the dataset.
+#'The cluster analysis can be performed with the traditional k-means or with the methods
+#'included in hclust (stats package).
+#'
+#'@references Cortesi, N., Torralba, V., González-Reviriego, N., Soret, A., and Doblas-Reyes, F.J. (2019).
+#' Characterization of European wind speed variability using weather regimes. Climate Dynamics, 53,
+#' 4961–4976, doi:10.1007/s00382-019-04839-5.
+#'@references Torralba, V. (2019). Seasonal climate prediction for the wind energy sector: methods and tools
+#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/}
+#'
+#'@param data Array of data (anomalies) with dimensions c(year/month, month/day, lon, lat).
+#'@param EOFS Whether to compute the EOFs (default = TRUE) or not (FALSE) over data.
+#'@param neofs Maximum number of EOF modes to be retained (default = 30). Only used if EOFS = TRUE.
+#'@param threshold Value with the percentage of variance to be explained by the PCs.
+#' Only sufficient PCs to explain this much variance will be used in the clustering.
+#'@param lon Vector of longitudes.
+#'@param lat Vector of latitudes.
+#'@param ncenters Number of clusters to be calculated with the clustering function.
+#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default = 'kmeans'),
+#'but the function also supports the methods included in hclust. These methods are:
+#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
+#' For more details about these methods see the hclust function documentation included in the stats package.
+#'@param nstart Parameter for the cluster analysis determining how many random sets to choose (only if method = 'kmeans' is selected).
+#'@param iter.max Parameter to select the maximum number of iterations allowed (only if method = 'kmeans' is selected).
+#'@param ncores The number of multicore threads to use for parallel computation.
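+#'@details When EOFS = TRUE and a threshold is set, only the leading PCs needed
+#' to explain that fraction of variance enter the clustering. As a worked example
+#' of the rule used in the function body below: with threshold = 90, the cluster
+#' input is dataPC$PC[, 1:minPC], where
+#' minPC <- head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1).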
+#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{pvalue} (a matrix of EOF patterns obtained by regression for each variable), +#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#' \code{center} (A vector of integers (from 1:ncenters) indicating the cluster to which each point is allocated (only if method=’kmeans’ has been selected).), +#' \code{cluster_lengths} (Length of repeated values in the cluster time series (only if method=’kmeans’ has been selected).), +#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#' +#'@examples +#'@export + +WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", + nstart = 30, iter.max = 100, ncores = NULL) { + if (length(dim(data)) > 4) { + sdate <- which(names(dim(data)) == "sdate") + ftime <- which(names(dim(data)) == "ftime") + lon_dim <- which(names(dim(data)) == "lon") + lat_dim <- which(names(dim(data)) == "lat") + dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] + data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) + margins <- 5 : length(dim(data)) + result <- Apply(data = list(data), margins = list(margins), fun = ".WeatherRegime", EOFS = EOFS, neofs = neofs, + threshold = threshold, lon = lon, lat = lat, ncenters = ncenters, method = method, + ncores = ncores) + } else { + result <- .WeatherRegime(data, EOFS = EOFS, neofs = neofs, threshold = threshold, + lon = lon, lat = lat, ncenters = ncenters, method = method) + } + return(result) +} + + +.WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", + nstart = 30) { names(dim(data)) <- c("sdate", "ftime", "lat", "lon") sdate <- which(names(dim(data)) == "sdate") @@ -24,8 +89,9 @@ AtomicWeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, } if (EOFS == TRUE) { - #data <- princomp(data[,subs], cor = FALSE)$scores - dataPC <- EOF(data, lat = as.vector(lat), lon = as.vector(lon), neofs = neofs) + dataPC <- EOF(data, lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) if (is.null(threshold)){ threshold <- sum(dataPC$var) cluster_input <- dataPC$PC @@ -40,36 +106,26 @@ AtomicWeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon) } if (method == "kmeans") { - result <- kmeans(cluster_input, centers = ncenters, + clust <- kmeans(cluster_input, centers = ncenters, iter.max = 100, nstart = nstart, trace = FALSE) - reconstructed <- array(0, c(ncenters, nlat, nlon)) + result <- array(0, c(ncenters, nlat, nlon)) data <- aperm(data, c(2, 3, 1)) - reconstructed <- Composite(data, result$cluster) - names(dim(reconstructed$composite)) <- c("lon", "lat", "cluster") + result <- Composite(data, clust$cluster) + names(dim(result$composite)) <- c("lon", "lat", "cluster") cluster_timeseries <- list(lengths = c(), values = c()) frequency <- persistence <- matrix(NA, nsdates, ncenters) for (i in 1 : nsdates) { - occurences <- rle(result$cluster[((i * nftimes) + 1 - nftimes) : (i * nftimes)]) - cluster_timeseries 
<- list(lengths = c(cluster_timeseries$lengths, occurences$lengths), - values = c(cluster_timeseries$values, occurences$values)) + occurences <- + rle(result$cluster[((i * nftimes) + 1 - nftimes):(i * nftimes)]) + cluster_timeseries <-list( lengths = c(cluster_timeseries$lengths, + occurences$lengths), + values = c(cluster_timeseries$values, occurences$values) + ) for (j in 1 : ncenters) { frequency[i,j] <- (sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 persistence[i,j] <- mean(occurences$lengths[occurences$values == j]) } } - # for (i in 1 : ncenters) { - # for (j in 1 : nsdates) { - # timeseries_subset <- cluster_timeseries$lengths#[(1 +((nftimes -1) * j)) : (nftimes * j)] - # cluster_subset <- cluster_timeseries$values[(1 +((nftimes -1) * j)) : (nftimes * j)] - # print(timeseries_subset) - # print(cluster_subset) - # print((1 +((nftimes -1) * j)) : (nftimes * j)) - # stop() - # frequency[i, j] <- (sum(timeseries_subset[cluster_subset == i]) / length(nftimes)) * 100 - # persistence[i,j] <- mean(timeseries_subset[cluster_subset == i]) - # } - # #frequency[i] <- (sum(cluster_timeseries$lengths[cluster_timeseries$values == i]) / length(result$cluster)) * 100 - # } } else { result <- hclust(dist(cluster_input), method = method) clusterCut <- cutree(result, ncenters) @@ -77,33 +133,24 @@ AtomicWeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, result <- Composite(data, clusterCut) } if (method == "kmeans") { - return(list(composite = reconstructed$composite, pvalue = reconstructed$pvalue, cluster = as.array(result$cluster), center = as.array(result$center), - cluster_lengths = as.array(cluster_timeseries$lengths), cluster_values = as.array(cluster_timeseries$values), - persistence = as.array(persistence), frequency = frequency)) + return( + list( + composite = result$composite, + pvalue = result$pvalue, + cluster = as.array(clust$cluster), + center = as.array(clust$center), + cluster_lengths = as.array(cluster_timeseries$lengths), + cluster_values = as.array(cluster_timeseries$values), + persistence = as.array(persistence), + frequency = frequency + ) + ) } else { - return(list(composite = result$composite, pvalue = result$pvalue, cluster = as.array(clusterCut))) + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = as.array(clusterCut) + )) } } - - - -WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", - nstart = 30, iter.max = 100, ncores = NULL) { - if (length(dim(data)) > 4) { - sdate <- which(names(dim(data)) == "sdate") - ftime <- which(names(dim(data)) == "ftime") - lon_dim <- which(names(dim(data)) == "lon") - lat_dim <- which(names(dim(data)) == "lat") - dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] - data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) - margins <- 5 : length(dim(data)) - result <- Apply(data = list(data), margins = list(margins), fun = "AtomicWeatherRegime", EOFS = EOFS, neofs = neofs, - threshold = threshold, lon = lon, lat = lat, ncenters = ncenters, method = method, - ncores = ncores) - } else { - result <- AtomicWeatherRegime(data, EOFS = EOFS, neofs = neofs, threshold = threshold, - lon = lon, lat = lat, ncenters = ncenters, method = method) - } - -} -- GitLab From 796e547b22eedb66f0d04424adfa729e4b484979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 11 Dec 2019 13:18:21 +0100 Subject: [PATCH 04/45] adding the first version of 
the documentation
---
 R/RegimesAssign.R | 184 +++++++++++++++++++++++++++-------------------
 R/WeatherRegime.R |   3 +-
 2 files changed, 110 insertions(+), 77 deletions(-)

diff --git a/R/RegimesAssign.R b/R/RegimesAssign.R
index 5fa4992b..fd7ca6a4 100644
--- a/R/RegimesAssign.R
+++ b/R/RegimesAssign.R
@@ -1,37 +1,123 @@
-anom2regime <- function(ref, target, method = 'distance', lat) {
-  posdim <- which(names(dim(ref)) == 'nclust')
-  poslat <- which(names(dim(ref)) == 'lat')
-  poslon <- which(names(dim(ref)) == 'lon')
+#' @rdname RegimesAssign
+#' @title Function for matching a field of anomalies with
+#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function).
+#'
+#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es}
+#'
+#' @description This function performs the matching between a field of anomalies and a set
+#' of maps which will be used as a reference. The anomalies will be assigned to the reference map
+#' for which the minimum Euclidean distance (method = 'distance') or the highest spatial correlation
+#' (method = 'ACC') is obtained.
+#'
+#'@references Torralba, V. (2019). Seasonal climate prediction for the wind energy sector: methods and tools
+#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/}
+#'
+#'@param data An array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.
+#'@param ref.maps Array with three dimensions (lon, lat, k) containing the k = 1,..,n reference maps used for the matching.
+#'@param lat Vector of latitudes.
+#'@param method Whether the matching will be performed in terms of minimum distance (default = 'distance') or
+#' maximum spatial correlation (method = 'ACC') between the maps.
+#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
+#' or only k=1 for any specific cluster, i.e., case (*2)),
+#' \code{pvalue} (3-d array (lon, lat, k) containing the p-value of the composites obtained through a t-test
+#' that accounts for the serial dependence of the data, with the same structure as Composite.),
+#' \code{cluster} (array with the same dimensions as data, except latitude and longitude, indicating the ref.maps to which each point is allocated.)
, +#' \code{frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' +#' +#'@examples +#'@export + + +RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { + posdim <- which(names(dim(ref.maps)) == 'nclust') + poslat <- which(names(dim(ref.maps)) == 'lat') + poslon <- which(names(dim(ref.maps)) == 'lon') + poslat_ano <- which(names(dim(data)) == 'lat') + poslon_ano <- which(names(dim(data)) == 'lon') + + nclust <- dim(ref.maps)[posdim] + nlat <- dim(ref.maps)[poslat] + nlon <- dim(ref.maps)[poslon] + + + if (is.null(names(dim(ref.maps))) | is.null(names(dim(data)))) { + stop( + 'The arrays should include dimensions names map.ref[nclust,lat,lon] + and map.targ [lat,lon]' + ) + } + + if (length(lat) != dim(ref.maps)[poslat]) { + stop('latitudes do not match with the maps') + } + + assign <- + Apply( + data = list(map.targ = data), + margins = c((1:length(dim( + data + )))[-c(poslat_ano, poslon_ano)]) , + fun = '.anom2regime', + map.ref = ref.maps, + lat = lat, + method = method + ) + + anom_array <- + array(data, dim = c(prod(dim(data)[-c(poslat_ano, poslon_ano)]), nlat, nlon)) + rm(data) + index <- as.vector(assign$output1) + recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = index) + freqs <- rep(NA, nclust) + for (n in 1:nclust) { + freqs[n] <- (length(which(index == n)) / length(index)) * 100 + } + output <- + list( + composite = recon$composite, + pvalue = recon$pvalue, + cluster = assign$output1, + frequency = freqs + ) + return(output) +} + + +.anom2regime <- function(map.ref, map.targ, method = 'distance', lat) { + posdim <- which(names(dim(map.ref)) == 'nclust') + poslat <- which(names(dim(map.ref)) == 'lat') + poslon <- which(names(dim(map.ref)) == 'lon') - nclust <- dim(ref)[posdim] + nclust <- dim(map.ref)[posdim] - if (all(dim(ref)[-posdim] != dim(target))) { - stop('The target should have the same dimensions [lat,lon] that - the reference ') + if (all(dim(map.ref)[-posdim] != dim(map.targ))) { + stop('The map.targ should have the same dimensions [lat,lon] that + the map.reference ') } - if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { + if (is.null(names(dim(map.ref))) | is.null(names(dim(map.targ)))) { stop( - 'The arrays should include dimensions names ref[nclust,lat,lon] - and target [lat,lon]' + 'The arrays should include dimensions names map.ref[nclust,lat,lon] + and map.targ [lat,lon]' ) } - if (length(lat) != dim(ref)[poslat]) { + if (length(lat) != dim(map.ref)[poslat]) { stop('latitudes do not match with the maps') } # This dimensions are reorganized - ref <- aperm(ref, c(posdim, poslat, poslon)) - target <- - aperm(target, c(which(names(dim( - target + map.ref <- aperm(map.ref, c(posdim, poslat, poslon)) + map.targ <- + aperm(map.targ, c(which(names(dim( + map.targ )) == 'lat'), which(names(dim( - target + map.targ )) == 'lon'))) # weights are defined - latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(map.ref)[3]) rmsdiff <- function(x, y) { @@ -55,10 +141,10 @@ anom2regime <- function(ref, target, method = 'distance', lat) { for (i in 1:nclust) { corr[i] <- ACC(InsertDim(InsertDim( - InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 + InsertDim(map.ref[i, , ] * latWeights, 1, 1), 2, 1 ), 3, 1), InsertDim(InsertDim( - InsertDim(target * latWeights, 1, 1), 2, 1 + InsertDim(map.targ * latWeights, 1, 1), 2, 1 ), 3, 1))$ACC[2] } assign <- which(corr == max(corr)) @@ -67,64 
+153,10 @@ anom2regime <- function(ref, target, method = 'distance', lat) { if (method == 'distance') { rms <- rep(NA, nclust) for (i in 1:nclust) { - rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) + rms[i] <- rmsdiff(map.ref[i, , ] * latWeights, map.targ * latWeights) } assign <- which(rms == min(rms)) } return(assign) - } - -RegimesAssign <- function(var_ano, ref_maps, lats, method = 'distance') { - posdim <- which(names(dim(ref_maps)) == 'nclust') - poslat <- which(names(dim(ref_maps)) == 'lat') - poslon <- which(names(dim(ref_maps)) == 'lon') - poslat_ano <- which(names(dim(var_ano)) == 'lat') - poslon_ano <- which(names(dim(var_ano)) == 'lon') - - nclust <- dim(ref_maps)[posdim] - nlat <- dim(ref_maps)[poslat] - nlon <- dim(ref_maps)[poslon] - - - if (is.null(names(dim(ref_maps))) | is.null(names(dim(var_ano)))) { - stop( - 'The arrays should include dimensions names ref[nclust,lat,lon] - and target [lat,lon]' - ) - } - - if (length(lats) != dim(ref_maps)[poslat]) { - stop('latitudes do not match with the maps') - } - - assign <- - Apply( - data = list(target = var_ano), - margins = c((1:length(dim( - var_ano - )))[-c(poslat_ano, poslon_ano)]) , - fun = 'anom2regime', - ref = ref_maps, - lat = lats, - method = method - ) - - anom_array <- - array(var_ano, dim = c(prod(dim(var_ano)[-c(poslat_ano, poslon_ano)]), nlat, nlon)) - rm(var_ano) - index <- as.vector(assign$output1) - recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = index) - freqs <- rep(NA, nclust) - for (n in 1:nclust) { - freqs[n] <- (length(which(index == n)) / length(index)) * 100 - } - output <- - list( - composite = recon$composite, - pvalue = recon$pvalue, - cluster = assign$output1, - frequency = freqs - ) - return(output) - } +} diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R index 70a750e9..7200240d 100644 --- a/R/WeatherRegime.R +++ b/R/WeatherRegime.R @@ -32,7 +32,8 @@ #'@param ncores The number of multicore threads to use for parallel computation}. 
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (a matrix of EOF patterns obtained by regression for each variable), +#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial +# dependence of the data with the same structure as Composite.), #' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), #' \code{center} (A vector of integers (from 1:ncenters) indicating the cluster to which each point is allocated (only if method=’kmeans’ has been selected).), #' \code{cluster_lengths} (Length of repeated values in the cluster time series (only if method=’kmeans’ has been selected).), -- GitLab From 717f4efb2ae17b653bc2eaebe32ac3dce0c0041b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 11 Dec 2019 18:37:56 +0100 Subject: [PATCH 05/45] changes in the functions --- R/RegimesAssign.R | 96 +++++++------- R/WeatherRegime.R | 309 ++++++++++++++++++++++++++++------------------ 2 files changed, 237 insertions(+), 168 deletions(-) diff --git a/R/RegimesAssign.R b/R/RegimesAssign.R index fd7ca6a4..aaa5ead7 100644 --- a/R/RegimesAssign.R +++ b/R/RegimesAssign.R @@ -24,72 +24,69 @@ #' \code{cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , #' \code{frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #' -#' +#'@import s2dverification +#'@import multiApply #'@examples #'@export - RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { - posdim <- which(names(dim(ref.maps)) == 'nclust') - poslat <- which(names(dim(ref.maps)) == 'lat') - poslon <- which(names(dim(ref.maps)) == 'lon') - poslat_ano <- which(names(dim(data)) == 'lat') - poslon_ano <- which(names(dim(data)) == 'lon') - - nclust <- dim(ref.maps)[posdim] - nlat <- dim(ref.maps)[poslat] - nlon <- dim(ref.maps)[poslon] - - + if (is.null(names(dim(ref.maps))) | is.null(names(dim(data)))) { stop( - 'The arrays should include dimensions names map.ref[nclust,lat,lon] + 'The arrays should include dimensions names map.ref[lat,lon,cluster] and map.targ [lat,lon]' ) } - if (length(lat) != dim(ref.maps)[poslat]) { + if (length(lat) != dim(ref.maps)['lat']) { stop('latitudes do not match with the maps') } + + if (any(names(dim(ref.maps))=='member')){ + names(dim(ref.maps)['member'])='member_obs' + } + if (any(names(dim(ref.maps))=='dataset')){ + names(dim(ref.maps)['dataset'])='dataset_obs' + } + print(dim(ref.maps)) assign <- - Apply( - data = list(map.targ = data), - margins = c((1:length(dim( - data - )))[-c(poslat_ano, poslon_ano)]) , + Apply( data = list(map.targ = data,map.ref=ref.maps), + target_dims = list(c('lat','lon'), + c('lat','lon','cluster')), fun = '.anom2regime', - map.ref = ref.maps, lat = lat, method = method - ) - + )[[1]] + anom_array <- array(data, dim = c(prod(dim(data)[-c(poslat_ano, poslon_ano)]), nlat, nlon)) - rm(data) - index <- as.vector(assign$output1) - recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = index) + # to be replaced with MergeDims + + recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = assign) freqs <- rep(NA, nclust) for (n in 1:nclust) { - freqs[n] <- (length(which(index == n)) 
/ length(index)) * 100 + freqs[n] <- (length(which(assign == n)) / length(assign)) * 100 } output <- list( composite = recon$composite, pvalue = recon$pvalue, - cluster = assign$output1, + cluster = assign, frequency = freqs ) return(output) } + + .anom2regime <- function(map.ref, map.targ, method = 'distance', lat) { - posdim <- which(names(dim(map.ref)) == 'nclust') + posdim <- which(names(dim(map.ref)) == 'cluster') poslat <- which(names(dim(map.ref)) == 'lat') poslon <- which(names(dim(map.ref)) == 'lon') - nclust <- dim(map.ref)[posdim] + nclust <- dim(map.ref)['cluster'] if (all(dim(map.ref)[-posdim] != dim(map.targ))) { stop('The map.targ should have the same dimensions [lat,lon] that @@ -103,7 +100,7 @@ RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { ) } - if (length(lat) != dim(map.ref)[poslat]) { + if (length(lat) != dim(map.ref)['lat']) { stop('latitudes do not match with the maps') } @@ -117,24 +114,8 @@ RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { )) == 'lon'))) # weights are defined - latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(map.ref)[3]) - - - rmsdiff <- function(x, y) { - dims <- dim(x) - ndims <- length(dims) - if (ndims != 2 | ndims != length(dim(y))) { - stop('x and y should be maps') - } - map_diff <- NA * x - for (i in 1:dims[1]) { - for (j in 1:dims[2]) { - map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 - } - } - rmsdiff <- sqrt(mean(map_diff)) - return(rmsdiff) - } + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(map.ref)['lon']) + names(dim(latWeights))<-c('lat','lon') if (method == 'ACC') { corr <- rep(NA, nclust) @@ -153,10 +134,25 @@ RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { if (method == 'distance') { rms <- rep(NA, nclust) for (i in 1:nclust) { - rms[i] <- rmsdiff(map.ref[i, , ] * latWeights, map.targ * latWeights) + rms[i] <- .rmsdiff(map.ref[i, , ] * latWeights, map.targ * latWeights) } assign <- which(rms == min(rms)) } return(assign) } +.rmsdiff <- function(x, y) { + dims <- dim(x) + ndims <- length(dims) + if (ndims != 2 | ndims != length(dim(y))) { + stop('x and y should be maps') + } + map_diff <- NA * x + for (i in 1:dims[1]) { + for (j in 1:dims[2]) { + map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 + } + } + rmsdiff <- sqrt(mean(map_diff)) + return(rmsdiff) +} diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R index 7200240d..8f50babb 100644 --- a/R/WeatherRegime.R +++ b/R/WeatherRegime.R @@ -3,33 +3,33 @@ #' #' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} #' -#' @description This function computes the weather regimes from a cluster analysis. -#'It can be applied over the dataset with dimensions -#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the -#'EOFs analysis to filter the dataset. -#'The cluster analysis can be performed with the traditional k-means or those methods +#' @description This function computes the weather regimes from a cluster analysis. +#'It can be applied over the dataset with dimensions +#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the +#'EOFs analysis to filter the dataset. +#'The cluster analysis can be performed with the traditional k-means or those methods #'included in the hclust (stats package). #' -#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). -#' Characterization of European wind speed variability using weather regimes. 
Climate Dynamics,53, -#' 4961–4976, doi:10.1007/s00382-019-04839-5. -#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +#' 4961–4976, doi:10.1007/s00382-019-04839-5. +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' -#'@param data Array of data (anomalies) with dimensions c(year/month, month/day, lon, lat). +#'@param data n array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. #'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. #'@param threshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. #'@param ncenters Number of clusters to be calculated with the clustering function. -#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) -#'but the function also support the different methods included in the hclust . These methods are: -#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +#'but the function also support the different methods included in the hclust . These methods are: +#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. #'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param ncores The number of multicore threads to use for parallel computation}. +#'@param ncores The number of multicore threads to use for parallel computation}. 
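+#'@details A minimal call sketch (array sizes and latitude values below are
+#' illustrative only; the dimension names are the ones this function expects):
+#'\preformatted{
+#'anom <- array(rnorm(8 * 20 * 10 * 15),
+#'              dim = c(sdate = 8, ftime = 20, lat = 10, lon = 15))
+#'res <- WeatherRegime(anom, EOFS = FALSE, lat = seq(40, 58, 2),
+#'                     ncenters = 4)
+#'}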
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), #' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial @@ -39,119 +39,192 @@ #' \code{cluster_lengths} (Length of repeated values in the cluster time series (only if method=’kmeans’ has been selected).), #' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#' -#'@examples +#'@import s2dverification +#'@import multiApply +#'@examples #'@export -WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", - nstart = 30, iter.max = 100, ncores = NULL) { - if (length(dim(data)) > 4) { - sdate <- which(names(dim(data)) == "sdate") - ftime <- which(names(dim(data)) == "ftime") - lon_dim <- which(names(dim(data)) == "lon") - lat_dim <- which(names(dim(data)) == "lat") - dims <- c(1 : length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] - data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) - margins <- 5 : length(dim(data)) - result <- Apply(data = list(data), margins = list(margins), fun = ".WeatherRegime", EOFS = EOFS, neofs = neofs, - threshold = threshold, lon = lon, lat = lat, ncenters = ncenters, method = method, - ncores = ncores) - } else { - result <- .WeatherRegime(data, EOFS = EOFS, neofs = neofs, threshold = threshold, - lon = lon, lat = lat, ncenters = ncenters, method = method) +WeatherRegime <- + function(data, + EOFS = TRUE, + neofs = 30, + threshold = NULL, + lon = NULL, + lat = NULL, + ncenters = NULL, + method = "kmeans", + nstart = 30, + iter.max = 100, + ncores = NULL) { + if (is.null(names(dim(data)))) { + stop('data must be an array with named dimensions') + } + if (length(dim(data)) > 4) { + sdate <- which(names(dim(data)) == "sdate") + ftime <- which(names(dim(data)) == "ftime") + lon_dim <- which(names(dim(data)) == "lon") + lat_dim <- which(names(dim(data)) == "lat") + dims <- + c(1:length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] + data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) + margins <- 5:length(dim(data)) + result <- + Apply( + data = list(data), + margins = list(margins), + fun = ".WeatherRegime", + EOFS = EOFS, + neofs = neofs, + threshold = threshold, + lon = lon, + lat = lat, + ncenters = ncenters, + method = method, + ncores = ncores + ) + } else { + result <- + .WeatherRegime( + data, + EOFS = EOFS, + neofs = neofs, + threshold = threshold, + lon = lon, + lat = lat, + ncenters = ncenters, + method = method + ) + } + return(result) } - return(result) -} -.WeatherRegime <- function(data, EOFS = TRUE, neofs = 30, threshold = NULL, lon = NULL, lat = NULL, ncenters = NULL, method = "kmeans", - nstart = 30) { - - names(dim(data)) <- c("sdate", "ftime", "lat", "lon") - sdate <- which(names(dim(data)) == "sdate") - ftime <- which(names(dim(data)) == "ftime") - nftimes <- dim(data)[ftime] - nsdates <- dim(data)[sdate] - lon2 <- which(names(dim(data)) == "lon") - lat2 <- which(names(dim(data)) == "lat") - data <- aperm(data, c(ftime, sdate, lat2, lon2)) - nlon <- dim(data)[lon2] - nlat <- dim(data)[lat2] - dim(data) <- c(nftimes * nsdates, nlat, nlon) - - if (is.null(ncenters)) { - stop("ncenters 
must be specified") - } - if (EOFS == TRUE && is.null(lon)) { - stop("longitudes must be specified") - } - if (EOFS == TRUE && is.null(lat)) { - stop("latitudes must be specified") - } - - if (EOFS == TRUE) { - dataPC <- EOF(data, lat = as.vector(lat), - lon = as.vector(lon), - neofs = neofs) - if (is.null(threshold)){ - threshold <- sum(dataPC$var) - cluster_input <- dataPC$PC +.WeatherRegime <- + function(data, + EOFS = TRUE, + neofs = 30, + threshold = NULL, + lon = NULL, + lat = NULL, + ncenters = NULL, + method = "kmeans", + nstart = 30) { + if (is.null(names(dim(data)))) { + stop('data must be an array with named dimensions') + } + if (!is.null(lat) && dim(data)['lat'] != length(lat)) { + stop('the latitudes do not match with the lat dimension of data') + } + if (is.null(ncenters)) { + stop("ncenters must be specified") + } + if (EOFS == TRUE && is.null(lon)) { + stop("longitudes must be specified") + } + if (EOFS == TRUE && is.null(lat)) { + stop("latitudes must be specified") + } + + sdate <- which(names(dim(data)) == "sdate") + ftime <- which(names(dim(data)) == "ftime") + nftimes <- dim(data)[ftime] + nsdates <- dim(data)[sdate] + lon2 <- which(names(dim(data)) == "lon") + lat2 <- which(names(dim(data)) == "lat") + data <- aperm(data, c(ftime, sdate, lat2, lon2)) + nlon <- dim(data)[lon2] + nlat <- dim(data)[lat2] + dim(data) <- + c(nftimes * nsdates, nlat, nlon)# to be changed by mergedims + names(dim(data)) <- c('time', 'lat', 'lon') + + + if (EOFS == TRUE) { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + if (is.null(threshold)) { + threshold <- sum(dataPC$var) + cluster_input <- dataPC$PC + } else { + threshold <- threshold + minPC <- + head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1) + cluster_input <- dataPC$PC[, 1:minPC] + } } else { - threshold <- threshold - minPC <- head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1) - cluster_input <- dataPC$PC[, 1 : minPC] } - } else { - cluster_input <- data - latWeights <- InsertDim(InsertDim(cos(lat*pi/180), 1, nftimes*nsdates), 3, nlon) - cluster_input <- cluster_input * latWeights - dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon) - } - if (method == "kmeans") { - clust <- kmeans(cluster_input, centers = ncenters, - iter.max = 100, nstart = nstart, trace = FALSE) - result <- array(0, c(ncenters, nlat, nlon)) - data <- aperm(data, c(2, 3, 1)) - result <- Composite(data, clust$cluster) - names(dim(result$composite)) <- c("lon", "lat", "cluster") - cluster_timeseries <- list(lengths = c(), values = c()) - frequency <- persistence <- matrix(NA, nsdates, ncenters) - for (i in 1 : nsdates) { - occurences <- - rle(result$cluster[((i * nftimes) + 1 - nftimes):(i * nftimes)]) - cluster_timeseries <-list( lengths = c(cluster_timeseries$lengths, - occurences$lengths), - values = c(cluster_timeseries$values, occurences$values) - ) - for (j in 1 : ncenters) { - frequency[i,j] <- (sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 - persistence[i,j] <- mean(occurences$lengths[occurences$values == j]) + cluster_input <- data + latWeights <- + InsertDim(InsertDim(cos(lat * pi / 180), 1, nftimes * nsdates), 3, nlon) + cluster_input <- cluster_input * latWeights + dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon) + } + if (method == "kmeans") { + if (any(is.na(cluster_input))) { + posnas <- unique(which(is.na(cluster_input), arr.ind = T)[, 2]) + cluster_input <- cluster_input[, -posnas] + } + clust <- kmeans( + cluster_input, + centers = ncenters, + iter.max = 
100, + nstart = nstart, + trace = FALSE + ) + + if (any(is.na(cluster_input))) { + centers <- array(NA, dim = c(ncenters, nlon * nlat)) + centers [, -c(posnas)] <- as.array(clust$center) + } else{ + centers <- as.array(clust$center) + } + result <- array(0, c(ncenters, nlat, nlon)) + # the order of the data dimensions is changed ('lat','lon','time') + result <- Composite(aperm(data, c(2, 3, 1)), clust$cluster) + names(dim(result$composite)) <- c("lat", "lon", "cluster") + cluster_timeseries <- list(lengths = c(), values = c()) + frequency <- persistence <- matrix(NA, nsdates, ncenters) + for (i in 1:nsdates) { + occurences <- + rle(clust$cluster[((i * nftimes) + 1 - nftimes):(i * nftimes)]) + cluster_timeseries <- + list( + lengths = c(cluster_timeseries$lengths, + occurences$lengths), + values = c(cluster_timeseries$values, occurences$values) + ) + for (j in 1:ncenters) { + frequency[i, j] <- + (sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 + persistence[i, j] <- + mean(occurences$lengths[occurences$values == j]) + } } + } else { + result <- hclust(dist(cluster_input), method = method) + clusterCut <- cutree(result, ncenters) + result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) } - } else { - result <- hclust(dist(cluster_input), method = method) - clusterCut <- cutree(result, ncenters) - data <- aperm(data, c(3, 2, 1)) - result <- Composite(data, clusterCut) - } - if (method == "kmeans") { - return( - list( + if (method == "kmeans") { + return( + list( + composite = result$composite, + pvalue = result$pvalue, + cluster = as.array(clust$cluster), + center = centers, + cluster_lengths = as.array(cluster_timeseries$lengths), + cluster_values = as.array(cluster_timeseries$values), + persistence = as.array(persistence), + frequency = frequency + ) + ) + } else { + return(list( composite = result$composite, pvalue = result$pvalue, - cluster = as.array(clust$cluster), - center = as.array(clust$center), - cluster_lengths = as.array(cluster_timeseries$lengths), - cluster_values = as.array(cluster_timeseries$values), - persistence = as.array(persistence), - frequency = frequency - ) - ) - } else { - return(list( - composite = result$composite, - pvalue = result$pvalue, - cluster = as.array(clusterCut) - )) + cluster = as.array(clusterCut) + )) + } } -} -- GitLab From e4c28c85e29dc4a12f3834503855a71aa678b3a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 4 Mar 2020 19:15:24 +0100 Subject: [PATCH 06/45] Adding changes in Weather Regimes --- R/CST_WeatherRegime.R | 279 ++++++++++++++++++++++++++++++++++++++++++ R/WeatherRegime.R | 230 ---------------------------------- 2 files changed, 279 insertions(+), 230 deletions(-) create mode 100644 R/CST_WeatherRegime.R delete mode 100644 R/WeatherRegime.R diff --git a/R/CST_WeatherRegime.R b/R/CST_WeatherRegime.R new file mode 100644 index 00000000..7b0d64fa --- /dev/null +++ b/R/CST_WeatherRegime.R @@ -0,0 +1,279 @@ +#' @rdname CST_WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. +#'It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced +#'by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. 
+#'The cluster analysis can be performed with the traditional k-means or those methods
+#'included in the hclust (stats package).
+#'
+#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019).
+#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53,
+#' 4961–4976, doi:10.1007/s00382-019-04839-5.
+#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools
+#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/}
+#'
+#'@param data a 's2dv_cube' object
+
+#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.
+#'@param neofs number of modes to be kept (default = 30).
+#'@param varThreshold Value with the percentage of variance to be explained by the PCs.
+#' Only sufficient PCs to explain this much variance will be used in the clustering.
+#'@param lon Vector of longitudes.
+#'@param lat Vector of latitudes.
+#'@param ncenters Number of clusters to be calculated with the clustering function.
+#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default='kmeans'),
+#'but the function also supports the different methods included in the hclust function. These methods are:
+#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
+#' For more details about these methods see the hclust function documentation included in the stats package.
+#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).
+#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).
+#'@param ncores The number of multicore threads to use for parallel computation.
+#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
+# or only k=1 for any specific cluster, i.e., case (*2)),
+#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial
+# dependence of the data with the same structure as Composite.),
+#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.),
+#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method='kmeans' has been selected)),
+#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method='kmeans' has been selected).),
+#'@import s2dverification
+#'@import multiApply
+#'@examples
+#'@export
+#'
+CST_WeatherRegime <- function(data,ncenters = NULL,
+                              EOFS = TRUE,neofs = 30,
+                              varThreshold = NULL, lon = NULL,
+                              lat = NULL, method = "kmeans",
+                              iter.max=100, nstart = 30,
+                              ncores = NULL) {
+  if (!inherits(data, 's2dv_cube')) {
+    stop("Parameter 'data' must be of the class 's2dv_cube', ",
+         "as output by CSTools::CST_Load.")
+  }
+  data$data <- WeatherRegime(data$data,ncenters = ncenters ,
+                             EOFS = EOFS, neofs = neofs,
+                             varThreshold = varThreshold, lon = lon,
+                             lat = lat, method = method,
+                             iter.max=iter.max, nstart = nstart,
+                             ncores = ncores)
+  return(data)
+}
+
+#' @rdname WeatherRegimes
+#' @title Function for Calculating the Cluster analysis
+#'
+#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es}
+#'
+#' @description This function computes the weather regimes from a cluster analysis.
+#'It can be applied over the dataset with dimensions
+#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the
+#'EOFs analysis to filter the dataset.
+#'The cluster analysis can be performed with the traditional k-means or those methods
+#'included in the hclust (stats package).
+#'
+#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019).
+#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53,
+#' 4961–4976, doi:10.1007/s00382-019-04839-5.
+#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools
+#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/}
+#'
+#'@param data n array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.
+#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.
+#'@param neofs number of modes to be kept (default = 30).
+#'@param varThreshold Value with the percentage of variance to be explained by the PCs.
+#' Only sufficient PCs to explain this much variance will be used in the clustering.
+#'@param lon Vector of longitudes.
+#'@param lat Vector of latitudes.
+#'@param ncenters Number of clusters to be calculated with the clustering function.
+#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default='kmeans'),
+#'but the function also supports the different methods included in the hclust function. These methods are:
+#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
+#' For more details about these methods see the hclust function documentation included in the stats package.
+#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param ncores The number of multicore threads to use for parallel computation}. +#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial +# dependence of the data with the same structure as Composite.), +#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#'@import s2dverification +#'@import multiApply +#'@examples +#'@export + +WeatherRegime <- function(data, ncenters = NULL, + EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30, + ncores = NULL) { + + if (is.null(names(dim(data)))) { + stop('data must be an array with named dimensions') + } + + dimData <- names(dim(data)) + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + + if ('sdate' %in% dimData && 'ftime' %in% dimData){ + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } + + output <- Apply(data = list(data), + target_dims = c('time','lat','lon'), + fun = ".WeatherRegime", + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, + lon = lon, lat = lat, + ncenters = ncenters, + method = method, + ncores = ncores) + + if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData){ + # The frequency and the persistency are computed as they are useful + # parameters in the cluster analysis + extra_output <- Apply(data=output$cluster, + target_dims = 'time', + fun=.freqPer, + nsdates=nsdates, + nftimes=nftimes , + ncenters = ncenters) + + output <- list(composite=output$composite, + pvalue=output$pvalue, + cluster=output$cluster, + frequency=extra_output$frequency, + persistence=extra_output$persistence) + } + return(output) +} + +.WeatherRegime <- function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30) { + + if (is.null(names(dim(data)))) { + stop('data must be an array with named dimensions') + } + if (!is.null(lat) && dim(data)['lat'] != length(lat)) { + stop('the latitudes do not match with the lat dimension of data') + } + if (is.null(ncenters)) { + stop("ncenters must be specified") + } + if (EOFS == TRUE && is.null(lon)) { + stop("longitudes must be specified") + } + if (EOFS == TRUE && is.null(lat)) { + stop("latitudes must be specified") + } + + nlon <- dim(data)['lat'] + nlat <- dim(data)['lon'] + + if (EOFS == TRUE) { + if (is.null(varThreshold)) { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + cluster_input <- dataPC$PC + } else { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = 30) + minPC <- + head(as.numeric(which(cumsum(dataPC$var) > varThreshold)), 1) + cluster_input <- dataPC$PC[, 1:minPC] + 
} + } else { + #if (latWeights){ + # latitude weights are applied on the data + dataW <- aperm(Apply(data, target_dims = 'lat', + function (x, la) { + x * cos(la * pi / 180)}, + la = lat)[[1]], c(2, 1, 3)) + + cluster_input <- MergeDims(dataW, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm=T) + #}else { + #cluster_input <- MergeDims(data2, merge_dims = c('lat','lon'), + # rename_dim = 'space',na.rm=T) + #} + } + + if (method == "kmeans") { + if (any(is.na(cluster_input))) { + posnas <- unique(which(is.na(cluster_input), arr.ind = T)[, 2]) + cluster_input <- cluster_input[, -posnas] + } + clust <- kmeans( + cluster_input, + centers = ncenters, + iter.max = iter.max, + nstart = nstart, + trace = FALSE) + + if (any(is.na(cluster_input))) { + centers <- array(NA, dim = c(ncenters, nlon * nlat)) + centers [, -c(posnas)] <- as.array(clust$center) + } else{ + centers <- as.array(clust$center) + } + result <- array(0, c(ncenters, nlat, nlon)) + # the order of the data dimensions is changed ('lat','lon','time') + result <- Composite(aperm(data,c(3,2,1)), clust$cluster) + + } else { + result <- hclust(dist(cluster_input), method = method) + clusterCut <- cutree(result, ncenters) + result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) + } + result <- lapply(1:length(result), + function (n) {names(dim(result[[n]])) <- c("lon", "lat", "cluster") + return (result[[n]])}) + + names(result) <- c('composite','pvalue') + + if (method == "kmeans") { + clust <- as.array(clust$cluster) + names(dim(clust)) <- 'time' +: return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } else { + clust <- as.array(clusterCut) + names(dim(clust)) <- 'time' + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } +} + +.freqPer<- function (clust, nsdates, nftimes, ncenters){ + frequency <- persistence <- matrix(NA, nsdates, ncenters) + x <- as.vector(clust) + for (i in 1:nsdates) { + occurences <-rle(x[((i * nftimes) + 1 - nftimes):(i * nftimes)]) + for (j in 1:ncenters) { + frequency[i, j] <-(sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 + persistence[i, j] <- mean(occurences$lengths[occurences$values == j]) + } + } + return(list(frequency=frequency, + persistence=persistence)) +} + diff --git a/R/WeatherRegime.R b/R/WeatherRegime.R deleted file mode 100644 index 8f50babb..00000000 --- a/R/WeatherRegime.R +++ /dev/null @@ -1,230 +0,0 @@ -#' @rdname WeatherRegimes -#' @title Function for Calculating the Cluster analysis -#' -#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -#' -#' @description This function computes the weather regimes from a cluster analysis. -#'It can be applied over the dataset with dimensions -#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the -#'EOFs analysis to filter the dataset. -#'The cluster analysis can be performed with the traditional k-means or those methods -#'included in the hclust (stats package). -#' -#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). -#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, -#' 4961–4976, doi:10.1007/s00382-019-04839-5. -#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools -#' for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} -#' -#'@param data n array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. -#'@param threshold Value with the percentage of variance to be explained by the PCs. -#' Only sufficient PCs to explain this much variance will be used in the clustering. -#'@param lon Vector of longitudes. -#'@param lat Vector of latitudes. -#'@param ncenters Number of clusters to be calculated with the clustering function. -#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) -#'but the function also support the different methods included in the hclust . These methods are: -#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). -#' For more details about these methods see the hclust function documentation included in the stats package. -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). -#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param ncores The number of multicore threads to use for parallel computation}. -#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial -# dependence of the data with the same structure as Composite.), -#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{center} (A vector of integers (from 1:ncenters) indicating the cluster to which each point is allocated (only if method=’kmeans’ has been selected).), -#' \code{cluster_lengths} (Length of repeated values in the cluster time series (only if method=’kmeans’ has been selected).), -#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), -#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#'@import s2dverification -#'@import multiApply -#'@examples -#'@export - -WeatherRegime <- - function(data, - EOFS = TRUE, - neofs = 30, - threshold = NULL, - lon = NULL, - lat = NULL, - ncenters = NULL, - method = "kmeans", - nstart = 30, - iter.max = 100, - ncores = NULL) { - if (is.null(names(dim(data)))) { - stop('data must be an array with named dimensions') - } - if (length(dim(data)) > 4) { - sdate <- which(names(dim(data)) == "sdate") - ftime <- which(names(dim(data)) == "ftime") - lon_dim <- which(names(dim(data)) == "lon") - lat_dim <- which(names(dim(data)) == "lat") - dims <- - c(1:length(dim(data)))[-c(sdate, ftime, lon_dim, lat_dim)] - data <- aperm(data, c(sdate, ftime, lat_dim, lon_dim, dims)) - margins <- 5:length(dim(data)) - result <- - Apply( - data = list(data), - margins = list(margins), - fun = ".WeatherRegime", - EOFS = EOFS, - neofs = neofs, - threshold = threshold, - lon = lon, - lat = lat, - ncenters = ncenters, - method = method, - ncores = ncores - ) - } else { - result <- - .WeatherRegime( - data, - EOFS = EOFS, - neofs = neofs, - threshold = 
threshold, - lon = lon, - lat = lat, - ncenters = ncenters, - method = method - ) - } - return(result) - } - - -.WeatherRegime <- - function(data, - EOFS = TRUE, - neofs = 30, - threshold = NULL, - lon = NULL, - lat = NULL, - ncenters = NULL, - method = "kmeans", - nstart = 30) { - if (is.null(names(dim(data)))) { - stop('data must be an array with named dimensions') - } - if (!is.null(lat) && dim(data)['lat'] != length(lat)) { - stop('the latitudes do not match with the lat dimension of data') - } - if (is.null(ncenters)) { - stop("ncenters must be specified") - } - if (EOFS == TRUE && is.null(lon)) { - stop("longitudes must be specified") - } - if (EOFS == TRUE && is.null(lat)) { - stop("latitudes must be specified") - } - - sdate <- which(names(dim(data)) == "sdate") - ftime <- which(names(dim(data)) == "ftime") - nftimes <- dim(data)[ftime] - nsdates <- dim(data)[sdate] - lon2 <- which(names(dim(data)) == "lon") - lat2 <- which(names(dim(data)) == "lat") - data <- aperm(data, c(ftime, sdate, lat2, lon2)) - nlon <- dim(data)[lon2] - nlat <- dim(data)[lat2] - dim(data) <- - c(nftimes * nsdates, nlat, nlon)# to be changed by mergedims - names(dim(data)) <- c('time', 'lat', 'lon') - - - if (EOFS == TRUE) { - dataPC <- EOF(data, - lat = as.vector(lat), - lon = as.vector(lon), - neofs = neofs) - if (is.null(threshold)) { - threshold <- sum(dataPC$var) - cluster_input <- dataPC$PC - } else { - threshold <- threshold - minPC <- - head(as.numeric(which(cumsum(dataPC$var) > threshold)), 1) - cluster_input <- dataPC$PC[, 1:minPC] - } - } else { - cluster_input <- data - latWeights <- - InsertDim(InsertDim(cos(lat * pi / 180), 1, nftimes * nsdates), 3, nlon) - cluster_input <- cluster_input * latWeights - dim(cluster_input) <- c(nftimes * nsdates, nlat * nlon) - } - if (method == "kmeans") { - if (any(is.na(cluster_input))) { - posnas <- unique(which(is.na(cluster_input), arr.ind = T)[, 2]) - cluster_input <- cluster_input[, -posnas] - } - clust <- kmeans( - cluster_input, - centers = ncenters, - iter.max = 100, - nstart = nstart, - trace = FALSE - ) - - if (any(is.na(cluster_input))) { - centers <- array(NA, dim = c(ncenters, nlon * nlat)) - centers [, -c(posnas)] <- as.array(clust$center) - } else{ - centers <- as.array(clust$center) - } - result <- array(0, c(ncenters, nlat, nlon)) - # the order of the data dimensions is changed ('lat','lon','time') - result <- Composite(aperm(data, c(2, 3, 1)), clust$cluster) - names(dim(result$composite)) <- c("lat", "lon", "cluster") - cluster_timeseries <- list(lengths = c(), values = c()) - frequency <- persistence <- matrix(NA, nsdates, ncenters) - for (i in 1:nsdates) { - occurences <- - rle(clust$cluster[((i * nftimes) + 1 - nftimes):(i * nftimes)]) - cluster_timeseries <- - list( - lengths = c(cluster_timeseries$lengths, - occurences$lengths), - values = c(cluster_timeseries$values, occurences$values) - ) - for (j in 1:ncenters) { - frequency[i, j] <- - (sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 - persistence[i, j] <- - mean(occurences$lengths[occurences$values == j]) - } - } - } else { - result <- hclust(dist(cluster_input), method = method) - clusterCut <- cutree(result, ncenters) - result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) - } - if (method == "kmeans") { - return( - list( - composite = result$composite, - pvalue = result$pvalue, - cluster = as.array(clust$cluster), - center = centers, - cluster_lengths = as.array(cluster_timeseries$lengths), - cluster_values = as.array(cluster_timeseries$values), - persistence = 
as.array(persistence), - frequency = frequency - ) - ) - } else { - return(list( - composite = result$composite, - pvalue = result$pvalue, - cluster = as.array(clusterCut) - )) - } - } - -- GitLab From d2301437b4379e9e351a93df97fa4901917c9463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 4 Mar 2020 19:16:41 +0100 Subject: [PATCH 07/45] change name --- R/{CST_WeatherRegime.R => CST_WeatherRegimes.R} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename R/{CST_WeatherRegime.R => CST_WeatherRegimes.R} (99%) diff --git a/R/CST_WeatherRegime.R b/R/CST_WeatherRegimes.R similarity index 99% rename from R/CST_WeatherRegime.R rename to R/CST_WeatherRegimes.R index 7b0d64fa..cc8e8b9f 100644 --- a/R/CST_WeatherRegime.R +++ b/R/CST_WeatherRegimes.R @@ -43,7 +43,7 @@ #'@examples #'@export #' -CST_WeatherRegime <- function(data,ncenters = NULL, +CST_WeatherRegimes <- function(data,ncenters = NULL, EOFS = TRUE,neofs = 30, varThreshold = NULL, lon = NULL, lat = NULL, method = "kmeans", -- GitLab From 6b187edd5aa42db0140fbc0734b8d6f3ebaab631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 4 Mar 2020 21:10:37 +0100 Subject: [PATCH 08/45] bugfix --- R/CST_WeatherRegimes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index cc8e8b9f..2b5e6e53 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -249,7 +249,7 @@ WeatherRegime <- function(data, ncenters = NULL, if (method == "kmeans") { clust <- as.array(clust$cluster) names(dim(clust)) <- 'time' -: return(list( + return(list( composite = result$composite, pvalue = result$pvalue, cluster = clust)) -- GitLab From 916c614a95cf6f72feee606761e3ce0b3d5a3fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 5 Mar 2020 14:52:46 +0100 Subject: [PATCH 09/45] minor changes --- R/CST_WeatherRegimes.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 2b5e6e53..7a6e0913 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -53,10 +53,16 @@ CST_WeatherRegimes <- function(data,ncenters = NULL, stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } + + if ('lon' %in% names(data)){ + lon <- data$lon + }else { + lon <- NULL + } data$data <- WeatherRegime(data$data,ncenters = ncenters , EOFS = EOFS, neofs = neofs, varThreshold = varThreshold, lon = lon, - lat = lat, method = method, + lat = data$lat, method = method, iter.max=iter.max, nstart = nstart, ncores = ncores) return(data) @@ -118,6 +124,10 @@ WeatherRegime <- function(data, ncenters = NULL, stop('data must be an array with named dimensions') } + if (is.null(lat)) { + stop("latitudes must be specified") + } + dimData <- names(dim(data)) nsdates <- dim(data)['sdate'] nftimes <- dim(data)['ftime'] @@ -174,7 +184,7 @@ WeatherRegime <- function(data, ncenters = NULL, if (EOFS == TRUE && is.null(lon)) { stop("longitudes must be specified") } - if (EOFS == TRUE && is.null(lat)) { + if (is.null(lat)) { stop("latitudes must be specified") } -- GitLab From 43dc6d3d18cfa9ec22068c76005a047f96998514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 10 Mar 2020 10:33:33 +0100 Subject: [PATCH 10/45] improving checks --- R/CST_WeatherRegimes.R | 107 ++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 56 
deletions(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 7a6e0913..d8b48312 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -45,8 +45,8 @@ #' CST_WeatherRegimes <- function(data,ncenters = NULL, EOFS = TRUE,neofs = 30, - varThreshold = NULL, lon = NULL, - lat = NULL, method = "kmeans", + varThreshold = NULL, + method = "kmeans", iter.max=100, nstart = 30, ncores = NULL) { if (!inherits(data, 's2dv_cube')) { @@ -59,12 +59,14 @@ CST_WeatherRegimes <- function(data,ncenters = NULL, }else { lon <- NULL } - data$data <- WeatherRegime(data$data,ncenters = ncenters , - EOFS = EOFS, neofs = neofs, - varThreshold = varThreshold, lon = lon, - lat = data$lat, method = method, - iter.max=iter.max, nstart = nstart, - ncores = ncores) + result <- WeatherRegime(data$data,ncenters = ncenters, + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, lon = lon, + lat = data$lat, method = method, + iter.max=iter.max, nstart = nstart, + ncores = ncores) + data$data <- result$composite + data$statistics <- result[-1] return(data) } @@ -86,11 +88,11 @@ CST_WeatherRegimes <- function(data,ncenters = NULL, #'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' -#'@param data n array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. +#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. #'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. -#'@param neofs number of modes to be kept (default = 30). +#'@param neofs number of modes to be kept only if EOFs = TRUE has been selected. (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. -#' Only sufficient PCs to explain this much variance will be used in the clustering. +#' Only sufficient PCs to explain this much variance will be used in the clustering. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. #'@param ncenters Number of clusters to be calculated with the clustering function. 
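
To make the interface these hunks converge on concrete, here is a minimal sketch with toy random anomalies. It is an illustration only: the array name 'anom' and the latitude values are hypothetical, it assumes the WeatherRegime() definition in this file (with the s2dverification and multiApply dependencies attached), and the output shapes follow the checks introduced above.

anom <- array(rnorm(2 * 10 * 5 * 4),
              dim = c(sdate = 2, ftime = 10, lat = 5, lon = 4))
# EOFS = FALSE clusters the latitude-weighted grid points directly,
# so 'lat' is required but 'lon' is not
res <- WeatherRegime(anom, ncenters = 3, EOFS = FALSE,
                     lat = seq(40, 60, length.out = 5))
names(res)          # "composite" "pvalue" "cluster" "frequency" "persistence"
dim(res$frequency)  # c(2, 3): one row per start date, one column per cluster
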
@@ -121,21 +123,27 @@ WeatherRegime <- function(data, ncenters = NULL, ncores = NULL) { if (is.null(names(dim(data)))) { - stop('data must be an array with named dimensions') + stop("Parameter 'data' must be an array with named dimensions") } if (is.null(lat)) { - stop("latitudes must be specified") + stop("Parameter 'lat' must be specified.") } dimData <- names(dim(data)) - nsdates <- dim(data)['sdate'] - nftimes <- dim(data)['ftime'] - - if ('sdate' %in% dimData && 'ftime' %in% dimData){ + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] data <- MergeDims(data, merge_dims = c('ftime','sdate'), rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimension(s).") + } } output <- Apply(data = list(data), @@ -148,15 +156,15 @@ WeatherRegime <- function(data, ncenters = NULL, method = method, ncores = ncores) - if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData){ + if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { # The frequency and the persistency are computed as they are useful # parameters in the cluster analysis - extra_output <- Apply(data=output$cluster, - target_dims = 'time', - fun=.freqPer, - nsdates=nsdates, - nftimes=nftimes , - ncenters = ncenters) + extra_output <- Apply(data = output$cluster, + target_dims = 'time', + fun = .freqPer, + nsdates = nsdates, + nftimes = nftimes , + ncenters = ncenters) output <- list(composite=output$composite, pvalue=output$pvalue, @@ -173,19 +181,20 @@ WeatherRegime <- function(data, ncenters = NULL, iter.max=100, nstart = 30) { if (is.null(names(dim(data)))) { - stop('data must be an array with named dimensions') + stop("Parameter 'data' must be an array with 'time', 'lat' and 'lon' dimensions.") } if (!is.null(lat) && dim(data)['lat'] != length(lat)) { - stop('the latitudes do not match with the lat dimension of data') + stop("The length of the paramter 'lat' does not match with the ['lat'] dimension of + the parameter 'data'") } if (is.null(ncenters)) { - stop("ncenters must be specified") + stop("Parameter 'ncenters' must be specified") } if (EOFS == TRUE && is.null(lon)) { - stop("longitudes must be specified") + stop("Parameter 'lon' must be specified") } if (is.null(lat)) { - stop("latitudes must be specified") + stop("Parameter 'lat' must be specified") } nlon <- dim(data)['lat'] @@ -202,32 +211,25 @@ WeatherRegime <- function(data, ncenters = NULL, dataPC <- EOF(data, lat = as.vector(lat), lon = as.vector(lon), - neofs = 30) + neofs = neofs) minPC <- head(as.numeric(which(cumsum(dataPC$var) > varThreshold)), 1) cluster_input <- dataPC$PC[, 1:minPC] } } else { - #if (latWeights){ - # latitude weights are applied on the data + dataW <- aperm(Apply(data, target_dims = 'lat', function (x, la) { x * cos(la * pi / 180)}, la = lat)[[1]], c(2, 1, 3)) cluster_input <- MergeDims(dataW, merge_dims = c('lat','lon'), - rename_dim = 'space',na.rm=T) - #}else { - #cluster_input <- MergeDims(data2, merge_dims = c('lat','lon'), - # rename_dim = 'space',na.rm=T) - #} + rename_dim = 'space',na.rm = TRUE) + } if (method == "kmeans") { - if (any(is.na(cluster_input))) { - posnas <- unique(which(is.na(cluster_input), arr.ind = T)[, 2]) - cluster_input <- cluster_input[, -posnas] - } + clust <- kmeans( cluster_input, centers = ncenters, @@ -235,15 +237,9 @@ 
WeatherRegime <- function(data, ncenters = NULL, nstart = nstart, trace = FALSE) - if (any(is.na(cluster_input))) { - centers <- array(NA, dim = c(ncenters, nlon * nlat)) - centers [, -c(posnas)] <- as.array(clust$center) - } else{ - centers <- as.array(clust$center) - } result <- array(0, c(ncenters, nlat, nlon)) # the order of the data dimensions is changed ('lat','lon','time') - result <- Composite(aperm(data,c(3,2,1)), clust$cluster) + result <- Composite(aperm(data,c(2, 3, 1)), clust$cluster) } else { result <- hclust(dist(cluster_input), method = method) @@ -251,8 +247,10 @@ WeatherRegime <- function(data, ncenters = NULL, result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) } result <- lapply(1:length(result), - function (n) {names(dim(result[[n]])) <- c("lon", "lat", "cluster") - return (result[[n]])}) + function (n) { + names(dim(result[[n]])) <- c("lon", "lat", "cluster") + return (result[[n]]) + }) names(result) <- c('composite','pvalue') @@ -266,10 +264,7 @@ WeatherRegime <- function(data, ncenters = NULL, } else { clust <- as.array(clusterCut) names(dim(clust)) <- 'time' - return(list( - composite = result$composite, - pvalue = result$pvalue, - cluster = clust)) + c(2, 3, 1) } } @@ -283,7 +278,7 @@ WeatherRegime <- function(data, ncenters = NULL, persistence[i, j] <- mean(occurences$lengths[occurences$values == j]) } } - return(list(frequency=frequency, - persistence=persistence)) + return(list(frequency = frequency, + persistence = persistence)) } -- GitLab From 29e88d82fd34a7fc46736744397e51a03b7b2d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Fri, 3 Apr 2020 12:43:53 +0200 Subject: [PATCH 11/45] the tests are added --- tests/testthat/test-CST_WeatherRegimes.R | 93 ++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 tests/testthat/test-CST_WeatherRegimes.R diff --git a/tests/testthat/test-CST_WeatherRegimes.R b/tests/testthat/test-CST_WeatherRegimes.R new file mode 100644 index 00000000..16dba6f4 --- /dev/null +++ b/tests/testthat/test-CST_WeatherRegimes.R @@ -0,0 +1,93 @@ +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_WeatherRegimes(data = 1), + paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + data1 <- 1 : 20 + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'data' must be an array with named dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(lat = 5, lon=4) + data1 <- list(data = data1 , lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'data' must have temporal dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(time = 20) + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1) , + paste0("Parameter 'lat' must be specified.")) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'ncenters' must be specified.")) + + expect_error( + CST_WeatherRegimes(data = data1, ncenters=3), + paste0("Parameter 'lon' must be specified.")) + + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS= FALSE)$data)), + c('lon', 'lat', 'cluster')) + + data1 <- 1 : 400 + dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5) + 
class(data1) <- 's2dv_cube' + nclusters <- 3 + + expect_equal( + dim(CST_WeatherRegimes(data = data1 , + ncenters = nclusters, + EOFS = FALSE)$statistics$frequency),c(2, nclusters)) + + data1 <- 1 : 400 + dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + + expect_equal( + names(CST_WeatherRegimes(data = data1 , ncenters = 4)$statistics), + c('pvalue', 'cluster', 'frequency', 'persistence')) + + expect_equal( + names(CST_WeatherRegimes(data = data1 , ncenters = 4, method='ward.D')$statistics), + c('pvalue', 'cluster')) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1[4,,] <- NA + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE), + paste0("Parameter 'data' contains NAs in the 'time' dimensions.")) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1[,2,3] <- NA + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + expect_equal( + any(is.na(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), + TRUE) +}) + + + + + -- GitLab From 31e2f9071201777170ed8ec9c7a711bcc4b17cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 14 Apr 2020 13:11:48 +0200 Subject: [PATCH 12/45] more tests have been added --- R/CST_WeatherRegimes.R | 31 +++++++++++++++++------- tests/testthat/test-CST_WeatherRegimes.R | 12 ++++++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index d8b48312..2fd94ec6 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -1,3 +1,4 @@ + #' @rdname CST_WeatherRegimes #' @title Function for Calculating the Cluster analysis #' @@ -43,7 +44,7 @@ #'@examples #'@export #' -CST_WeatherRegimes <- function(data,ncenters = NULL, +CST_WeatherRegimes <- function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, varThreshold = NULL, method = "kmeans", @@ -53,7 +54,6 @@ CST_WeatherRegimes <- function(data,ncenters = NULL, stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - if ('lon' %in% names(data)){ lon <- data$lon }else { @@ -123,7 +123,7 @@ WeatherRegime <- function(data, ncenters = NULL, ncores = NULL) { if (is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions") + stop("Parameter 'data' must be an array with named dimensions.") } if (is.null(lat)) { @@ -142,10 +142,11 @@ WeatherRegime <- function(data, ncenters = NULL, names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' } else { if (!('time' %in% dimData)) { - stop("Parameter 'data' must have temporal dimension(s).") + stop("Parameter 'data' must have temporal dimensions.") } } + output <- Apply(data = list(data), target_dims = c('time','lat','lon'), fun = ".WeatherRegime", @@ -183,23 +184,32 @@ WeatherRegime <- function(data, ncenters = NULL, if (is.null(names(dim(data)))) { stop("Parameter 'data' must be an array with 'time', 'lat' and 'lon' dimensions.") } + if (!is.null(lat) && dim(data)['lat'] != length(lat)) { stop("The length of the paramter 'lat' does not match with the ['lat'] dimension of - the parameter 'data'") + the parameter 'data'.") } if (is.null(ncenters)) { - stop("Parameter 'ncenters' must be specified") + stop("Parameter 'ncenters' must be specified.") } if (EOFS == TRUE && is.null(lon)) { - stop("Parameter 'lon' must be 
specified") + stop("Parameter 'lon' must be specified.") } if (is.null(lat)) { - stop("Parameter 'lat' must be specified") + stop("Parameter 'lat' must be specified.") } nlon <- dim(data)['lat'] nlat <- dim(data)['lon'] + if (any(is.na(data))){ + nas_test <- MergeDims(data, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm = TRUE) + if (dim(nas_test)['space']== c(nlat*nlon)){ + stop("Parameter 'data' contains NAs in the 'time' dimensions.") + } + } + if (EOFS == TRUE) { if (is.null(varThreshold)) { dataPC <- EOF(data, @@ -264,7 +274,10 @@ WeatherRegime <- function(data, ncenters = NULL, } else { clust <- as.array(clusterCut) names(dim(clust)) <- 'time' - c(2, 3, 1) + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) } } diff --git a/tests/testthat/test-CST_WeatherRegimes.R b/tests/testthat/test-CST_WeatherRegimes.R index 16dba6f4..8ccdb2d4 100644 --- a/tests/testthat/test-CST_WeatherRegimes.R +++ b/tests/testthat/test-CST_WeatherRegimes.R @@ -1,6 +1,6 @@ context("Generic tests") test_that("Sanity checks", { - expect_error( + expect_error( CST_WeatherRegimes(data = 1), paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ", "CSTools::CST_Load.")) @@ -54,6 +54,9 @@ test_that("Sanity checks", { dim(CST_WeatherRegimes(data = data1 , ncenters = nclusters, EOFS = FALSE)$statistics$frequency),c(2, nclusters)) + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, nclusters, EOFS= FALSE)$data)), + c('lon', 'lat', 'cluster')) data1 <- 1 : 400 dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) @@ -68,6 +71,10 @@ test_that("Sanity checks", { names(CST_WeatherRegimes(data = data1 , ncenters = 4, method='ward.D')$statistics), c('pvalue', 'cluster')) + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=4)$data)), + c('lon', 'lat', 'cluster')) + data1 <- 1 : 400 dim(data1) <- c(time = 20, lat = 5, lon=4) data1[4,,] <- NA @@ -85,6 +92,9 @@ test_that("Sanity checks", { expect_equal( any(is.na(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), TRUE) + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), + c('lon', 'lat', 'cluster')) }) -- GitLab From e6a5e0d1fd7d0612b81dd5058589710a79f3782c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 14 Apr 2020 13:38:50 +0200 Subject: [PATCH 13/45] examples --- R/CST_WeatherRegimes.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 2fd94ec6..092b7f04 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -42,6 +42,8 @@ #'@import s2dverification #'@import multiApply #'@examples +#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) #'@export #' CST_WeatherRegimes <- function(data, ncenters = NULL, @@ -113,6 +115,7 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, #'@import s2dverification #'@import multiApply #'@examples +#'res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) #'@export WeatherRegime <- function(data, ncenters = NULL, -- GitLab From d801b7b5017bab33cee5cfd3a04a4b9cef39922f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Mon, 25 May 2020 13:02:58 +0200 Subject: [PATCH 14/45] fix in the documentation and CST_RegimesAssign --- R/CST_RegimesAssign.R | 168 
+++++++++++++++++++++++++++++++++++++++++ R/CST_WeatherRegimes.R | 15 ++-- R/RegimesAssign.R | 158 -------------------------------------- 3 files changed, 174 insertions(+), 167 deletions(-) create mode 100644 R/CST_RegimesAssign.R delete mode 100644 R/RegimesAssign.R diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R new file mode 100644 index 00000000..77a1de41 --- /dev/null +++ b/R/CST_RegimesAssign.R @@ -0,0 +1,168 @@ +source('/esarchive/scratch/vtorralba/vtesis_scripts/weather_regimes/CompositeVero.R') + +#' +#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. +#' +RegimesAssign <- function(data, ref_maps, lats, method = 'distance', composite=FALSE) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + + if (is.null(names(dim(ref_maps)))) { + stop("Parameter 'ref_maps' must be an array with named dimensions.") + } + + if (is.null(lats)) { + stop("Parameter 'lat' must be specified.") + } + + if (length(lats) != dim(data)['lat'] | (length(lats) != dim(ref_maps)['lat']) ) { + stop(" Parameter 'lats' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") + } + + dimData <- names(dim(data)) + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimensions.") + } + } + + index <- Apply( data = list(target = data), + target_dims = c('lat','lon'), + fun = '.RegimesAssign', + ref = ref_maps, + lat = lats, method = method)[[1]] + + + nclust <- dim(ref_maps)['nclust'] + freqs <- rep(NA, nclust) + for (n in 1:nclust) { + freqs[n] <- (length(which(index == n)) / length(index)) * 100 + } + + if (composite){ + poslon <- which(names(dim(data))=='lon') + poslat <- which(names(dim(data))=='lat') + postime <- which(names(dim(data))=='time') + + + if (any(is.na(index))) { + recon <- + list( + composite = array(NA, + dim = c(dim(ref_maps)['lon'], dim(ref_maps)['lat'], dim(ref_maps)['nclust'])), + pvalue = array(NA, + dim = c(dim(ref_maps)['lon'], dim(ref_maps)['lat'], dim(ref_maps)['nclust']))) + } else { + recon <- Composite(var = aperm(data, c(poslon,poslat,postime)), occ = index, K=dim(ref_maps)['nclust']) + } + + output <- list(composite = recon$composite, + pvalue = recon$pvalue, + cluster = index, + frequency = freqs) + } else{ + + output <- list(cluster = index, + frequency = freqs) + } + + return(output) +} + +.RegimesAssign <- function(ref, target, method = 'distance', lat, composite=FALSE) { + posdim <- which(names(dim(ref)) == 'nclust') + poslat <- which(names(dim(ref)) == 'lat') + poslon <- which(names(dim(ref)) == 'lon') + + nclust <- dim(ref)[posdim] + + if (all(dim(ref)[-posdim] != dim(target))) { + stop('The target should have the same dimensions [lat,lon] that + the reference ') + } + + if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { + stop( + 'The arrays should include dimensions names ref[nclust,lat,lon] + and target [lat,lon]' + ) + } + + + if (length(lat) != dim(ref)[poslat]) { + stop('latitudes do not match with the maps') + } + + if (is.na(max(target))){ + assign <- NA + + }else{ + + + # 
This dimensions are reorganized + ref <- aperm(ref, c(posdim, poslat, poslon)) + target <- + aperm(target, c(which(names(dim( + target + )) == 'lat'), which(names(dim( + target + )) == 'lon'))) + + # weights are defined + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) + + + rmsdiff <- function(x, y) { + dims <- dim(x) + ndims <- length(dims) + if (ndims != 2 | ndims != length(dim(y))) { + stop('x and y should be maps') + } + map_diff <- NA * x + for (i in 1:dims[1]) { + for (j in 1:dims[2]) { + map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 + } + } + rmsdiff <- sqrt(mean(map_diff)) + return(rmsdiff) + } + + if (method == 'ACC') { + corr <- rep(NA, nclust) + for (i in 1:nclust) { + corr[i] <- + ACC(InsertDim(InsertDim( + InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 + ), 3, 1), + InsertDim(InsertDim( + InsertDim(target * latWeights, 1, 1), 2, 1 + ), 3, 1))$ACC[2] + } + assign <- which(corr == max(corr)) + } + + if (method == 'distance') { + rms <- rep(NA, nclust) + for (i in 1:nclust) { + rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) + } + assign <- which(rms == min(rms)) + } + } + + return(assign) +} + diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 092b7f04..12ec20d9 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -17,28 +17,25 @@ #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data a 's2dv_cube' object - +#'@param ncenters Number of clusters to be calculated with the clustering function. #'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. #'@param neofs number of modes to be kept (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. -#'@param lon Vector of longitudes. -#'@param lat Vector of latitudes. -#'@param ncenters Number of clusters to be calculated with the clustering function. #'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) #'but the function also support the different methods included in the hclust . These methods are: #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param ncores The number of multicore threads to use for parallel computation}. 
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
# or only k=1 for any specific cluster, i.e., case (*2)),
-#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial
-# dependence of the data with the same structure as Composite.),
-#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.),
-#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method='kmeans' has been selected)),
-#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method='kmeans' has been selected).),
+#' \code{$pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial
+# dependence of the data with the same structure as Composite.),
+#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.),
+#' \code{$persistence} (The value of the regime whose length is given in cluster_lengths (only if method='kmeans' has been selected)),
+#' \code{$frequency} (Percentage of days in a month/season belonging to each cluster (only if method='kmeans' has been selected).),
 #'@import s2dverification
 #'@import multiApply
 #'@examples
diff --git a/R/RegimesAssign.R b/R/RegimesAssign.R
deleted file mode 100644
index aaa5ead7..00000000
--- a/R/RegimesAssign.R
+++ /dev/null
@@ -1,158 +0,0 @@
-#' @rdname RegimesAssign
-#' @title Function for matching a field of anomalies with
-#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function).
-#'
-#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es}
-#'
-#' @description This function performs the matching between a field of anomalies and a set
-#' of maps which will be used as a reference. The anomalies will be assigned to the reference map
-#' for which the minimum Eucledian distance (method='distance') or highest spatial correlation
-#' (method='ACC') is obtained.
-#'
-#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools
-#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/}
-#'
-#'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.
-#'@param ref.maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching.
-#'@param lat Vector of latitudes.
-#'@param method Whether the matching will be performed in terms of minimum distance (default='distance') or
-#' the maximum spatial correlation (method='ACC') between the maps.
-#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
-# or only k=1 for any specific cluster, i.e., case (*2)),
-#' \code{pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test
-#' that accounts for the serial dependence of the data with the same structure as Composite.),
-#' \code{cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.)
, -#' \code{frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), -#' -#'@import s2dverification -#'@import multiApply -#'@examples -#'@export - -RegimesAssign <- function(data, ref.maps, lat, method = 'distance') { - - if (is.null(names(dim(ref.maps))) | is.null(names(dim(data)))) { - stop( - 'The arrays should include dimensions names map.ref[lat,lon,cluster] - and map.targ [lat,lon]' - ) - } - - if (length(lat) != dim(ref.maps)['lat']) { - stop('latitudes do not match with the maps') - } - - if (any(names(dim(ref.maps))=='member')){ - names(dim(ref.maps)['member'])='member_obs' - } - - if (any(names(dim(ref.maps))=='dataset')){ - names(dim(ref.maps)['dataset'])='dataset_obs' - } - print(dim(ref.maps)) - assign <- - Apply( data = list(map.targ = data,map.ref=ref.maps), - target_dims = list(c('lat','lon'), - c('lat','lon','cluster')), - fun = '.anom2regime', - lat = lat, - method = method - )[[1]] - - anom_array <- - array(data, dim = c(prod(dim(data)[-c(poslat_ano, poslon_ano)]), nlat, nlon)) - # to be replaced with MergeDims - - recon <- Composite(var = aperm(anom_array, c(3, 2, 1)), occ = assign) - freqs <- rep(NA, nclust) - for (n in 1:nclust) { - freqs[n] <- (length(which(assign == n)) / length(assign)) * 100 - } - output <- - list( - composite = recon$composite, - pvalue = recon$pvalue, - cluster = assign, - frequency = freqs - ) - return(output) -} - - - - -.anom2regime <- function(map.ref, map.targ, method = 'distance', lat) { - posdim <- which(names(dim(map.ref)) == 'cluster') - poslat <- which(names(dim(map.ref)) == 'lat') - poslon <- which(names(dim(map.ref)) == 'lon') - - nclust <- dim(map.ref)['cluster'] - - if (all(dim(map.ref)[-posdim] != dim(map.targ))) { - stop('The map.targ should have the same dimensions [lat,lon] that - the map.reference ') - } - - if (is.null(names(dim(map.ref))) | is.null(names(dim(map.targ)))) { - stop( - 'The arrays should include dimensions names map.ref[nclust,lat,lon] - and map.targ [lat,lon]' - ) - } - - if (length(lat) != dim(map.ref)['lat']) { - stop('latitudes do not match with the maps') - } - - # This dimensions are reorganized - map.ref <- aperm(map.ref, c(posdim, poslat, poslon)) - map.targ <- - aperm(map.targ, c(which(names(dim( - map.targ - )) == 'lat'), which(names(dim( - map.targ - )) == 'lon'))) - - # weights are defined - latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(map.ref)['lon']) - names(dim(latWeights))<-c('lat','lon') - - if (method == 'ACC') { - corr <- rep(NA, nclust) - for (i in 1:nclust) { - corr[i] <- - ACC(InsertDim(InsertDim( - InsertDim(map.ref[i, , ] * latWeights, 1, 1), 2, 1 - ), 3, 1), - InsertDim(InsertDim( - InsertDim(map.targ * latWeights, 1, 1), 2, 1 - ), 3, 1))$ACC[2] - } - assign <- which(corr == max(corr)) - } - - if (method == 'distance') { - rms <- rep(NA, nclust) - for (i in 1:nclust) { - rms[i] <- .rmsdiff(map.ref[i, , ] * latWeights, map.targ * latWeights) - } - assign <- which(rms == min(rms)) - } - - return(assign) -} -.rmsdiff <- function(x, y) { - dims <- dim(x) - ndims <- length(dims) - if (ndims != 2 | ndims != length(dim(y))) { - stop('x and y should be maps') - } - map_diff <- NA * x - for (i in 1:dims[1]) { - for (j in 1:dims[2]) { - map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 - } - } - rmsdiff <- sqrt(mean(map_diff)) - return(rmsdiff) -} -- GitLab From 15b2df04b3b81c66da2d44edbf381c53f1b4201a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 26 May 2020 
12:34:16 +0200 Subject: [PATCH 15/45] writing CST_RegimesAssign --- R/CST_RegimesAssign.R | 153 +++++++++++++++++++++++++++++++++-------- R/CST_WeatherRegimes.R | 18 ++--- 2 files changed, 135 insertions(+), 36 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 77a1de41..ff89e41d 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -1,24 +1,117 @@ -source('/esarchive/scratch/vtorralba/vtesis_scripts/weather_regimes/CompositeVero.R') - +#' @rdname CST_RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function) +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. #' -#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. -#' -RegimesAssign <- function(data, ref_maps, lats, method = 'distance', composite=FALSE) { +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data a 's2dv_cube' object + +#'@param ref.maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. +#'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param ncores The number of multicore threads to use for parallel computation}. +#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) 
, +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#'@import s2dverification +#'@import multiApply +#'@examples +#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite +#'res1 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps=regimes, composite=F) +#'res2 <- CST_Regimesassign(data = lonlat_data$exp , ref_maps=regimes, composite=T) +#'@export +#' + +CST_RegimesAssign <- function(data, ref_maps, + method = "distance", + composite = FALSE, + ncores=NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if ('lon' %in% names(data)){ + lon <- data$lon + }else { + lon <- NULL + } + result <- RegimesAssign(data$data, ref_maps, data$lat, + method = "distance", composite = composite) + + if (composite){ + data$data <- result$composite + data$statistics <- result[-1] + }else{ + data$statistics <- result + } + + return(data) +} + +#' @rdname RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function). +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. +#' +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. +#'@param ref.maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. +#'@param lat Vector of latitudes. +#'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.), +#' \code{cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) 
, +#' \code{frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' +#'@import s2dverification +#'@import multiApply +#'@examples +#'@export + +RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) { if (is.null(names(dim(data)))) { stop("Parameter 'data' must be an array with named dimensions.") } - + if (is.null(names(dim(ref_maps)))) { stop("Parameter 'ref_maps' must be an array with named dimensions.") } - if (is.null(lats)) { + dimRef <- names(dim(ref_maps)) + + if (!all( c('cluster', 'lat', 'lon') %in% dimRef)) { + "Parameter 'ref_maps' must contain the named dimensions + 'cluster','lat' and 'lon'." + } + + if (is.null(lat)) { stop("Parameter 'lat' must be specified.") } - if (length(lats) != dim(data)['lat'] | (length(lats) != dim(ref_maps)['lat']) ) { - stop(" Parameter 'lats' does not match with the dimension 'lat' in the + if (length(lat) != dim(data)['lat'] | (length(lat) != dim(ref_maps)['lat']) ) { + stop(" Parameter 'lat' does not match with the dimension 'lat' in the parameter 'data' or in the parameter 'ref_maps'.") } @@ -42,32 +135,36 @@ RegimesAssign <- function(data, ref_maps, lats, method = 'distance', composite=F target_dims = c('lat','lon'), fun = '.RegimesAssign', ref = ref_maps, - lat = lats, method = method)[[1]] - - - nclust <- dim(ref_maps)['nclust'] + lat = lat, method = method, + ncores=ncores)[[1]] + + nclust <- dim(ref_maps)['cluster'] freqs <- rep(NA, nclust) for (n in 1:nclust) { freqs[n] <- (length(which(index == n)) / length(index)) * 100 } if (composite){ - poslon <- which(names(dim(data))=='lon') - poslat <- which(names(dim(data))=='lat') - postime <- which(names(dim(data))=='time') - + poslon <- which(names(dim(data)) == 'lon') + poslat <- which(names(dim(data)) == 'lat') + postime <- which(names(dim(data)) == 'time') + posdim <- setdiff(1:length(dim(data)), c(postime, poslat, poslon)) + dataComp <- aperm(data, c(poslon, poslat, postime, posdim)) if (any(is.na(index))) { - recon <- - list( - composite = array(NA, - dim = c(dim(ref_maps)['lon'], dim(ref_maps)['lat'], dim(ref_maps)['nclust'])), - pvalue = array(NA, - dim = c(dim(ref_maps)['lon'], dim(ref_maps)['lat'], dim(ref_maps)['nclust']))) + recon <-list( + composite = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['cluster']), + pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['cluster'])) } else { - recon <- Composite(var = aperm(data, c(poslon,poslat,postime)), occ = index, K=dim(ref_maps)['nclust']) + recon <- + Apply(data = list(var = dataComp, occ = index), + target_dims = list(c('lon', 'lat', 'time'), c('time')), + fun = Composite, + K = dim(ref_maps)['cluster']) } - + output <- list(composite = recon$composite, pvalue = recon$pvalue, cluster = index, @@ -82,12 +179,12 @@ RegimesAssign <- function(data, ref_maps, lats, method = 'distance', composite=F } .RegimesAssign <- function(ref, target, method = 'distance', lat, composite=FALSE) { - posdim <- which(names(dim(ref)) == 'nclust') + posdim <- which(names(dim(ref)) == 'cluster') poslat <- which(names(dim(ref)) == 'lat') poslon <- which(names(dim(ref)) == 'lon') nclust <- dim(ref)[posdim] - + if (all(dim(ref)[-posdim] != dim(target))) { stop('The target should have the same dimensions [lat,lon] that the reference ') @@ -95,7 +192,7 @@ RegimesAssign <- function(data, ref_maps, lats, method = 'distance', composite=F if (is.null(names(dim(ref))) 
| is.null(names(dim(target)))) { stop( - 'The arrays should include dimensions names ref[nclust,lat,lon] + 'The arrays should include dimensions names ref[cluster,lat,lon] and target [lat,lon]' ) } diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 12ec20d9..f8392ab9 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -17,25 +17,28 @@ #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data a 's2dv_cube' object -#'@param ncenters Number of clusters to be calculated with the clustering function. + #'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. #'@param neofs number of modes to be kept (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. +#'@param lon Vector of longitudes. +#'@param lat Vector of latitudes. +#'@param ncenters Number of clusters to be calculated with the clustering function. #'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) #'but the function also support the different methods included in the hclust . These methods are: #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. -#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). #'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). #'@param ncores The number of multicore threads to use for parallel computation}. 
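# Editor's note, not part of the patch: a minimal runnable sketch of the
# k-means step that the 'method', 'nstarts' and 'iter.max' parameters above
# control. All object names here are illustrative assumptions, not CSTools
# internals; the package operates on 's2dv_cube' objects instead.
anom <- array(rnorm(100 * 6 * 8), dim = c(time = 100, lat = 6, lon = 8))
field <- matrix(anom, nrow = dim(anom)['time'])  # flatten space: time x (lat*lon)
set.seed(1)
km <- stats::kmeans(field, centers = 4, iter.max = 100, nstart = 30)
# km$cluster is the regime series; each centroid reshapes back to a lat-lon map
centroids <- array(t(km$centers), dim = c(lat = 6, lon = 8, cluster = 4))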
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{$pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial +#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial # dependence of the data with the same structure as Composite.), -#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{$persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), -#' \code{$frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), #'@import s2dverification #'@import multiApply #'@examples @@ -258,7 +261,7 @@ WeatherRegime <- function(data, ncenters = NULL, } result <- lapply(1:length(result), function (n) { - names(dim(result[[n]])) <- c("lon", "lat", "cluster") + names(dim(result[[n]])) <- c("lat", "lon", "cluster") return (result[[n]]) }) @@ -294,4 +297,3 @@ WeatherRegime <- function(data, ncenters = NULL, return(list(frequency = frequency, persistence = persistence)) } - -- GitLab From d3e85c22eaf43bd5746516110de12c7fa28a58db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 26 May 2020 12:41:00 +0200 Subject: [PATCH 16/45] minor change in the documentation --- R/CST_RegimesAssign.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index ff89e41d..3be4e91f 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -79,10 +79,10 @@ CST_RegimesAssign <- function(data, ref_maps, #' the maximum spatial correlation (method=’ACC’) between the maps. #'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test +#' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test #' that accounts for the serial dependence of the data with the same structure as Composite.), -#' \code{cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , -#' \code{frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) 
, +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #' #'@import s2dverification #'@import multiApply -- GitLab From b7af5404316bd77e1f8886c917860444d6e84303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Tue, 26 May 2020 18:06:23 +0200 Subject: [PATCH 17/45] changes in the error messages and tests --- R/CST_RegimesAssign.R | 41 +++++++----- tests/testthat/test-CST_RegimesAssign.R | 83 +++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 14 deletions(-) create mode 100644 tests/testthat/test-CST_RegimesAssign.R diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 3be4e91f..a5763942 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -41,18 +41,22 @@ CST_RegimesAssign <- function(data, ref_maps, stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - if ('lon' %in% names(data)){ - lon <- data$lon + if (is.null(ref_maps)) { + stop("Parameter 'ref_maps' must be specified.") + } + if ('lat' %in% names(data)){ + lat <- data$lat }else { - lon <- NULL + lat <- NULL } - result <- RegimesAssign(data$data, ref_maps, data$lat, + result <- RegimesAssign(data$data, ref_maps, lat= lat, method = "distance", composite = composite) if (composite){ data$data <- result$composite data$statistics <- result[-1] }else{ + data <- NULL data$statistics <- result } @@ -94,28 +98,37 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = if (is.null(names(dim(data)))) { stop("Parameter 'data' must be an array with named dimensions.") } + if (is.null(ref_maps)) { + stop("Parameter 'ref_maps' must be specified.") + } + + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } if (is.null(names(dim(ref_maps)))) { stop("Parameter 'ref_maps' must be an array with named dimensions.") } + dimData <- names(dim(data)) + + if (!all( c('time', 'lat', 'lon') %in% dimData)) { + stop("Parameter 'data' must contain the named dimensions 'time','lat' and 'lon'.") + } + dimRef <- names(dim(ref_maps)) if (!all( c('cluster', 'lat', 'lon') %in% dimRef)) { - "Parameter 'ref_maps' must contain the named dimensions - 'cluster','lat' and 'lon'." 
- } - - if (is.null(lat)) { - stop("Parameter 'lat' must be specified.") + stop("Parameter 'ref_maps' must contain the named dimensions + 'cluster','lat' and 'lon'.") } - + + if (length(lat) != dim(data)['lat'] | (length(lat) != dim(ref_maps)['lat']) ) { stop(" Parameter 'lat' does not match with the dimension 'lat' in the parameter 'data' or in the parameter 'ref_maps'.") } - dimData <- names(dim(data)) if ('sdate' %in% dimData && 'ftime' %in% dimData) { nsdates <- dim(data)['sdate'] @@ -154,9 +167,9 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = if (any(is.na(index))) { recon <-list( composite = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), - postime, dim(ref_maps)['cluster']), + postime, dim(ref_maps)['composite.cluster']), pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), - postime, dim(ref_maps)['cluster'])) + postime, dim(ref_maps)['composite.cluster'])) } else { recon <- Apply(data = list(var = dataComp, occ = index), diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R new file mode 100644 index 00000000..52283991 --- /dev/null +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -0,0 +1,83 @@ +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_RegimesAssign(data = 1), + paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + data1 <- 1 : 20 + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps=1), + paste0("Parameter 'data' must be an array with named dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(lat = 5, lon=4) + data1 <- list(data = data1 , lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps = 1), + paste0("Parameter 'ref_maps' must be an array with named dimensions.")) + + regimes <- 1:20 + dim(regimes) <- c(lat = 5, lon=2, cluster=2) + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'data' must contain the named dimensions 'time','lat' and 'lon'.")) + + + data1 <- 1 : 20 + dim(data1) <- c(time=20) + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + + regimes <- 1:20 + dim(regimes) <- c(lat = 5, lon=2, cluster=2) + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'lat' must be specified.")) + + data1 <- 1 : 20 + dim(data1) <- c(time=20) + data1 <- list(data = data1,lat=1:5) + class(data1) <- 's2dv_cube' + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'data' must contain the named dimensions 'time','lat' and 'lon'.")) + + data1 <- 1: 20 + dim(data1) <- c(lat = 2, lon=5, time=2) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0(" Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.")) + + data1 <- 1: 20 + dim(data1) <- c(lat = 5, lon=2, time=2) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_equal(names(CST_RegimesAssign(data = data1, ref_maps = regimes)$statistics), + c('cluster', 'frequency')) + + expect_equal(names( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$statistics), c('pvalue', 'cluster', 'frequency')) + + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$data)), c('lon', 'lat', 
'composite.cluster')) + +}) + -- GitLab From fa1c7fe8a7d51d7c7ff5116601231c4d328f4815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 10:54:25 +0200 Subject: [PATCH 18/45] more tests and fixing documentation --- R/CST_RegimesAssign.R | 26 ++++++++++++++++++-------- R/CST_WeatherRegimes.R | 5 ++--- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index a5763942..085fb1dd 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -14,10 +14,10 @@ #' #'@param data a 's2dv_cube' object -#'@param ref.maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. +#'@param ref_maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. #'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param ncores The number of multicore threads to use for parallel computation}. +#'@param ncores The number of multicore threads to use for parallel computation. #'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), #' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test @@ -27,9 +27,10 @@ #'@import s2dverification #'@import multiApply #'@examples -#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -#'res1 <- CST_RegimesAssign(data = lonlat_data$exp, ref_maps=regimes, composite=F) -#'res2 <- CST_Regimesassign(data = lonlat_data$exp , ref_maps=regimes, composite=T) +#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data +#'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') +#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=FALSE) +#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=TRUE) #'@export #' @@ -77,10 +78,12 @@ CST_RegimesAssign <- function(data, ref_maps, #'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param ref.maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. +#'@param ref_maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. #'@param lat Vector of latitudes. #'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. +#' @param composite a logical indicating if the composite maps are computed or not. +#'@param ncores The number of multicore threads to use for parallel computation. 
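# Editor's note, not part of the patch: hedged sketches of the two matching
# options documented above. 'assign_regime' and 'assign_regime_acc' are
# illustrative stand-ins, not the package's internal .RegimesAssign (which
# uses s2dverification::ACC for the correlation case); 'target' is a
# lat x lon anomaly map and 'ref' a lat x lon x k array of reference maps.
assign_regime <- function(target, ref, lat) {
  w <- sqrt(cos(lat * pi / 180))        # latitude weights, recycled over lon
  rmse <- apply(ref, 3, function(map) sqrt(mean((w * (map - target))^2)))
  which.min(rmse)                       # method = 'distance': closest map
}
assign_regime_acc <- function(target, ref, lat) {
  w <- sqrt(cos(lat * pi / 180))
  acc <- apply(ref, 3, function(map) cor(as.vector(w * map), as.vector(w * target)))
  which.max(acc)                        # method = 'ACC': best-correlated map
}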
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
#' or only k=1 for any specific cluster, i.e., case (*2)),
#' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test
#' that accounts for the serial dependence of the data with the same structure as Composite.),
#' \code{$cluster} (array with the same dimensions as data, except latitude and longitude, indicating the reference map to which each point is allocated.),
#' \code{$frequency} (A vector (one value per reference map k=1,...,n) giving the percentage of assignments corresponding to each map.),
#'
#'@import s2dverification
#'@import multiApply
#'@examples
+#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite
+#'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected')
+#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = maps_ref,
+#'lat=lonlat_data$exp$lat,composite=FALSE)
#'@export

RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) {
@@ -112,8 +119,8 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite =

   dimData <- names(dim(data))

-  if (!all( c('time', 'lat', 'lon') %in% dimData)) {
-    stop("Parameter 'data' must contain the named dimensions 'time','lat' and 'lon'.")
+  if (!all( c('lat', 'lon') %in% dimData)) {
+    stop("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.")
   }

@@ -276,3 +283,6 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite =
   return(assign)
 }
+
+
+
diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R
index f8392ab9..9cc7f9c8 100644
--- a/R/CST_WeatherRegimes.R
+++ b/R/CST_WeatherRegimes.R
@@ -1,4 +1,3 @@
-
 #' @rdname CST_WeatherRegimes
 #' @title Function for Calculating the Cluster analysis
 #'
@@ -31,7 +30,7 @@
 #' For more details about these methods see the hclust function documentation included in the stats package.
 #'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).
 #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).
-#'@param ncores The number of multicore threads to use for parallel computation}.
+#'@param ncores The number of multicore threads to use for parallel computation.
@@ -104,7 +103,7 @@ CST_WeatherRegimes <- function(data, ncenters = NULL,
 #' For more details about these methods see the hclust function documentation included in the stats package.
 #'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).
 #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).
-#'@param ncores The number of multicore threads to use for parallel computation}.
+#'@param ncores The number of multicore threads to use for parallel computation.
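# Editor's note, not part of the patch: the cluster frequencies returned by
# RegimesAssign reduce to a percentage of time steps per reference map; a toy
# check with an assumed assignment series for k = 4 maps:
index <- c(1, 2, 2, 4, 1, 2)
freqs <- 100 * tabulate(index, nbins = 4) / length(index)   # 33.3 50.0 0.0 16.7
# equivalent to: sapply(1:4, function(n) 100 * sum(index == n) / length(index))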
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), #' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial -- GitLab From 1404c7633a82ea34d89184d7d207c495f8ab4616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 10:55:48 +0200 Subject: [PATCH 19/45] documentation --- DESCRIPTION | 2 +- NAMESPACE | 4 ++ man/Analogs.Rd | 25 +++++--- man/BEI_PDFBest.Rd | 20 ++++--- man/BEI_Weights.Rd | 7 +-- man/CST_Analogs.Rd | 20 ++++--- man/CST_Anomaly.Rd | 10 ++-- man/CST_BEI_Weighting.Rd | 15 +++-- man/CST_BiasCorrection.Rd | 7 +-- man/CST_Calibration.Rd | 20 ++++--- man/CST_CategoricalEnsCombination.Rd | 17 ++++-- man/CST_EnsClustering.Rd | 17 ++++-- man/CST_Load.Rd | 1 - man/CST_MultiEOF.Rd | 11 +++- man/CST_MultiMetric.Rd | 9 ++- man/CST_MultivarRMSE.Rd | 7 +-- man/CST_QuantileMapping.Rd | 20 ++++--- man/CST_RFSlope.Rd | 1 - man/CST_RFWeights.Rd | 7 +-- man/CST_RainFARM.Rd | 24 +++++--- man/CST_RegimesAssign.Rd | 51 +++++++++++++++++ man/CST_SaveExp.Rd | 7 +-- man/CST_SplitDim.Rd | 1 - man/CST_WeatherRegimes.Rd | 74 ++++++++++++++++++++++++ man/EnsClustering.Rd | 19 +++++-- man/MultiEOF.Rd | 17 ++++-- man/PlotCombinedMap.Rd | 33 ++++++++--- man/PlotForecastPDF.Rd | 17 ++++-- man/PlotMostLikelyQuantileMap.Rd | 18 ++++-- man/RFSlope.Rd | 4 +- man/RainFARM.Rd | 23 ++++++-- man/RegimesAssign.Rd | 55 ++++++++++++++++++ man/SplitDim.Rd | 1 - man/WeatherRegimes.Rd | 76 +++++++++++++++++++++++++ man/areave_data.Rd | 1 - man/as.s2dv_cube.Rd | 7 +-- man/lonlat_data.Rd | 1 - man/lonlat_prec.Rd | 1 - man/s2dv_cube.Rd | 19 +++++-- tests/testthat/test-CST_RegimesAssign.R | 41 ++++++++++--- 40 files changed, 552 insertions(+), 158 deletions(-) create mode 100644 man/CST_RegimesAssign.Rd create mode 100644 man/CST_WeatherRegimes.Rd create mode 100644 man/RegimesAssign.Rd create mode 100644 man/WeatherRegimes.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0e47736a..8f2666f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,4 +70,4 @@ VignetteBuilder: knitr License: Apache License 2.0 Encoding: UTF-8 LazyData: true -RoxygenNote: 5.0.0 +RoxygenNote: 7.0.1 diff --git a/NAMESPACE b/NAMESPACE index e7d7c003..194148f2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,8 +18,10 @@ export(CST_QuantileMapping) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) +export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) +export(CST_WeatherRegimes) export(EnsClustering) export(MultiEOF) export(PlotCombinedMap) @@ -27,7 +29,9 @@ export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(RFSlope) export(RainFARM) +export(RegimesAssign) export(SplitDim) +export(WeatherRegime) export(as.s2dv_cube) export(s2dv_cube) import(abind) diff --git a/man/Analogs.Rd b/man/Analogs.Rd index ee8a737e..06107c07 100644 --- a/man/Analogs.Rd +++ b/man/Analogs.Rd @@ -4,9 +4,19 @@ \alias{Analogs} \title{Analogs based on large scale fields.} \usage{ -Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL, - nAnalogs = NULL, return_list = FALSE) +Analogs( + expL, + obsL, + time_obsL, + expVar = NULL, + obsVar = NULL, + criteria = "Large_dist", + lonVar = NULL, + latVar = NULL, + region = NULL, + nAnalogs = NULL, + return_list = FALSE +) } \arguments{ \item{expL}{an array of N named dimensions 
containing the experimental field @@ -377,11 +387,6 @@ Local_scalecor <- Analogs(expL=expSLP, str(Local_scalecor) Local_scalecor$AnalogsInfo -} -\author{ -M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} - -Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -389,4 +394,8 @@ and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. \email{pascal.yiou@lsce.ipsl.fr} } +\author{ +M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +Nuria Perez-Zanon \email{nuria.perez@bsc.es} +} diff --git a/man/BEI_PDFBest.Rd b/man/BEI_PDFBest.Rd index f836ab72..0ba24a84 100644 --- a/man/BEI_PDFBest.Rd +++ b/man/BEI_PDFBest.Rd @@ -4,9 +4,16 @@ \alias{BEI_PDFBest} \title{Computing the Best Index PDFs combining Index PDFs from two SFSs} \usage{ -BEI_PDFBest(index_obs, index_hind1, index_hind2, index_fcst1 = NULL, - index_fcst2 = NULL, method_BC = "none", time_dim_name = "time", - na.rm = FALSE) +BEI_PDFBest( + index_obs, + index_hind1, + index_hind2, + index_fcst1 = NULL, + index_fcst2 = NULL, + method_BC = "none", + time_dim_name = "time", + na.rm = FALSE +) } \arguments{ \item{index_obs}{Index (e.g. NAO index) array from an observational database @@ -113,12 +120,11 @@ dim(res) # time statistic season # 1 2 2 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/BEI_Weights.Rd b/man/BEI_Weights.Rd index 61db33af..867a4eb0 100644 --- a/man/BEI_Weights.Rd +++ b/man/BEI_Weights.Rd @@ -43,13 +43,12 @@ dim(res) # sdate dataset member season # 10 3 5 1 -} -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} } \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/CST_Analogs.Rd b/man/CST_Analogs.Rd index 7c9a1e6f..d7dd5e14 100644 --- a/man/CST_Analogs.Rd +++ b/man/CST_Analogs.Rd @@ -4,8 +4,15 @@ \alias{CST_Analogs} \title{Downscaling using Analogs based on large scale fields.} \usage{ -CST_Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - region = NULL, criteria = "Large_dist") +CST_Analogs( + expL, + obsL, + time_obsL, + expVar = NULL, + obsVar = NULL, + region = NULL, + criteria = "Large_dist" +) } \arguments{ \item{expL}{an 's2dv_cube' object containing the experimental field on the @@ -81,11 +88,6 @@ adapted version of the method of Yiou et al 2013. \examples{ res <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs) -} -\author{ -M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} - -Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -97,4 +99,8 @@ from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and \code{\link[s2dverification]{CDORemap}} } +\author{ +M. 
Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +Nuria Perez-Zanon \email{nuria.perez@bsc.es} +} diff --git a/man/CST_Anomaly.Rd b/man/CST_Anomaly.Rd index e1c31f0c..07691ea7 100644 --- a/man/CST_Anomaly.Rd +++ b/man/CST_Anomaly.Rd @@ -4,8 +4,7 @@ \alias{CST_Anomaly} \title{Anomalies relative to a climatology along selected dimension with or without cross-validation} \usage{ -CST_Anomaly(exp = NULL, obs = NULL, cross = FALSE, memb = TRUE, - dim_anom = 3) +CST_Anomaly(exp = NULL, obs = NULL, cross = FALSE, memb = TRUE, dim_anom = 3) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.} @@ -53,13 +52,12 @@ str(anom3) anom4 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = FALSE) str(anom4) +} +\seealso{ +\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} } \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Pena Jesus, \email{jesus.pena@bsc.es} } -\seealso{ -\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} -} - diff --git a/man/CST_BEI_Weighting.Rd b/man/CST_BEI_Weighting.Rd index 6b9a448a..d8a729bd 100644 --- a/man/CST_BEI_Weighting.Rd +++ b/man/CST_BEI_Weighting.Rd @@ -4,8 +4,12 @@ \alias{CST_BEI_Weighting} \title{Weighting SFSs of a CSTools object.} \usage{ -CST_BEI_Weighting(var_exp, aweights, type = "ensembleMean", - time_dim_name = "time") +CST_BEI_Weighting( + var_exp, + aweights, + type = "ensembleMean", + time_dim_name = "time" +) } \arguments{ \item{var_exp}{An object of the class 's2dv_cube' containing the variable @@ -63,12 +67,11 @@ dim(res_CST$data) # time lat lon dataset # 2 3 2 2 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index 485199ea..a1b415fb 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -35,10 +35,9 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_BiasCorrection(exp = exp, obs = obs) str(a) } -\author{ -Verónica Torralba, \email{veronica.torralba@bsc.es} -} \references{ Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. Davis (2017). Seasonal climate prediction: a new source of information for the management of wind energy resources. Journal of Applied Meteorology and Climatology, 56, 1231-1247, doi:10.1175/JAMC-D-16-0204.1. 
(CLIM4ENERGY, EUPORIAS, NEWA, RESILIENCE, SPECS) } -\encoding{UTF-8} +\author{ +Verónica Torralba, \email{veronica.torralba@bsc.es} +} diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 36171dbd..95ca561b 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -4,8 +4,13 @@ \alias{CST_Calibration} \title{Forecast Calibration} \usage{ -CST_Calibration(exp, obs, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = F) +CST_Calibration( + exp, + obs, + cal.method = "mse_min", + eval.method = "leave-one-out", + multi.model = F +) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.} @@ -26,11 +31,6 @@ Four types of member-by-member bias correction can be performed. The \code{bias} Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. } -\author{ -Verónica Torralba, \email{veronica.torralba@bsc.es} - -Bert Van Schaeybroeck, \email{bertvs@meteo.be} -} \references{ Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x @@ -57,4 +57,8 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") str(a) } -\encoding{UTF-8} +\author{ +Verónica Torralba, \email{veronica.torralba@bsc.es} + +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} diff --git a/man/CST_CategoricalEnsCombination.Rd b/man/CST_CategoricalEnsCombination.Rd index e551c3ec..c23f8341 100644 --- a/man/CST_CategoricalEnsCombination.Rd +++ b/man/CST_CategoricalEnsCombination.Rd @@ -4,8 +4,14 @@ \alias{CST_CategoricalEnsCombination} \title{Make categorical forecast based on a multi-model forecast with potential for calibrate} \usage{ -CST_CategoricalEnsCombination(exp, obs, cat.method = "pool", - eval.method = "leave-one-out", amt.cat = 3, ...) +CST_CategoricalEnsCombination( + exp, + obs, + cat.method = "pool", + eval.method = "leave-one-out", + amt.cat = 3, + ... +) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. The amount of forecasting models is equal to the size of the \code{dataset} dimension of the data array. The amount of members per model may be different. The size of the \code{member} dimension of the data array is equal to the maximum of the ensemble members among the models. Models with smaller ensemble sizes have residual indices of \code{member} dimension in the data array filled with NA values.} @@ -83,9 +89,6 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_CategoricalEnsCombination(exp = exp, obs = obs, amt.cat = 3, cat.method = "mmw") } } -\author{ -Bert Van Schaeybroeck, \email{bertvs@meteo.be} -} \references{ Rajagopalan, B., Lall, U., & Zebiak, S. E. (2002). Categorical climate forecasts through regularization and optimal combination of multiple GCM ensembles. Monthly Weather Review, 130(7), 1792-1811. @@ -93,4 +96,6 @@ Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combin Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). 
} - +\author{ +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} diff --git a/man/CST_EnsClustering.Rd b/man/CST_EnsClustering.Rd index c13bf205..154541d5 100644 --- a/man/CST_EnsClustering.Rd +++ b/man/CST_EnsClustering.Rd @@ -4,10 +4,18 @@ \alias{CST_EnsClustering} \title{Ensemble clustering} \usage{ -CST_EnsClustering(exp, time_moment = "mean", numclus = NULL, - lon_lim = NULL, lat_lim = NULL, variance_explained = 80, - numpcs = NULL, time_percentile = 90, cluster_dim = "member", - verbose = F) +CST_EnsClustering( + exp, + time_moment = "mean", + numclus = NULL, + lon_lim = NULL, + lat_lim = NULL, + variance_explained = 80, + numpcs = NULL, + time_percentile = 90, + cluster_dim = "member", + verbose = F +) } \arguments{ \item{exp}{An object of the class 's2dv_cube', containing the variables to be analysed. @@ -125,4 +133,3 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/CST_Load.Rd b/man/CST_Load.Rd index 1fee022c..bf03ba42 100644 --- a/man/CST_Load.Rd +++ b/man/CST_Load.Rd @@ -47,4 +47,3 @@ obs <- CSTools::lonlat_data$obs \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} } - diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index fb584751..036a6470 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -4,8 +4,14 @@ \alias{CST_MultiEOF} \title{EOF analysis of multiple variables} \usage{ -CST_MultiEOF(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, - lon_lim = NULL, lat_lim = NULL) +CST_MultiEOF( + datalist, + neof_max = 40, + neof_composed = 5, + minvar = 0.6, + lon_lim = NULL, + lat_lim = NULL +) } \arguments{ \item{datalist}{A list of objects of the class 's2dv_cube', containing the variables to be analysed. @@ -69,4 +75,3 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } - diff --git a/man/CST_MultiMetric.Rd b/man/CST_MultiMetric.Rd index 079a5588..8e3ce593 100644 --- a/man/CST_MultiMetric.Rd +++ b/man/CST_MultiMetric.Rd @@ -37,15 +37,14 @@ c(ano_exp, ano_obs) \%<-\% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb a <- CST_MultiMetric(exp = ano_exp, obs = ano_obs) str(a) } -\author{ -Mishra Niti, \email{niti.mishra@bsc.es} - -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} \references{ Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). 
Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{http://link.springer.com/10.1007/s00382-018-4404-z} } \seealso{ \code{\link[s2dverification]{Corr}}, \code{\link[s2dverification]{RMS}}, \code{\link[s2dverification]{RMSSS}} and \code{\link{CST_Load}} } +\author{ +Mishra Niti, \email{niti.mishra@bsc.es} +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_MultivarRMSE.Rd b/man/CST_MultivarRMSE.Rd index 685eaf77..24af608c 100644 --- a/man/CST_MultivarRMSE.Rd +++ b/man/CST_MultivarRMSE.Rd @@ -56,10 +56,9 @@ weight <- c(1, 2) a <- CST_MultivarRMSE(exp = ano_exp, obs = ano_obs, weight = weight) str(a) } -\author{ -Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} -} \seealso{ \code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}} } - +\author{ +Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} +} diff --git a/man/CST_QuantileMapping.Rd b/man/CST_QuantileMapping.Rd index 1c93843e..ad8f4b6c 100644 --- a/man/CST_QuantileMapping.Rd +++ b/man/CST_QuantileMapping.Rd @@ -4,9 +4,16 @@ \alias{CST_QuantileMapping} \title{Quantiles Mapping for seasonal or decadal forecast data} \usage{ -CST_QuantileMapping(exp, obs, exp_cor = NULL, sample_dims = c("sdate", - "ftime", "member"), sample_length = NULL, method = "QUANT", - ncores = NULL, ...) +CST_QuantileMapping( + exp, + obs, + exp_cor = NULL, + sample_dims = c("sdate", "ftime", "member"), + sample_length = NULL, + method = "QUANT", + ncores = NULL, + ... +) } \arguments{ \item{exp}{an object of class \code{s2dv_cube}} @@ -77,10 +84,9 @@ res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'DIST') } } -\author{ -Nuria Perez-Zanon, \email{nuria.perez@bsc.es} -} \seealso{ \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} } - +\author{ +Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_RFSlope.Rd b/man/CST_RFSlope.Rd index d2b5aec0..0c4e1671 100644 --- a/man/CST_RFSlope.Rd +++ b/man/CST_RFSlope.Rd @@ -50,4 +50,3 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/CST_RFWeights.Rd b/man/CST_RFWeights.Rd index 08a7b850..ef5ebe4d 100644 --- a/man/CST_RFWeights.Rd +++ b/man/CST_RFWeights.Rd @@ -47,9 +47,6 @@ nf <- 8 ww <- CST_RFWeights("./worldclim.nc", nf, lon, lat, fsmooth = TRUE) } } -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} -} \references{ Terzago, S., Palazzi, E., & von Hardenberg, J. (2018). Stochastic downscaling of precipitation in complex orography: @@ -57,4 +54,6 @@ A simple method to reproduce a realistic fine-scale climatology. Natural Hazards and Earth System Sciences, 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 . 
} - +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} diff --git a/man/CST_RainFARM.Rd b/man/CST_RainFARM.Rd index 4a667f9a..1c609e08 100644 --- a/man/CST_RainFARM.Rd +++ b/man/CST_RainFARM.Rd @@ -4,9 +4,20 @@ \alias{CST_RainFARM} \title{RainFARM stochastic precipitation downscaling of a CSTools object} \usage{ -CST_RainFARM(data, nf, weights = 1, slope = 0, kmin = 1, nens = 1, - fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, - verbose = FALSE, drop_realization_dim = FALSE) +CST_RainFARM( + data, + nf, + weights = 1, + slope = 0, + kmin = 1, + nens = 1, + fglob = FALSE, + fsmooth = TRUE, + nprocs = 1, + time_dim = NULL, + verbose = FALSE, + drop_realization_dim = FALSE +) } \arguments{ \item{data}{An object of the class 's2dv_cube' as returned by `CST_Load`, @@ -95,13 +106,12 @@ dim(res$data) # dataset member realization sdate ftime lat lon # 1 2 3 3 4 64 64 -} -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } \references{ Terzago, S. et al. (2018). NHESS 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 ; D'Onofrio et al. (2014), J of Hydrometeorology 15, 830-843; Rebora et. al. (2006), JHM 7, 724. } - +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd new file mode 100644 index 00000000..e90c6bc7 --- /dev/null +++ b/man/CST_RegimesAssign.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_RegimesAssign.R +\name{CST_RegimesAssign} +\alias{CST_RegimesAssign} +\title{Function for matching a field of anomalies with +a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function)} +\usage{ +CST_RegimesAssign( + data, + ref_maps, + method = "distance", + composite = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{a 's2dv_cube' object} + +\item{ref_maps}{Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching.} + +\item{method}{Whether the matching will be performed in terms of minimum distance (default=’distance’) or +the maximum spatial correlation (method=’ACC’) between the maps.} + +\item{ncores}{The number of multicore threads to use for parallel computation.} +} +\value{ +A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) + \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test + that accounts for the serial dependence of the data with the same structure as Composite.), + \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , + \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +} +\description{ +This function performs the matching between a field of anomalies and a set +of maps which will be used as a reference. The anomalies will be assigned to the reference map +for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +(method=‘ACC’) is obtained. 
+} +\examples{ +regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data +maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') +res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=FALSE) +res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=TRUE) +} +\references{ +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 17537205..0e49c119 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -29,11 +29,10 @@ destination <- "./path/" CST_SaveExp(data = data, destination = destination) } -} -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} } \seealso{ \code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} } - +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd index 2019ea7b..ee93aedc 100644 --- a/man/CST_SplitDim.Rd +++ b/man/CST_SplitDim.Rd @@ -43,4 +43,3 @@ dim(new_data$data) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } - diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd new file mode 100644 index 00000000..7a06c0fd --- /dev/null +++ b/man/CST_WeatherRegimes.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_WeatherRegimes.R +\name{CST_WeatherRegimes} +\alias{CST_WeatherRegimes} +\title{Function for Calculating the Cluster analysis} +\usage{ +CST_WeatherRegimes( + data, + ncenters = NULL, + EOFS = TRUE, + neofs = 30, + varThreshold = NULL, + method = "kmeans", + iter.max = 100, + nstart = 30, + ncores = NULL +) +} +\arguments{ +\item{data}{a 's2dv_cube' object} + +\item{ncenters}{Number of clusters to be calculated with the clustering function.} + +\item{neofs}{number of modes to be kept (default = 30).} + +\item{varThreshold}{Value with the percentage of variance to be explained by the PCs. +Only sufficient PCs to explain this much variance will be used in the clustering.} + +\item{method}{Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +but the function also support the different methods included in the hclust . These methods are: +"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). 
+For more details about these methods see the hclust function documentation included in the stats package.} + +\item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} + +\item{ncores}{The number of multicore threads to use for parallel computation.} + +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.} + +\item{lon}{Vector of longitudes.} + +\item{lat}{Vector of latitudes.} + +\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} +} +\value{ +A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) + \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial + \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), + \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), + \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +} +\description{ +This function computes the weather regimes from a cluster analysis. +It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced +by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. +The cluster analysis can be performed with the traditional k-means or those methods +included in the hclust (stats package). +} +\examples{ +res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +} +\references{ +Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +4961–4976, doi:10.1007/s00382-019-04839-5. + +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/EnsClustering.Rd b/man/EnsClustering.Rd index 27aca453..2fd8a3f1 100644 --- a/man/EnsClustering.Rd +++ b/man/EnsClustering.Rd @@ -4,10 +4,20 @@ \alias{EnsClustering} \title{Ensemble clustering} \usage{ -EnsClustering(data, lat, lon, time_moment = "mean", numclus = NULL, - lon_lim = NULL, lat_lim = NULL, variance_explained = 80, - numpcs = NULL, time_percentile = 90, cluster_dim = "member", - verbose = T) +EnsClustering( + data, + lat, + lon, + time_moment = "mean", + numclus = NULL, + lon_lim = NULL, + lat_lim = NULL, + variance_explained = 80, + numpcs = NULL, + time_percentile = 90, + cluster_dim = "member", + verbose = T +) } \arguments{ \item{data}{A matrix of dimensions 'dataset member sdate ftime lat lon' containing the variables to be analysed.} @@ -67,4 +77,3 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index 1e822fc4..dd0fc7fe 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -4,9 +4,19 @@ \alias{MultiEOF} \title{EOF analysis of multiple variables starting from an array (reduced version)} \usage{ -MultiEOF(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", - neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, - lat_lim = NULL) +MultiEOF( + data, + lon, + lat, + time, + lon_dim = "lon", + lat_dim = "lat", + neof_max = 40, + neof_composed = 5, + minvar = 0.6, + lon_lim = NULL, + lat_lim = NULL +) } \arguments{ \item{data}{A multidimensional array with dimension \code{"var"}, @@ -46,4 +56,3 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } - diff --git a/man/PlotCombinedMap.Rd b/man/PlotCombinedMap.Rd index 6857c64d..616b84f9 100644 --- a/man/PlotCombinedMap.Rd +++ b/man/PlotCombinedMap.Rd @@ -4,11 +4,27 @@ \alias{PlotCombinedMap} \title{Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function} \usage{ -PlotCombinedMap(maps, lon, lat, map_select_fun, display_range, - map_dim = "map", brks = NULL, cols = NULL, col_unknown_map = "white", - mask = NULL, col_mask = "grey", bar_titles = NULL, legend_scale = 1, - fileout = NULL, width = 8, height = 5, size_units = "in", res = 100, - ...) +PlotCombinedMap( + maps, + lon, + lat, + map_select_fun, + display_range, + map_dim = "map", + brks = NULL, + cols = NULL, + col_unknown_map = "white", + mask = NULL, + col_mask = "grey", + bar_titles = NULL, + legend_scale = 1, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) } \arguments{ \item{maps}{List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). 
Dimension names are required.} @@ -67,12 +83,11 @@ PlotCombinedMap(list(a, b, c), lons, lats, bar_titles = paste('\% of belonging to', c('a', 'b', 'c')), brks = 20, width = 10, height = 8) } +\seealso{ +\code{PlotCombinedMap} and \code{PlotEquiMap} +} \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} Veronica Torralba, \email{veronica.torralba@bsc.es} } -\seealso{ -\code{PlotCombinedMap} and \code{PlotEquiMap} -} - diff --git a/man/PlotForecastPDF.Rd b/man/PlotForecastPDF.Rd index bed0bd31..bf50f7dd 100644 --- a/man/PlotForecastPDF.Rd +++ b/man/PlotForecastPDF.Rd @@ -4,10 +4,18 @@ \alias{PlotForecastPDF} \title{Plot one or multiple ensemble forecast pdfs for the same event} \usage{ -PlotForecastPDF(fcst, tercile.limits, extreme.limits = NULL, obs = NULL, - plotfile = NULL, title = "Set a title", var.name = "Varname (units)", - fcst.names = NULL, add.ensmemb = c("above", "below", "no"), - color.set = c("ggplot", "s2s4e", "hydro")) +PlotForecastPDF( + fcst, + tercile.limits, + extreme.limits = NULL, + obs = NULL, + plotfile = NULL, + title = "Set a title", + var.name = "Varname (units)", + fcst.names = NULL, + add.ensmemb = c("above", "below", "no"), + color.set = c("ggplot", "s2s4e", "hydro") +) } \arguments{ \item{fcst}{a dataframe or array containing all the ensember members for each frecast. If \code{'fcst'} is an array, it should have two labelled dimensions, and one of them should be \code{'members'}. If \code{'fcsts'} is a data.frame, each column shoul be a separate forecast, with the rows beeing the different ensemble members.} @@ -49,4 +57,3 @@ PlotForecastPDF(fcsts2, c(-0.66, 0.66), extreme.limits = c(-1.2, 1.2), \author{ Llorenç Lledó \email{llledo@bsc.es} } -\encoding{UTF-8} diff --git a/man/PlotMostLikelyQuantileMap.Rd b/man/PlotMostLikelyQuantileMap.Rd index 6c92850e..4c400b18 100644 --- a/man/PlotMostLikelyQuantileMap.Rd +++ b/man/PlotMostLikelyQuantileMap.Rd @@ -4,8 +4,15 @@ \alias{PlotMostLikelyQuantileMap} \title{Plot Maps of Most Likely Quantiles} \usage{ -PlotMostLikelyQuantileMap(probs, lon, lat, cat_dim = "bin", - bar_titles = NULL, col_unknown_cat = "white", ...) +PlotMostLikelyQuantileMap( + probs, + lon, + lat, + cat_dim = "bin", + bar_titles = NULL, + col_unknown_cat = "white", + ... +) } \arguments{ \item{probs}{a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories. 
The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added.} @@ -109,11 +116,10 @@ PlotMostLikelyQuantileMap(bins, lons, lats, mask = 1 - (w1 + w2 / max(c(w1, w2))), brks = 20, width = 10, height = 8) -} -\author{ -Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} } \seealso{ \code{PlotCombinedMap} and \code{PlotEquiMap} } - +\author{ +Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} +} diff --git a/man/RFSlope.Rd b/man/RFSlope.Rd index 09a24ff5..db3f0e10 100644 --- a/man/RFSlope.Rd +++ b/man/RFSlope.Rd @@ -4,8 +4,7 @@ \alias{RFSlope} \title{RainFARM spectral slopes from an array (reduced version)} \usage{ -RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", - lat_dim = "lat") +RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", lat_dim = "lat") } \arguments{ \item{data}{Array containing the spatial precipitation fields to downscale. @@ -60,4 +59,3 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/RainFARM.Rd b/man/RainFARM.Rd index 984dcd42..0db84679 100644 --- a/man/RainFARM.Rd +++ b/man/RainFARM.Rd @@ -4,10 +4,24 @@ \alias{RainFARM} \title{RainFARM stochastic precipitation downscaling (reduced version)} \usage{ -RainFARM(data, lon, lat, nf, weights = 1, nens = 1, slope = 0, kmin = 1, - fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, - lon_dim = "lon", lat_dim = "lat", drop_realization_dim = FALSE, - verbose = FALSE) +RainFARM( + data, + lon, + lat, + nf, + weights = 1, + nens = 1, + slope = 0, + kmin = 1, + fglob = FALSE, + fsmooth = TRUE, + nprocs = 1, + time_dim = NULL, + lon_dim = "lon", + lat_dim = "lat", + drop_realization_dim = FALSE, + verbose = FALSE +) } \arguments{ \item{data}{Precipitation array to downscale. @@ -117,4 +131,3 @@ dim(res$data) \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd new file mode 100644 index 00000000..4f056399 --- /dev/null +++ b/man/RegimesAssign.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_RegimesAssign.R +\name{RegimesAssign} +\alias{RegimesAssign} +\title{Function for matching a field of anomalies with +a set of maps used as a reference (e.g. 
clusters obtained from the WeatherRegime function).}
+\usage{
+RegimesAssign(
+  data,
+  ref_maps,
+  lat,
+  method = "distance",
+  composite = FALSE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{data}{an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.}
+
+\item{ref_maps}{Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching.}
+
+\item{lat}{Vector of latitudes.}
+
+\item{method}{Whether the matching will be performed in terms of minimum distance (default=’distance’) or
+the maximum spatial correlation (method=’ACC’) between the maps.}
+
+\item{composite}{a logical indicating if the composite maps are computed or not.}
+
+\item{ncores}{The number of multicore threads to use for parallel computation.}
+}
+\value{
+A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
+ \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test
+ that accounts for the serial dependence of the data with the same structure as Composite.),
+ \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) ,
+ \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.),
+}
+\description{
+This function performs the matching between a field of anomalies and a set
+of maps which will be used as a reference. The anomalies will be assigned to the reference map
+for which the minimum Euclidean distance (method=’distance’) or highest spatial correlation
+(method=’ACC’) is obtained.
+}
+\examples{
+regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite
+maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected')
+res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = maps_ref,
+lat=lonlat_data$exp$lat,composite=FALSE)
+}
+\references{
+Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis.
Available online: \url{https://eprints.ucm.es/56841/}
+}
+\author{
+Verónica Torralba - BSC, \email{veronica.torralba@bsc.es}
+}
diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd
index e36aa8a5..f07e4756 100644
--- a/man/SplitDim.Rd
+++ b/man/SplitDim.Rd
@@ -35,4 +35,3 @@ new_data <- SplitDim(data, indices = time, freq = 'year')
 \author{
 Nuria Perez-Zanon, \email{nuria.perez@bsc.es}
 }
-
diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd
new file mode 100644
index 00000000..887f202a
--- /dev/null
+++ b/man/WeatherRegimes.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CST_WeatherRegimes.R
+\name{WeatherRegime}
+\alias{WeatherRegime}
+\title{Function for Calculating the Cluster analysis}
+\usage{
+WeatherRegime(
+  data,
+  ncenters = NULL,
+  EOFS = TRUE,
+  neofs = 30,
+  varThreshold = NULL,
+  lon = NULL,
+  lat = NULL,
+  method = "kmeans",
+  iter.max = 100,
+  nstart = 30,
+  ncores = NULL
+)
+}
+\arguments{
+\item{data}{an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'.}
+
+\item{ncenters}{Number of clusters to be calculated with the clustering function.}
+
+\item{neofs}{number of modes to be kept only if EOFs = TRUE has been selected. (default = 30).}
+
+\item{varThreshold}{Value with the percentage of variance to be explained by the PCs.
+Only sufficient PCs to explain this much variance will be used in the clustering.}
+
+\item{lon}{Vector of longitudes.}
+
+\item{lat}{Vector of latitudes.}
+
+\item{method}{Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’)
+but the function also supports the different methods included in hclust. These methods are:
+"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
+For more details about these methods see the hclust function documentation included in the stats package.}
+
+\item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).}
+
+\item{ncores}{The number of multicore threads to use for parallel computation.}
+
+\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.}
+
+\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).}
+}
+\value{
+A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
+ \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial
+ \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.),
+ \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)),
+ \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).),
+}
+\description{
+This function computes the weather regimes from a cluster analysis.
+It can be applied over the dataset with dimensions
+c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the
+EOFs analysis to filter the dataset.
+The cluster analysis can be performed with the traditional k-means or those methods
+included in the hclust (stats package).
+} +\examples{ +res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +} +\references{ +Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +4961–4976, doi:10.1007/s00382-019-04839-5. + +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/areave_data.Rd b/man/areave_data.Rd index cc79c85c..a772220a 100644 --- a/man/areave_data.Rd +++ b/man/areave_data.Rd @@ -41,4 +41,3 @@ areave_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} - diff --git a/man/as.s2dv_cube.Rd b/man/as.s2dv_cube.Rd index 13a2a296..c2b8f3a8 100644 --- a/man/as.s2dv_cube.Rd +++ b/man/as.s2dv_cube.Rd @@ -40,12 +40,11 @@ data <- as.s2dv_cube(data) class(data) } } +\seealso{ +\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} +} \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Nicolau Manubens, \email{nicolau.manubens@bsc.es} } -\seealso{ -\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} -} - diff --git a/man/lonlat_data.Rd b/man/lonlat_data.Rd index eca7abac..0c6ee30f 100644 --- a/man/lonlat_data.Rd +++ b/man/lonlat_data.Rd @@ -41,4 +41,3 @@ lonlat_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} - diff --git a/man/lonlat_prec.Rd b/man/lonlat_prec.Rd index 69cb94e8..345e3cab 100644 --- a/man/lonlat_prec.Rd +++ b/man/lonlat_prec.Rd @@ -29,4 +29,3 @@ lonlat_prec <- CST_Load('prlr', exp = list(infile), obs = NULL, Jost von Hardenberg \email{j.vonhardenberg@isac.cnr.it} } \keyword{data} - diff --git a/man/s2dv_cube.Rd b/man/s2dv_cube.Rd index 48af7bbb..b0ce8966 100644 --- a/man/s2dv_cube.Rd +++ b/man/s2dv_cube.Rd @@ -4,8 +4,16 @@ \alias{s2dv_cube} \title{Creation of a 's2dv_cube' object} \usage{ -s2dv_cube(data, lon = NULL, lat = NULL, Variable = NULL, - Datasets = NULL, Dates = NULL, when = NULL, source_files = NULL) +s2dv_cube( + data, + lon = NULL, + lat = NULL, + Variable = NULL, + Datasets = NULL, + Dates = NULL, + when = NULL, + source_files = NULL +) } \arguments{ \item{data}{an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon.} @@ -75,10 +83,9 @@ exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) class(exp8) } -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} \seealso{ \code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} } - +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R index 52283991..92c63a80 100644 --- a/tests/testthat/test-CST_RegimesAssign.R +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -1,3 +1,4 @@ + context("Generic tests") test_that("Sanity checks", { expect_error( @@ -24,9 +25,9 @@ test_that("Sanity checks", { dim(regimes) <- c(lat = 5, lon=2, cluster=2) expect_error( CST_RegimesAssign(data = data1,ref_maps = regimes), - paste0("Parameter 'data' must contain the named dimensions 
'time','lat' and 'lon'.")) + paste0("Parameter 'data' must have temporal dimensions.")) - + data1 <- 1 : 20 dim(data1) <- c(time=20) data1 <- list(data = data1) @@ -39,6 +40,7 @@ test_that("Sanity checks", { CST_RegimesAssign(data = data1,ref_maps = regimes), paste0("Parameter 'lat' must be specified.")) + data1 <- 1 : 20 dim(data1) <- c(time=20) data1 <- list(data = data1,lat=1:5) @@ -46,8 +48,8 @@ test_that("Sanity checks", { expect_error( CST_RegimesAssign(data = data1,ref_maps = regimes), - paste0("Parameter 'data' must contain the named dimensions 'time','lat' and 'lon'.")) - + paste0("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.")) + data1 <- 1: 20 dim(data1) <- c(lat = 2, lon=5, time=2) data1 <- list(data = data1, lat=1:5) @@ -55,8 +57,9 @@ test_that("Sanity checks", { expect_error( CST_RegimesAssign(data = data1,ref_maps = regimes), - paste0(" Parameter 'lat' does not match with the dimension 'lat' in the - parameter 'data' or in the parameter 'ref_maps'.")) + " Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") + data1 <- 1: 20 dim(data1) <- c(lat = 5, lon=2, time=2) @@ -72,12 +75,34 @@ test_that("Sanity checks", { ref_maps = regimes, composite = TRUE)$statistics), c('pvalue', 'cluster', 'frequency')) - expect_equal(names(dim( CST_RegimesAssign( data = data1, ref_maps = regimes, composite = TRUE)$data)), c('lon', 'lat', 'composite.cluster')) + data1 <- 1: 160 + dim(data1) <- c(lat = 5, lon=2, time=2, member=8) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$data)), c('lon', 'lat', 'composite.cluster', 'member')) + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$statistics$cluster)), c('time', 'member')) + + regimes <- 1:60 + dim(regimes) <- c(lat = 5, lon=2, cluster=6) + + expect_equal(max(CST_RegimesAssign(data = data1, ref_maps = regimes, + composite = FALSE)$statistics$cluster), + unname(dim(regimes)['cluster'])) + }) - -- GitLab From 0aaa3adf8190abf4de50c0503eac851e452370cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 11:41:43 +0200 Subject: [PATCH 20/45] adding composite --- R/CST_RegimesAssign.R | 57 +++++++++++++++++++++++++ tests/testthat/test-CST_RegimesAssign.R | 8 ++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 085fb1dd..4dd77132 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -284,5 +284,62 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = } +Composite <- function(var, occ, lag = 0, eno = FALSE, K = NULL, fileout = NULL) { + + if ( dim(var)[3] != length(occ) ) { + stop("Temporal dimension of var is not equal to length of occ.") + } + if (is.null(K)) { + K <- max(occ) + } + composite <- array(dim = c(dim(var)[1:2], composite = K)) + tvalue <- array(dim = dim(var)[1:2]) + dof <- array(dim = dim(var)[1:2]) + pvalue <- array(dim = c(dim(var)[1:2], composite = K)) + + if (eno == TRUE) { + n_tot <- Eno(var, posdim = 3) + } else { + n_tot <- length(occ) + } + mean_tot <- Mean1Dim(var, posdim = 3, narm = TRUE) + stdv_tot <- apply(var, c(1, 2), sd, na.rm = TRUE) + + for (k in 1 : K) { + if (length(which(occ == k)) >= 1) { + indices <- which(occ == k) + lag + toberemoved <- which(0 > indices | indices > dim(var)[3]) + + if 
(length(toberemoved) > 0) { + indices <- indices[-toberemoved] + } + if (eno == TRUE) { + n_k <- Eno(var[, , indices], posdim = 3) + } else { + n_k <- length(indices) + } + if (length(indices) == 1) { + composite[, , k] <- var[, , indices] + warning(paste("Composite", k, "has length 1 and pvalue is NA.")) + } else { + composite[, , k] <- Mean1Dim(var[, , indices], posdim = 3, narm = TRUE) + } + stdv_k <- apply(var[, , indices], c(1, 2), sd, na.rm = TRUE) + + tvalue <- (mean_tot - composite[, , k]) / + sqrt(stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) + dof <- (stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) ^ 2 / + ((stdv_tot ^ 2 / n_tot) ^ 2 / (n_tot - 1) + + (stdv_k ^ 2 / n_k) ^ 2 / (n_k - 1)) + pvalue[, , k] <- 2 * pt(-abs(tvalue), df = dof) + } + } + if (is.null(fileout) == FALSE) { + output <- list(composite = composite, pvalue = pvalue) + save(output, file = paste(fileout, '.sav', sep = '')) + } + + invisible(list(composite = composite, pvalue = pvalue)) +} diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R index 92c63a80..c9b4802a 100644 --- a/tests/testthat/test-CST_RegimesAssign.R +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -1,4 +1,3 @@ - context("Generic tests") test_that("Sanity checks", { expect_error( @@ -55,10 +54,9 @@ test_that("Sanity checks", { data1 <- list(data = data1, lat=1:5) class(data1) <- 's2dv_cube' - expect_error( - CST_RegimesAssign(data = data1,ref_maps = regimes), - " Parameter 'lat' does not match with the dimension 'lat' in the - parameter 'data' or in the parameter 'ref_maps'.") + #expect_error( + # CST_RegimesAssign(data = data1,ref_maps = regimes), + # " Parameter 'lat' does not match with the dimension 'lat' in the parameter 'data' or in the parameter 'ref_maps'.") data1 <- 1: 20 -- GitLab From e98d1c3a4c1dc30b340204f7a19bd787d0b1166a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 11:47:11 +0200 Subject: [PATCH 21/45] changes in the tests --- tests/testthat/test-CST_RegimesAssign.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R index c9b4802a..33f177f5 100644 --- a/tests/testthat/test-CST_RegimesAssign.R +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -54,9 +54,10 @@ test_that("Sanity checks", { data1 <- list(data = data1, lat=1:5) class(data1) <- 's2dv_cube' - #expect_error( - # CST_RegimesAssign(data = data1,ref_maps = regimes), - # " Parameter 'lat' does not match with the dimension 'lat' in the parameter 'data' or in the parameter 'ref_maps'.") + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + " Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") data1 <- 1: 20 -- GitLab From 4f1e4a188b96aef86870cdfa159e3c3ebc4b5f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 12:32:55 +0200 Subject: [PATCH 22/45] quotation marks in .WeatherRegime --- R/CST_WeatherRegimes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 9cc7f9c8..016e0186 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -151,7 +151,7 @@ WeatherRegime <- function(data, ncenters = NULL, output <- Apply(data = list(data), target_dims = c('time','lat','lon'), - fun = ".WeatherRegime", + fun = .WeatherRegime, EOFS = EOFS, neofs = neofs, varThreshold = varThreshold, lon 
= lon, lat = lat, -- GitLab From 4fa6dfd56296cd35a82e493b1617cc83ff512dc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 12:50:01 +0200 Subject: [PATCH 23/45] adding climprojdiags --- R/CST_RegimesAssign.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 4dd77132..52bd3720 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -26,6 +26,7 @@ #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #'@import s2dverification #'@import multiApply +#'@import ClimProjDiags #'@examples #'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data #'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') @@ -92,7 +93,8 @@ CST_RegimesAssign <- function(data, ref_maps, #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #' #'@import s2dverification -#'@import multiApply +#'@import multiApply +#'@import ClimProjDiags #'@examples #'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite #'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -- GitLab From 02a2253d0dfdfb8f57eee01f51b45bb13d16c329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 12:56:35 +0200 Subject: [PATCH 24/45] climprojdiags again --- R/CST_RegimesAssign.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 52bd3720..0f8bb686 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -26,7 +26,7 @@ #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #'@import s2dverification #'@import multiApply -#'@import ClimProjDiags +#'@importFrom ClimProjDiags Subset #'@examples #'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data #'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') @@ -94,7 +94,7 @@ CST_RegimesAssign <- function(data, ref_maps, #' #'@import s2dverification #'@import multiApply -#'@import ClimProjDiags +#'@importFrom ClimProjDiags Subset #'@examples #'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite #'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -- GitLab From e8393f05922595ae95995d1c037b07f84ceb5f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:09:00 +0200 Subject: [PATCH 25/45] changes in the import --- NAMESPACE | 1 + man/CST_BEI_Weighting.Rd | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index e2418d7c..a7fed60c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ import(rainfarmr) import(s2dverification) import(stats) importFrom(ClimProjDiags,SelBox) +importFrom(ClimProjDiags,Subset) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) diff --git a/man/CST_BEI_Weighting.Rd b/man/CST_BEI_Weighting.Rd index 238a3e1c..d6f65bb5 100644 --- a/man/CST_BEI_Weighting.Rd +++ b/man/CST_BEI_Weighting.Rd @@ -7,6 
+7,7 @@ CST_BEI_Weighting( var_exp, aweights, + terciles = NULL, type = "ensembleMean", time_dim_name = "time" ) -- GitLab From 9b51054efdbd565ee348ce04bb6d8656c1a5db68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:18:02 +0200 Subject: [PATCH 26/45] removing subset --- R/CST_RegimesAssign.R | 5 ++--- man/CST_RegimesAssign.Rd | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 0f8bb686..c7f325a8 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -29,9 +29,8 @@ #'@importFrom ClimProjDiags Subset #'@examples #'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data -#'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=FALSE) -#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=TRUE) +#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=FALSE) +#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=TRUE) #'@export #' diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index e90c6bc7..2f47b2f6 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -38,9 +38,8 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat } \examples{ regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data -maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=FALSE) -res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = maps_ref,composite=TRUE) +res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=FALSE) +res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=TRUE) } \references{ Torralba, V. 
(2019) Seasonal climate prediction for the wind energy sector: methods and tools -- GitLab From 23e804fc285534049276cbde0e02b18627277f4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:24:01 +0200 Subject: [PATCH 27/45] quotation marks --- R/CST_RegimesAssign.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index c7f325a8..48d4a315 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -154,7 +154,7 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = index <- Apply( data = list(target = data), target_dims = c('lat','lon'), - fun = '.RegimesAssign', + fun = .RegimesAssign, ref = ref_maps, lat = lat, method = method, ncores=ncores)[[1]] -- GitLab From a5c9f5ca3b923e7883737ebd541e4070eddfd296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:30:23 +0200 Subject: [PATCH 28/45] remove subset --- R/CST_RegimesAssign.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 48d4a315..7af20f15 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -96,8 +96,7 @@ CST_RegimesAssign <- function(data, ref_maps, #'@importFrom ClimProjDiags Subset #'@examples #'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -#'maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = maps_ref, +#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), #'lat=lonlat_data$exp$lat,composite=FALSE) #'@export -- GitLab From 6c4c5198c2365ce32c6fc8c789891423937dd028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:31:34 +0200 Subject: [PATCH 29/45] change in the documentation --- man/RegimesAssign.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index 4f056399..dded79fd 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -43,8 +43,7 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat } \examples{ regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -maps_ref <- Subset(regimes, along=c('dataset','member'), indices=list(1,1),drop='selected') -res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = maps_ref, +res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), lat=lonlat_data$exp$lat,composite=FALSE) } \references{ -- GitLab From 8a1800b60ef89102e82f1b5af0748384d86209b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 13:48:28 +0200 Subject: [PATCH 30/45] changes in the tests of CST_WeatherRegimes.R --- tests/testthat/test-CST_WeatherRegimes.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-CST_WeatherRegimes.R b/tests/testthat/test-CST_WeatherRegimes.R index 8ccdb2d4..33e75e24 100644 --- a/tests/testthat/test-CST_WeatherRegimes.R +++ b/tests/testthat/test-CST_WeatherRegimes.R @@ -42,7 +42,7 @@ test_that("Sanity checks", { expect_equal( names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS= FALSE)$data)), - c('lon', 'lat', 'cluster')) + c('lat', 'lon', 'cluster')) data1 <- 1 : 400 dim(data1) 
<- c(sdate = 2, ftime = 10, lat = 5, lon=4) @@ -56,7 +56,7 @@ test_that("Sanity checks", { EOFS = FALSE)$statistics$frequency),c(2, nclusters)) expect_equal( names(dim(CST_WeatherRegimes(data = data1, nclusters, EOFS= FALSE)$data)), - c('lon', 'lat', 'cluster')) + c('lat', 'lon', 'cluster')) data1 <- 1 : 400 dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) @@ -73,7 +73,7 @@ test_that("Sanity checks", { expect_equal( names(dim(CST_WeatherRegimes(data = data1, ncenters=4)$data)), - c('lon', 'lat', 'cluster')) + c('lat', 'lon', 'cluster')) data1 <- 1 : 400 dim(data1) <- c(time = 20, lat = 5, lon=4) @@ -94,7 +94,7 @@ test_that("Sanity checks", { TRUE) expect_equal( names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), - c('lon', 'lat', 'cluster')) + c('lat', 'lon', 'cluster')) }) -- GitLab From f347eab97279874df7a109e5b8d2b03baf87cc23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 18:38:44 +0200 Subject: [PATCH 31/45] updating CST_WeatherRegimes documentation --- R/1 | 295 +++++++++++++++++++++++++++++++++++++++++ R/CST_WeatherRegimes.R | 27 ++-- 2 files changed, 307 insertions(+), 15 deletions(-) create mode 100644 R/1 diff --git a/R/1 b/R/1 new file mode 100644 index 00000000..961a268a --- /dev/null +++ b/R/1 @@ -0,0 +1,295 @@ +#' @rdname CST_WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. +#'It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced +#'by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. +#'The cluster analysis can be performed with the traditional k-means or those methods +#'included in the hclust (stats package). +#' +#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +#' 4961–4976, doi:10.1007/s00382-019-04839-5. +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data a 's2dv_cube' object +#'@param ncenters Number of clusters to be calculated with the clustering function. +#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. +#'@param neofs number of modes to be kept (default = 30). +#'@param varThreshold Value with the percentage of variance to be explained by the PCs. +#' Only sufficient PCs to explain this much variance will be used in the clustering. +#'@param lon Vector of longitudes. +#'@param lat Vector of latitudes. +#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +#'but the function also support the different methods included in the hclust . These methods are: +#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +#' For more details about these methods see the hclust function documentation included in the stats package. +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). 
+#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param ncores The number of multicore threads to use for parallel computation. +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes +#' \code{$pvalue} (array with the same stru containing the pvalue of the composites obtained through a t-test that accounts for the serial), +#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#'@import s2dverification +#'@import multiApply +#'@examples +#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +#'@export +#' +CST_WeatherRegimes <- function(data, ncenters = NULL, + EOFS = TRUE,neofs = 30, + varThreshold = NULL, + method = "kmeans", + iter.max=100, nstart = 30, + ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if ('lon' %in% names(data)){ + lon <- data$lon + }else { + lon <- NULL + } + result <- WeatherRegime(data$data,ncenters = ncenters, + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, lon = lon, + lat = data$lat, method = method, + iter.max=iter.max, nstart = nstart, + ncores = ncores) + data$data <- result$composite + data$statistics <- result[-1] + return(data) +} + +#' @rdname WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. +#'It can be applied over the dataset with dimensions +#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the +#'EOFs analysis to filter the dataset. +#'The cluster analysis can be performed with the traditional k-means or those methods +#'included in the hclust (stats package). +#' +#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +#' 4961–4976, doi:10.1007/s00382-019-04839-5. +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. +#'@param ncenters Number of clusters to be calculated with the clustering function. +#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. +#'@param neofs number of modes to be kept only if EOFs = TRUE has been selected. (default = 30). +#'@param varThreshold Value with the percentage of variance to be explained by the PCs. 
+#' Only sufficient PCs to explain this much variance will be used in the clustering. +#'@param lon Vector of longitudes. +#'@param lat Vector of latitudes. +#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +#'but the function also support the different methods included in the hclust . These methods are: +#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +#' For more details about these methods see the hclust function documentation included in the stats package. +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@return A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial +# dependence of the data with the same structure as Composite.), +#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#'@import s2dverification +#'@import multiApply +#'@examples +#'res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +#'@export + +WeatherRegime <- function(data, ncenters = NULL, + EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30, + ncores = NULL) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + dimData <- names(dim(data)) + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimensions.") + } + } + + + output <- Apply(data = list(data), + target_dims = c('time','lat','lon'), + fun = .WeatherRegime, + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, + lon = lon, lat = lat, + ncenters = ncenters, + method = method, + ncores = ncores) + + if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { + # The frequency and the persistency are computed as they are useful + # parameters in the cluster analysis + extra_output <- Apply(data = output$cluster, + target_dims = 'time', + fun = .freqPer, + nsdates = nsdates, + nftimes = nftimes , + ncenters = ncenters) + + output <- list(composite=output$composite, + pvalue=output$pvalue, + cluster=output$cluster, + frequency=extra_output$frequency, + persistence=extra_output$persistence) + } + return(output) +} + +.WeatherRegime <- 
function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with 'time', 'lat' and 'lon' dimensions.") + } + + if (!is.null(lat) && dim(data)['lat'] != length(lat)) { + stop("The length of the paramter 'lat' does not match with the ['lat'] dimension of + the parameter 'data'.") + } + if (is.null(ncenters)) { + stop("Parameter 'ncenters' must be specified.") + } + if (EOFS == TRUE && is.null(lon)) { + stop("Parameter 'lon' must be specified.") + } + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + nlon <- dim(data)['lat'] + nlat <- dim(data)['lon'] + + if (any(is.na(data))){ + nas_test <- MergeDims(data, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm = TRUE) + if (dim(nas_test)['space']== c(nlat*nlon)){ + stop("Parameter 'data' contains NAs in the 'time' dimensions.") + } + } + + if (EOFS == TRUE) { + if (is.null(varThreshold)) { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + cluster_input <- dataPC$PC + } else { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + minPC <- + head(as.numeric(which(cumsum(dataPC$var) > varThreshold)), 1) + cluster_input <- dataPC$PC[, 1:minPC] + } + } else { + + dataW <- aperm(Apply(data, target_dims = 'lat', + function (x, la) { + x * cos(la * pi / 180)}, + la = lat)[[1]], c(2, 1, 3)) + + cluster_input <- MergeDims(dataW, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm = TRUE) + + } + + if (method == "kmeans") { + + clust <- kmeans( + cluster_input, + centers = ncenters, + iter.max = iter.max, + nstart = nstart, + trace = FALSE) + + result <- array(0, c(ncenters, nlat, nlon)) + # the order of the data dimensions is changed ('lat','lon','time') + result <- Composite(aperm(data,c(2, 3, 1)), clust$cluster) + + } else { + result <- hclust(dist(cluster_input), method = method) + clusterCut <- cutree(result, ncenters) + result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) + } + result <- lapply(1:length(result), + function (n) { + names(dim(result[[n]])) <- c("lat", "lon", "cluster") + return (result[[n]]) + }) + + names(result) <- c('composite','pvalue') + + if (method == "kmeans") { + clust <- as.array(clust$cluster) + names(dim(clust)) <- 'time' + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } else { + clust <- as.array(clusterCut) + names(dim(clust)) <- 'time' + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } +} + +.freqPer<- function (clust, nsdates, nftimes, ncenters){ + frequency <- persistence <- matrix(NA, nsdates, ncenters) + x <- as.vector(clust) + for (i in 1:nsdates) { + occurences <-rle(x[((i * nftimes) + 1 - nftimes):(i * nftimes)]) + for (j in 1:ncenters) { + frequency[i, j] <-(sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 + persistence[i, j] <- mean(occurences$lengths[occurences$values == j]) + } + } + return(list(frequency = frequency, + persistence = persistence)) +} diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 016e0186..961a268a 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -16,26 +16,24 @@ #' for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data a 's2dv_cube' object - -#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. +#'@param ncenters Number of clusters to be calculated with the clustering function. +#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. #'@param neofs number of modes to be kept (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. -#'@param ncenters Number of clusters to be calculated with the clustering function. #'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) #'but the function also support the different methods included in the hclust . These methods are: #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param ncores The number of multicore threads to use for parallel computation. -#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial -# dependence of the data with the same structure as Composite.), -#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes +#' \code{$pvalue} (array with the same stru containing the pvalue of the composites obtained through a t-test that accounts for the serial), +#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), #' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), #'@import s2dverification @@ -90,23 +88,22 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. -#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data. +#'@param ncenters Number of clusters to be calculated with the clustering function. 
+#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. #'@param neofs number of modes to be kept only if EOFs = TRUE has been selected. (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. -#'@param ncenters Number of clusters to be calculated with the clustering function. #'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) #'but the function also support the different methods included in the hclust . These methods are: #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). #' For more details about these methods see the hclust function documentation included in the stats package. -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). #'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param ncores The number of multicore threads to use for parallel computation. -#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@return A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial +#' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial # dependence of the data with the same structure as Composite.), #' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), #' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), -- GitLab From 8922b13b5aa62a53dd2a97413ba50499c6cf0de8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 19:02:21 +0200 Subject: [PATCH 32/45] updating CST_RegimesAssign.R --- R/CST_RegimesAssign.R | 34 +++++++++++++++++----------------- R/CST_WeatherRegimes.R | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 7af20f15..bf0811c2 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -14,15 +14,16 @@ #' #'@param data a 's2dv_cube' object -#'@param ref_maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. -#'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or +#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. 
+#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param ncores The number of multicore threads to use for parallel computation. -#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test -#' that accounts for the serial dependence of the data with the same structure as Composite.), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , +#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #'@import s2dverification #'@import multiApply @@ -78,17 +79,16 @@ CST_RegimesAssign <- function(data, ref_maps, #'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' #'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param ref_maps Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching. -#'@param lat Vector of latitudes. -#'@param method Whether the matching will be performed in terms of minimum distance (default=’distance’) or +#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. +#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. -#' @param composite a logical indicating if the composite maps are computed or not. -#'@param ncores The number of multicore threads to use for parallel computation. +#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. 
#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test -#' that accounts for the serial dependence of the data with the same structure as Composite.), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , +# or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), +#' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #' #'@import s2dverification diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 961a268a..05785ffe 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -32,7 +32,7 @@ #'@param ncores The number of multicore threads to use for parallel computation. #'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes -#' \code{$pvalue} (array with the same stru containing the pvalue of the composites obtained through a t-test that accounts for the serial), +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial), #' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), #' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -- GitLab From 022cce6af896bad8ac90bca498e3350efb2d2d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 27 May 2020 19:03:52 +0200 Subject: [PATCH 33/45] changes in the documentation --- man/CST_RegimesAssign.Rd | 16 +++++++++------- man/CST_WeatherRegimes.Rd | 8 ++++---- man/RegimesAssign.Rd | 16 +++++++--------- man/WeatherRegimes.Rd | 8 +++----- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index 2f47b2f6..cd897509 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -16,18 +16,20 @@ CST_RegimesAssign( \arguments{ \item{data}{a 's2dv_cube' object} -\item{ref_maps}{Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching.} +\item{ref_maps}{array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.} -\item{method}{Whether the matching will be performed in terms of minimum distance (default=’distance’) or +\item{method}{whether the matching will be performed in terms of minimum distance 
(default=’distance’) or the maximum spatial correlation (method=’ACC’) between the maps.} -\item{ncores}{The number of multicore threads to use for parallel computation.} +\item{composite}{a logical indicating if the composite maps are computed or not (default=FALSE).} + +\item{ncores}{the number of multicore threads to use for parallel computation.} } \value{ -A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) - \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test - that accounts for the serial dependence of the data with the same structure as Composite.), - \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) , +A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) + \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test + that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), + \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), } \description{ diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd index 7a06c0fd..c5975537 100644 --- a/man/CST_WeatherRegimes.Rd +++ b/man/CST_WeatherRegimes.Rd @@ -35,7 +35,7 @@ For more details about these methods see the hclust function documentation inclu \item{ncores}{The number of multicore threads to use for parallel computation.} -\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.} +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} \item{lon}{Vector of longitudes.} @@ -44,9 +44,9 @@ For more details about these methods see the hclust function documentation inclu \item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} } \value{ -A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) - \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial - \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), +A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) + \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial), + \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), } diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index dded79fd..3c4360fa 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -17,22 +17,20 @@ RegimesAssign( \arguments{ \item{data}{an array containing 
anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.}

-\item{ref_maps}{Array with 3-dimensions (lon,lat, k) containing the maps k=1,..,n maps that will be used as a reference for the matching.}
+\item{ref_maps}{array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.}

-\item{lat}{Vector of latitudes.}
-
-\item{method}{Whether the matching will be performed in terms of minimum distance (default=’distance’) or
+\item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or
 the maximum spatial correlation (method=’ACC’) between the maps.}

-\item{composite}{a logical indicating if the composite maps are computed or not.}
+\item{composite}{a logical indicating if the composite maps are computed or not (default=FALSE).}

-\item{ncores}{The number of multicore threads to use for parallel computation.}
+\item{ncores}{the number of multicore threads to use for parallel computation.}
 }
 \value{
 A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1)
- \code{$pvalue} (3-d array (lon,lat, k) containing the pvalue of the composites obtained through a t-test
- that accounts for the serial dependence of the data with the same structure as Composite.),
- \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the regime.maps to which each point is allocated.) ,
+ \code{$pvalue} (array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test
+ that accounts for the serial dependence of the data.) (only if composite='TRUE'),
+ \code{$cluster} (array with the same dimensions as data except latitude and longitude, indicating the ref_maps to which each point is allocated.) 
, \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), } \description{ diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd index 887f202a..4b0863c1 100644 --- a/man/WeatherRegimes.Rd +++ b/man/WeatherRegimes.Rd @@ -39,15 +39,13 @@ For more details about these methods see the hclust function documentation inclu \item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} -\item{ncores}{The number of multicore threads to use for parallel computation.} - -\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) over data.} +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} \item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} } \value{ -A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) - \code{pvalue} (3-d array (lon, lat, k) containing the pvalue of the composites obtained through a t-test that accounts for the serial +A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) + \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -- GitLab From d81d93476ae633d5c6390d20a3189bbc8fd86af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 11:58:59 +0200 Subject: [PATCH 34/45] removing 1 file --- R/1 | 295 ------------------------------------------------------------ 1 file changed, 295 deletions(-) delete mode 100644 R/1 diff --git a/R/1 b/R/1 deleted file mode 100644 index 961a268a..00000000 --- a/R/1 +++ /dev/null @@ -1,295 +0,0 @@ -#' @rdname CST_WeatherRegimes -#' @title Function for Calculating the Cluster analysis -#' -#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -#' -#' @description This function computes the weather regimes from a cluster analysis. -#'It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced -#'by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. -#'The cluster analysis can be performed with the traditional k-means or those methods -#'included in the hclust (stats package). -#' -#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). -#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, -#' 4961–4976, doi:10.1007/s00382-019-04839-5. -#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools -#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} -#' -#'@param data a 's2dv_cube' object -#'@param ncenters Number of clusters to be calculated with the clustering function. 
-#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. -#'@param neofs number of modes to be kept (default = 30). -#'@param varThreshold Value with the percentage of variance to be explained by the PCs. -#' Only sufficient PCs to explain this much variance will be used in the clustering. -#'@param lon Vector of longitudes. -#'@param lat Vector of latitudes. -#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) -#'but the function also support the different methods included in the hclust . These methods are: -#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). -#' For more details about these methods see the hclust function documentation included in the stats package. -#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). -#'@param ncores The number of multicore threads to use for parallel computation. -#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes -#' \code{$pvalue} (array with the same stru containing the pvalue of the composites obtained through a t-test that accounts for the serial), -#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), -#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#'@import s2dverification -#'@import multiApply -#'@examples -#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) -#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) -#'@export -#' -CST_WeatherRegimes <- function(data, ncenters = NULL, - EOFS = TRUE,neofs = 30, - varThreshold = NULL, - method = "kmeans", - iter.max=100, nstart = 30, - ncores = NULL) { - if (!inherits(data, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - if ('lon' %in% names(data)){ - lon <- data$lon - }else { - lon <- NULL - } - result <- WeatherRegime(data$data,ncenters = ncenters, - EOFS = EOFS, neofs = neofs, - varThreshold = varThreshold, lon = lon, - lat = data$lat, method = method, - iter.max=iter.max, nstart = nstart, - ncores = ncores) - data$data <- result$composite - data$statistics <- result[-1] - return(data) -} - -#' @rdname WeatherRegimes -#' @title Function for Calculating the Cluster analysis -#' -#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -#' -#' @description This function computes the weather regimes from a cluster analysis. -#'It can be applied over the dataset with dimensions -#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the -#'EOFs analysis to filter the dataset. -#'The cluster analysis can be performed with the traditional k-means or those methods -#'included in the hclust (stats package). 
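# Illustrative sketch (not part of the package source) of the two clustering
# routes named in the description above, assuming `pcs` is a (time x PC)
# matrix such as the principal components kept after the EOF filtering;
# `ncenters`, `iter.max` and `nstart` mirror the arguments documented here.
set.seed(1)
pcs <- matrix(rnorm(200 * 10), nrow = 200)   # 200 time steps, 10 PCs
ncenters <- 4
# method = 'kmeans': partition the PC space directly.
km <- kmeans(pcs, centers = ncenters, iter.max = 100, nstart = 30)
cluster_km <- km$cluster
# any hclust method (e.g. 'ward.D2'): build the dendrogram, then cut it.
hc <- hclust(dist(pcs), method = "ward.D2")
cluster_hc <- cutree(hc, k = ncenters)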
-#' -#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). -#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, -#' 4961–4976, doi:10.1007/s00382-019-04839-5. -#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools -#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} -#' -#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. -#'@param ncenters Number of clusters to be calculated with the clustering function. -#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. -#'@param neofs number of modes to be kept only if EOFs = TRUE has been selected. (default = 30). -#'@param varThreshold Value with the percentage of variance to be explained by the PCs. -#' Only sufficient PCs to explain this much variance will be used in the clustering. -#'@param lon Vector of longitudes. -#'@param lat Vector of latitudes. -#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) -#'but the function also support the different methods included in the hclust . These methods are: -#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). -#' For more details about these methods see the hclust function documentation included in the stats package. -#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). -#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). 
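# Toy illustration (made-up variance fractions, not package output) of how
# 'varThreshold' trims the PCs used for the clustering: the leading PCs are
# kept up to the first one whose cumulative explained variance exceeds the
# threshold, the same head(which(cumsum(...) > varThreshold), 1) selection
# used in the function body below.
var_explained <- c(0.35, 0.20, 0.15, 0.10, 0.08, 0.05, 0.04, 0.03)
varThreshold <- 0.80
minPC <- head(which(cumsum(var_explained) > varThreshold), 1)
minPC  # 5: the first five PCs are kept (88% of the variance explained)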
-#'@return A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)), -#' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial -# dependence of the data with the same structure as Composite.), -#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), -#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#'@import s2dverification -#'@import multiApply -#'@examples -#'res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) -#'@export - -WeatherRegime <- function(data, ncenters = NULL, - EOFS = TRUE,neofs = 30, - varThreshold = NULL, lon = NULL, - lat = NULL, method = "kmeans", - iter.max=100, nstart = 30, - ncores = NULL) { - - if (is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions.") - } - - if (is.null(lat)) { - stop("Parameter 'lat' must be specified.") - } - - dimData <- names(dim(data)) - - if ('sdate' %in% dimData && 'ftime' %in% dimData) { - nsdates <- dim(data)['sdate'] - nftimes <- dim(data)['ftime'] - data <- MergeDims(data, - merge_dims = c('ftime','sdate'), - rename_dim = 'time') - } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { - names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' - } else { - if (!('time' %in% dimData)) { - stop("Parameter 'data' must have temporal dimensions.") - } - } - - - output <- Apply(data = list(data), - target_dims = c('time','lat','lon'), - fun = .WeatherRegime, - EOFS = EOFS, neofs = neofs, - varThreshold = varThreshold, - lon = lon, lat = lat, - ncenters = ncenters, - method = method, - ncores = ncores) - - if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { - # The frequency and the persistency are computed as they are useful - # parameters in the cluster analysis - extra_output <- Apply(data = output$cluster, - target_dims = 'time', - fun = .freqPer, - nsdates = nsdates, - nftimes = nftimes , - ncenters = ncenters) - - output <- list(composite=output$composite, - pvalue=output$pvalue, - cluster=output$cluster, - frequency=extra_output$frequency, - persistence=extra_output$persistence) - } - return(output) -} - -.WeatherRegime <- function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, - varThreshold = NULL, lon = NULL, - lat = NULL, method = "kmeans", - iter.max=100, nstart = 30) { - - if (is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with 'time', 'lat' and 'lon' dimensions.") - } - - if (!is.null(lat) && dim(data)['lat'] != length(lat)) { - stop("The length of the paramter 'lat' does not match with the ['lat'] dimension of - the parameter 'data'.") - } - if (is.null(ncenters)) { - stop("Parameter 'ncenters' must be specified.") - } - if (EOFS == TRUE && is.null(lon)) { - stop("Parameter 'lon' must be specified.") - } - if (is.null(lat)) { - stop("Parameter 'lat' must be specified.") - } - - nlon <- dim(data)['lat'] - nlat <- dim(data)['lon'] - - if (any(is.na(data))){ - nas_test <- MergeDims(data, merge_dims = c('lat','lon'), - rename_dim = 'space',na.rm = TRUE) - if 
(dim(nas_test)['space']== c(nlat*nlon)){ - stop("Parameter 'data' contains NAs in the 'time' dimensions.") - } - } - - if (EOFS == TRUE) { - if (is.null(varThreshold)) { - dataPC <- EOF(data, - lat = as.vector(lat), - lon = as.vector(lon), - neofs = neofs) - cluster_input <- dataPC$PC - } else { - dataPC <- EOF(data, - lat = as.vector(lat), - lon = as.vector(lon), - neofs = neofs) - minPC <- - head(as.numeric(which(cumsum(dataPC$var) > varThreshold)), 1) - cluster_input <- dataPC$PC[, 1:minPC] - } - } else { - - dataW <- aperm(Apply(data, target_dims = 'lat', - function (x, la) { - x * cos(la * pi / 180)}, - la = lat)[[1]], c(2, 1, 3)) - - cluster_input <- MergeDims(dataW, merge_dims = c('lat','lon'), - rename_dim = 'space',na.rm = TRUE) - - } - - if (method == "kmeans") { - - clust <- kmeans( - cluster_input, - centers = ncenters, - iter.max = iter.max, - nstart = nstart, - trace = FALSE) - - result <- array(0, c(ncenters, nlat, nlon)) - # the order of the data dimensions is changed ('lat','lon','time') - result <- Composite(aperm(data,c(2, 3, 1)), clust$cluster) - - } else { - result <- hclust(dist(cluster_input), method = method) - clusterCut <- cutree(result, ncenters) - result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) - } - result <- lapply(1:length(result), - function (n) { - names(dim(result[[n]])) <- c("lat", "lon", "cluster") - return (result[[n]]) - }) - - names(result) <- c('composite','pvalue') - - if (method == "kmeans") { - clust <- as.array(clust$cluster) - names(dim(clust)) <- 'time' - return(list( - composite = result$composite, - pvalue = result$pvalue, - cluster = clust)) - } else { - clust <- as.array(clusterCut) - names(dim(clust)) <- 'time' - return(list( - composite = result$composite, - pvalue = result$pvalue, - cluster = clust)) - } -} - -.freqPer<- function (clust, nsdates, nftimes, ncenters){ - frequency <- persistence <- matrix(NA, nsdates, ncenters) - x <- as.vector(clust) - for (i in 1:nsdates) { - occurences <-rle(x[((i * nftimes) + 1 - nftimes):(i * nftimes)]) - for (j in 1:ncenters) { - frequency[i, j] <-(sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 - persistence[i, j] <- mean(occurences$lengths[occurences$values == j]) - } - } - return(list(frequency = frequency, - persistence = persistence)) -} -- GitLab From 1f715b5284d9e9afc9b8418c13ad343cb21e1554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 12:25:00 +0200 Subject: [PATCH 35/45] changing the ref_maps input to s2dv_cube --- R/CST_RegimesAssign.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index bf0811c2..816ef052 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -43,16 +43,20 @@ CST_RegimesAssign <- function(data, ref_maps, stop("Parameter 'data' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - if (is.null(ref_maps)) { - stop("Parameter 'ref_maps' must be specified.") + + if (!inherits(ref_maps, 's2dv_cube')) { + stop("Parameter 'ref_maps' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") } + if ('lat' %in% names(data)){ lat <- data$lat }else { lat <- NULL } - result <- RegimesAssign(data$data, ref_maps, lat= lat, - method = "distance", composite = composite) + result <- Apply(data=list(data=data$data, ref_maps=ref_maps$data), lat= lat, fun=RegimesAssign, + target_dims=list(names(dim(data$data)),c('lat','lon','cluster')), + method = method, composite = composite, 
ncores=ncores) if (composite){ data$data <- result$composite -- GitLab From a3ea39740379a7621e01ce5e4f6d09ff40c98c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 12:32:31 +0200 Subject: [PATCH 36/45] updating documentation --- R/1 | 349 +++++++++++++++++++++++++++++++++++++++ R/CST_RegimesAssign.R | 8 +- man/CST_RegimesAssign.Rd | 8 +- 3 files changed, 357 insertions(+), 8 deletions(-) create mode 100644 R/1 diff --git a/R/1 b/R/1 new file mode 100644 index 00000000..12b5aa08 --- /dev/null +++ b/R/1 @@ -0,0 +1,349 @@ +#' @rdname CST_RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function) +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. +#' +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data a 's2dv_cube' object. + +#'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. +#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) 
, +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#'@import s2dverification +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@examples +#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data +#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) +#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) +#'@export +#' + +CST_RegimesAssign <- function(data, ref_maps, + method = "distance", + composite = FALSE, + ncores=NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + + if (!inherits(ref_maps, 's2dv_cube')) { + stop("Parameter 'ref_maps' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + + if ('lat' %in% names(data)){ + lat <- data$lat + }else { + lat <- NULL + } + result <- Apply(data=list(data=data$data, ref_maps=ref_maps$data), lat= lat, fun=RegimesAssign, + target_dims=list(names(dim(data$data)),c('lat','lon','cluster')), + method = method, composite = composite, ncores=ncores) + + if (composite){ + data$data <- result$composite + data$statistics <- result[-1] + }else{ + data <- NULL + data$statistics <- result + } + + return(data) +} + +#' @rdname RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function). +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. +#' +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. +#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. +#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. +#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), +#' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), +#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) 
, +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' +#'@import s2dverification +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@examples +#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite +#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), +#'lat=lonlat_data$exp$lat,composite=FALSE) +#'@export + +RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + if (is.null(ref_maps)) { + stop("Parameter 'ref_maps' must be specified.") + } + + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + if (is.null(names(dim(ref_maps)))) { + stop("Parameter 'ref_maps' must be an array with named dimensions.") + } + + dimData <- names(dim(data)) + + if (!all( c('lat', 'lon') %in% dimData)) { + stop("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.") + } + + dimRef <- names(dim(ref_maps)) + + if (!all( c('cluster', 'lat', 'lon') %in% dimRef)) { + stop("Parameter 'ref_maps' must contain the named dimensions + 'cluster','lat' and 'lon'.") + } + + + if (length(lat) != dim(data)['lat'] | (length(lat) != dim(ref_maps)['lat']) ) { + stop(" Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") + } + + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimensions.") + } + } + + index <- Apply( data = list(target = data), + target_dims = c('lat','lon'), + fun = .RegimesAssign, + ref = ref_maps, + lat = lat, method = method, + ncores=ncores)[[1]] + + nclust <- dim(ref_maps)['cluster'] + freqs <- rep(NA, nclust) + for (n in 1:nclust) { + freqs[n] <- (length(which(index == n)) / length(index)) * 100 + } + + if (composite){ + poslon <- which(names(dim(data)) == 'lon') + poslat <- which(names(dim(data)) == 'lat') + postime <- which(names(dim(data)) == 'time') + posdim <- setdiff(1:length(dim(data)), c(postime, poslat, poslon)) + dataComp <- aperm(data, c(poslon, poslat, postime, posdim)) + + if (any(is.na(index))) { + recon <-list( + composite = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['composite.cluster']), + pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['composite.cluster'])) + } else { + recon <- + Apply(data = list(var = dataComp, occ = index), + target_dims = list(c('lon', 'lat', 'time'), c('time')), + fun = Composite, + K = dim(ref_maps)['cluster']) + } + + output <- list(composite = recon$composite, + pvalue = recon$pvalue, + cluster = index, + frequency = freqs) + } else{ + + output <- list(cluster = index, + frequency = freqs) + } + + return(output) +} + +.RegimesAssign <- function(ref, target, method = 'distance', lat, composite=FALSE) { + posdim <- which(names(dim(ref)) == 'cluster') + poslat <- which(names(dim(ref)) == 'lat') + poslon <- which(names(dim(ref)) == 'lon') + + nclust <- dim(ref)[posdim] + + if 
(all(dim(ref)[-posdim] != dim(target))) { + stop('The target should have the same dimensions [lat,lon] that + the reference ') + } + + if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { + stop( + 'The arrays should include dimensions names ref[cluster,lat,lon] + and target [lat,lon]' + ) + } + + + if (length(lat) != dim(ref)[poslat]) { + stop('latitudes do not match with the maps') + } + + if (is.na(max(target))){ + assign <- NA + + }else{ + + + # This dimensions are reorganized + ref <- aperm(ref, c(posdim, poslat, poslon)) + target <- + aperm(target, c(which(names(dim( + target + )) == 'lat'), which(names(dim( + target + )) == 'lon'))) + + # weights are defined + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) + + + rmsdiff <- function(x, y) { + dims <- dim(x) + ndims <- length(dims) + if (ndims != 2 | ndims != length(dim(y))) { + stop('x and y should be maps') + } + map_diff <- NA * x + for (i in 1:dims[1]) { + for (j in 1:dims[2]) { + map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 + } + } + rmsdiff <- sqrt(mean(map_diff)) + return(rmsdiff) + } + + if (method == 'ACC') { + corr <- rep(NA, nclust) + for (i in 1:nclust) { + corr[i] <- + ACC(InsertDim(InsertDim( + InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 + ), 3, 1), + InsertDim(InsertDim( + InsertDim(target * latWeights, 1, 1), 2, 1 + ), 3, 1))$ACC[2] + } + assign <- which(corr == max(corr)) + } + + if (method == 'distance') { + rms <- rep(NA, nclust) + for (i in 1:nclust) { + rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) + } + assign <- which(rms == min(rms)) + } + } + + return(assign) +} + + +Composite <- function(var, occ, lag = 0, eno = FALSE, K = NULL, fileout = NULL) { + + if ( dim(var)[3] != length(occ) ) { + stop("Temporal dimension of var is not equal to length of occ.") + } + if (is.null(K)) { + K <- max(occ) + } + composite <- array(dim = c(dim(var)[1:2], composite = K)) + tvalue <- array(dim = dim(var)[1:2]) + dof <- array(dim = dim(var)[1:2]) + pvalue <- array(dim = c(dim(var)[1:2], composite = K)) + + if (eno == TRUE) { + n_tot <- Eno(var, posdim = 3) + } else { + n_tot <- length(occ) + } + mean_tot <- Mean1Dim(var, posdim = 3, narm = TRUE) + stdv_tot <- apply(var, c(1, 2), sd, na.rm = TRUE) + + for (k in 1 : K) { + if (length(which(occ == k)) >= 1) { + indices <- which(occ == k) + lag + toberemoved <- which(0 > indices | indices > dim(var)[3]) + + if (length(toberemoved) > 0) { + indices <- indices[-toberemoved] + } + if (eno == TRUE) { + n_k <- Eno(var[, , indices], posdim = 3) + } else { + n_k <- length(indices) + } + if (length(indices) == 1) { + composite[, , k] <- var[, , indices] + warning(paste("Composite", k, "has length 1 and pvalue is NA.")) + } else { + composite[, , k] <- Mean1Dim(var[, , indices], posdim = 3, narm = TRUE) + } + stdv_k <- apply(var[, , indices], c(1, 2), sd, na.rm = TRUE) + + tvalue <- (mean_tot - composite[, , k]) / + sqrt(stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) + dof <- (stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) ^ 2 / + ((stdv_tot ^ 2 / n_tot) ^ 2 / (n_tot - 1) + + (stdv_k ^ 2 / n_k) ^ 2 / (n_k - 1)) + pvalue[, , k] <- 2 * pt(-abs(tvalue), df = dof) + } + } + if (is.null(fileout) == FALSE) { + output <- list(composite = composite, pvalue = pvalue) + save(output, file = paste(fileout, '.sav', sep = '')) + } + + invisible(list(composite = composite, pvalue = pvalue)) +} + + diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 816ef052..12b5aa08 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -12,9 +12,9 @@ 
#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools #' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} #' -#'@param data a 's2dv_cube' object +#'@param data a 's2dv_cube' object. -#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. +#'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. #'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. #'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). @@ -30,8 +30,8 @@ #'@importFrom ClimProjDiags Subset #'@examples #'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data -#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=FALSE) -#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=TRUE) +#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) +#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) #'@export #' diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index cd897509..72d75bef 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -14,9 +14,9 @@ CST_RegimesAssign( ) } \arguments{ -\item{data}{a 's2dv_cube' object} +\item{data}{a 's2dv_cube' object.} -\item{ref_maps}{array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.} +\item{ref_maps}{a 's2dv_cube' object as the output of CST_WeatherRegimes.} \item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or the maximum spatial correlation (method=’ACC’) between the maps.} @@ -40,8 +40,8 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat } \examples{ regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data -res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=FALSE) -res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = drop(regimes),composite=TRUE) +res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) +res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) } \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools -- GitLab From c97588926885cf0a0a00b9a0a19083272e0092f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 12:33:05 +0200 Subject: [PATCH 37/45] remove 1 --- R/1 | 349 ------------------------------------------------------------ 1 file changed, 349 deletions(-) delete mode 100644 R/1 diff --git a/R/1 b/R/1 deleted file mode 100644 index 12b5aa08..00000000 --- a/R/1 +++ /dev/null @@ -1,349 +0,0 @@ -#' @rdname CST_RegimesAssign -#' @title Function for matching a field of anomalies with -#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function) -#' -#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -#' -#' @description This function performs the matching between a field of anomalies and a set -#' of maps which will be used as a reference. 
The anomalies will be assigned to the reference map -#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation -#' (method=‘ACC’) is obtained. -#' -#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools -#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} -#' -#'@param data a 's2dv_cube' object. - -#'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. -#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or -#' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). -#'@param ncores the number of multicore threads to use for parallel computation. -#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes -#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test -#' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , -#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), -#'@import s2dverification -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@examples -#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data -#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) -#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) -#'@export -#' - -CST_RegimesAssign <- function(data, ref_maps, - method = "distance", - composite = FALSE, - ncores=NULL) { - if (!inherits(data, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - - if (!inherits(ref_maps, 's2dv_cube')) { - stop("Parameter 'ref_maps' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - - if ('lat' %in% names(data)){ - lat <- data$lat - }else { - lat <- NULL - } - result <- Apply(data=list(data=data$data, ref_maps=ref_maps$data), lat= lat, fun=RegimesAssign, - target_dims=list(names(dim(data$data)),c('lat','lon','cluster')), - method = method, composite = composite, ncores=ncores) - - if (composite){ - data$data <- result$composite - data$statistics <- result[-1] - }else{ - data <- NULL - data$statistics <- result - } - - return(data) -} - -#' @rdname RegimesAssign -#' @title Function for matching a field of anomalies with -#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function). -#' -#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -#' -#' @description This function performs the matching between a field of anomalies and a set -#' of maps which will be used as a reference. The anomalies will be assigned to the reference map -#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation -#' (method=‘ACC’) is obtained. -#' -#'@references Torralba, V. 
(2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} -#' -#'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. -#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or -#' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). -#'@param ncores the number of multicore threads to use for parallel computation. -#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) -# or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), -#' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test -#' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , -#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), -#' -#'@import s2dverification -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@examples -#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite -#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), -#'lat=lonlat_data$exp$lat,composite=FALSE) -#'@export - -RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) { - - if (is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions.") - } - if (is.null(ref_maps)) { - stop("Parameter 'ref_maps' must be specified.") - } - - if (is.null(lat)) { - stop("Parameter 'lat' must be specified.") - } - - if (is.null(names(dim(ref_maps)))) { - stop("Parameter 'ref_maps' must be an array with named dimensions.") - } - - dimData <- names(dim(data)) - - if (!all( c('lat', 'lon') %in% dimData)) { - stop("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.") - } - - dimRef <- names(dim(ref_maps)) - - if (!all( c('cluster', 'lat', 'lon') %in% dimRef)) { - stop("Parameter 'ref_maps' must contain the named dimensions - 'cluster','lat' and 'lon'.") - } - - - if (length(lat) != dim(data)['lat'] | (length(lat) != dim(ref_maps)['lat']) ) { - stop(" Parameter 'lat' does not match with the dimension 'lat' in the - parameter 'data' or in the parameter 'ref_maps'.") - } - - - if ('sdate' %in% dimData && 'ftime' %in% dimData) { - nsdates <- dim(data)['sdate'] - nftimes <- dim(data)['ftime'] - data <- MergeDims(data, - merge_dims = c('ftime','sdate'), - rename_dim = 'time') - } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { - names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' - } else { - if (!('time' %in% dimData)) { - stop("Parameter 'data' must have temporal dimensions.") - } - } - - index <- Apply( data = list(target = data), - target_dims = c('lat','lon'), - fun = .RegimesAssign, - ref = 
ref_maps, - lat = lat, method = method, - ncores=ncores)[[1]] - - nclust <- dim(ref_maps)['cluster'] - freqs <- rep(NA, nclust) - for (n in 1:nclust) { - freqs[n] <- (length(which(index == n)) / length(index)) * 100 - } - - if (composite){ - poslon <- which(names(dim(data)) == 'lon') - poslat <- which(names(dim(data)) == 'lat') - postime <- which(names(dim(data)) == 'time') - posdim <- setdiff(1:length(dim(data)), c(postime, poslat, poslon)) - dataComp <- aperm(data, c(poslon, poslat, postime, posdim)) - - if (any(is.na(index))) { - recon <-list( - composite = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), - postime, dim(ref_maps)['composite.cluster']), - pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), - postime, dim(ref_maps)['composite.cluster'])) - } else { - recon <- - Apply(data = list(var = dataComp, occ = index), - target_dims = list(c('lon', 'lat', 'time'), c('time')), - fun = Composite, - K = dim(ref_maps)['cluster']) - } - - output <- list(composite = recon$composite, - pvalue = recon$pvalue, - cluster = index, - frequency = freqs) - } else{ - - output <- list(cluster = index, - frequency = freqs) - } - - return(output) -} - -.RegimesAssign <- function(ref, target, method = 'distance', lat, composite=FALSE) { - posdim <- which(names(dim(ref)) == 'cluster') - poslat <- which(names(dim(ref)) == 'lat') - poslon <- which(names(dim(ref)) == 'lon') - - nclust <- dim(ref)[posdim] - - if (all(dim(ref)[-posdim] != dim(target))) { - stop('The target should have the same dimensions [lat,lon] that - the reference ') - } - - if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { - stop( - 'The arrays should include dimensions names ref[cluster,lat,lon] - and target [lat,lon]' - ) - } - - - if (length(lat) != dim(ref)[poslat]) { - stop('latitudes do not match with the maps') - } - - if (is.na(max(target))){ - assign <- NA - - }else{ - - - # This dimensions are reorganized - ref <- aperm(ref, c(posdim, poslat, poslon)) - target <- - aperm(target, c(which(names(dim( - target - )) == 'lat'), which(names(dim( - target - )) == 'lon'))) - - # weights are defined - latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) - - - rmsdiff <- function(x, y) { - dims <- dim(x) - ndims <- length(dims) - if (ndims != 2 | ndims != length(dim(y))) { - stop('x and y should be maps') - } - map_diff <- NA * x - for (i in 1:dims[1]) { - for (j in 1:dims[2]) { - map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 - } - } - rmsdiff <- sqrt(mean(map_diff)) - return(rmsdiff) - } - - if (method == 'ACC') { - corr <- rep(NA, nclust) - for (i in 1:nclust) { - corr[i] <- - ACC(InsertDim(InsertDim( - InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 - ), 3, 1), - InsertDim(InsertDim( - InsertDim(target * latWeights, 1, 1), 2, 1 - ), 3, 1))$ACC[2] - } - assign <- which(corr == max(corr)) - } - - if (method == 'distance') { - rms <- rep(NA, nclust) - for (i in 1:nclust) { - rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) - } - assign <- which(rms == min(rms)) - } - } - - return(assign) -} - - -Composite <- function(var, occ, lag = 0, eno = FALSE, K = NULL, fileout = NULL) { - - if ( dim(var)[3] != length(occ) ) { - stop("Temporal dimension of var is not equal to length of occ.") - } - if (is.null(K)) { - K <- max(occ) - } - composite <- array(dim = c(dim(var)[1:2], composite = K)) - tvalue <- array(dim = dim(var)[1:2]) - dof <- array(dim = dim(var)[1:2]) - pvalue <- array(dim = c(dim(var)[1:2], composite = K)) - - if (eno == TRUE) { - n_tot <- Eno(var, posdim = 3) - } else { - 
n_tot <- length(occ) - } - mean_tot <- Mean1Dim(var, posdim = 3, narm = TRUE) - stdv_tot <- apply(var, c(1, 2), sd, na.rm = TRUE) - - for (k in 1 : K) { - if (length(which(occ == k)) >= 1) { - indices <- which(occ == k) + lag - toberemoved <- which(0 > indices | indices > dim(var)[3]) - - if (length(toberemoved) > 0) { - indices <- indices[-toberemoved] - } - if (eno == TRUE) { - n_k <- Eno(var[, , indices], posdim = 3) - } else { - n_k <- length(indices) - } - if (length(indices) == 1) { - composite[, , k] <- var[, , indices] - warning(paste("Composite", k, "has length 1 and pvalue is NA.")) - } else { - composite[, , k] <- Mean1Dim(var[, , indices], posdim = 3, narm = TRUE) - } - stdv_k <- apply(var[, , indices], c(1, 2), sd, na.rm = TRUE) - - tvalue <- (mean_tot - composite[, , k]) / - sqrt(stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) - dof <- (stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) ^ 2 / - ((stdv_tot ^ 2 / n_tot) ^ 2 / (n_tot - 1) + - (stdv_k ^ 2 / n_k) ^ 2 / (n_k - 1)) - pvalue[, , k] <- 2 * pt(-abs(tvalue), df = dof) - } - } - if (is.null(fileout) == FALSE) { - output <- list(composite = composite, pvalue = pvalue) - save(output, file = paste(fileout, '.sav', sep = '')) - } - - invisible(list(composite = composite, pvalue = pvalue)) -} - - -- GitLab From b764b01c1221e5d88fbab6e4c59817fb3b3666d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 12:59:18 +0200 Subject: [PATCH 38/45] changes in the tests --- tests/testthat/test-CST_RegimesAssign.R | 38 +++++++++++++++++-------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R index 33f177f5..09dad48f 100644 --- a/tests/testthat/test-CST_RegimesAssign.R +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -10,31 +10,30 @@ test_that("Sanity checks", { class(data1) <- 's2dv_cube' expect_error( CST_RegimesAssign(data = data1,ref_maps=1), + paste0("Parameter 'ref_maps' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + regimes <- 1:20 + dim(regimes) <- c(lat = 5, lon=2, cluster=2) + regimes <- list(data=regimes) + class(regimes) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), paste0("Parameter 'data' must be an array with named dimensions.")) data1 <- 1 : 20 dim(data1) <- c(lat = 5, lon=4) data1 <- list(data = data1 , lat=1:5) class(data1) <- 's2dv_cube' - expect_error( - CST_RegimesAssign(data = data1,ref_maps = 1), - paste0("Parameter 'ref_maps' must be an array with named dimensions.")) - - regimes <- 1:20 - dim(regimes) <- c(lat = 5, lon=2, cluster=2) expect_error( CST_RegimesAssign(data = data1,ref_maps = regimes), paste0("Parameter 'data' must have temporal dimensions.")) - data1 <- 1 : 20 dim(data1) <- c(time=20) data1 <- list(data = data1) class(data1) <- 's2dv_cube' - regimes <- 1:20 - dim(regimes) <- c(lat = 5, lon=2, cluster=2) - expect_error( CST_RegimesAssign(data = data1,ref_maps = regimes), paste0("Parameter 'lat' must be specified.")) @@ -99,9 +98,24 @@ test_that("Sanity checks", { regimes <- 1:60 dim(regimes) <- c(lat = 5, lon=2, cluster=6) - + regimes <- list(data=regimes) + class(regimes) <- 's2dv_cube' expect_equal(max(CST_RegimesAssign(data = data1, ref_maps = regimes, composite = FALSE)$statistics$cluster), - unname(dim(regimes)['cluster'])) + unname(dim(regimes$data)['cluster'])) + + + regimes <- 1:60 + dim(regimes) <- c(lat = 5, lon=2, cluster=3, member=2) + regimes <- list(data=regimes) + class(regimes) <- 
's2dv_cube' + expect_equal(names(dim(CST_RegimesAssign(data = data1, ref_maps = regimes, + composite = FALSE)$statistics$cluster)),c('time','member','member')) + + + + + + }) -- GitLab From fdce4ea3bf212ed878e6ecd7b7ca9d3bc7cd8ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 28 May 2020 13:07:14 +0200 Subject: [PATCH 39/45] bugfix in the examples --- R/CST_RegimesAssign.R | 2 +- man/CST_RegimesAssign.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 12b5aa08..9886905e 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -29,7 +29,7 @@ #'@import multiApply #'@importFrom ClimProjDiags Subset #'@examples -#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data +#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) #'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) #'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) #'@export diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index 72d75bef..2a26fd39 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -39,7 +39,7 @@ for which the minimum Eucledian distance (method=’distance’) or highest spat (method=‘ACC’) is obtained. } \examples{ -regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4)$data +regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) } -- GitLab From fd52843d151cb5b6e0359ef59a1de50d06ae6ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 17 Jun 2020 19:28:38 +0200 Subject: [PATCH 40/45] improvements suggested by LP --- R/CST_WeatherRegimes.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 05785ffe..82704f38 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -21,8 +21,6 @@ #'@param neofs number of modes to be kept (default = 30). #'@param varThreshold Value with the percentage of variance to be explained by the PCs. #' Only sufficient PCs to explain this much variance will be used in the clustering. -#'@param lon Vector of longitudes. -#'@param lat Vector of latitudes. #'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) #'but the function also support the different methods included in the hclust . These methods are: #'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). @@ -32,9 +30,9 @@ #'@param ncores The number of multicore threads to use for parallel computation. 
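# Illustrative sketch (made-up cluster series, not part of the package source)
# of how the frequency and persistence returned below are derived from
# run-length encoding, mirroring the .freqPer() helper: frequency is the
# percentage of days assigned to each cluster, persistence the mean length
# (in days) of its consecutive spells.
clust <- c(1, 1, 2, 2, 2, 1, 3, 3, 3, 3)   # cluster index for 10 forecast times
ncenters <- 3
runs <- rle(clust)
frequency <- sapply(seq_len(ncenters), function(j)
  100 * sum(runs$lengths[runs$values == j]) / length(clust))
persistence <- sapply(seq_len(ncenters), function(j)
  mean(runs$lengths[runs$values == j]))
frequency    # 30 30 40
persistence  # 1.5 3.0 4.0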
#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes -#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial), -#' \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence.), +#' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), +#' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), #'@import s2dverification #'@import multiApply @@ -105,8 +103,8 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, # or only k=1 for any specific cluster, i.e., case (*2)), #' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial # dependence of the data with the same structure as Composite.), -#' \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), -#' \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), +#' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), +#' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), #'@import s2dverification #'@import multiApply @@ -157,6 +155,10 @@ WeatherRegime <- function(data, ncenters = NULL, ncores = ncores) if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { + output$cluster <- t(array(output$cluster,dim=c(nftimes,nsdates))) + names(dim(output$cluster)) <- c('sdate','ftime') + + # The frequency and the persistency are computed as they are useful # parameters in the cluster analysis extra_output <- Apply(data = output$cluster, -- GitLab From 62a42ff3e19f2a44921c7400cbca7a41bcad78b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 17 Jun 2020 19:33:39 +0200 Subject: [PATCH 41/45] update documentation --- DESCRIPTION | 3 +-- NAMESPACE | 2 -- man/Analogs.Rd | 25 +++++++-------------- man/BEI_PDFBest.Rd | 20 ++++++----------- man/BEI_Weights.Rd | 7 +++--- man/CST_Analogs.Rd | 20 ++++++----------- man/CST_Anomaly.Rd | 10 +++++---- man/CST_BEI_Weighting.Rd | 16 +++++--------- man/CST_BiasCorrection.Rd | 7 +++--- man/CST_Calibration.Rd | 19 ++++++---------- man/CST_CategoricalEnsCombination.Rd | 17 +++++--------- man/CST_EnsClustering.Rd | 17 +++++--------- man/CST_Load.Rd | 1 + man/CST_MergeDims.Rd | 9 +++----- man/CST_MultiEOF.Rd | 11 +++------- man/CST_MultiMetric.Rd | 
9 ++++---- man/CST_MultivarRMSE.Rd | 7 +++--- man/CST_QuantileMapping.Rd | 20 ++++++----------- man/CST_RFSlope.Rd | 1 + man/CST_RFWeights.Rd | 7 +++--- man/CST_RainFARM.Rd | 24 ++++++-------------- man/CST_RegimesAssign.Rd | 16 +++++--------- man/CST_SaveExp.Rd | 7 +++--- man/CST_SplitDim.Rd | 1 + man/CST_WeatherRegimes.Rd | 31 +++++++++----------------- man/Calibration.Rd | 20 ++++++----------- man/EnsClustering.Rd | 19 +++++----------- man/MergeDims.Rd | 9 +++----- man/MultiEOF.Rd | 17 ++++---------- man/PlotCombinedMap.Rd | 33 ++++++++-------------------- man/PlotForecastPDF.Rd | 17 +++++--------- man/PlotMostLikelyQuantileMap.Rd | 18 +++++---------- man/RFSlope.Rd | 4 +++- man/RainFARM.Rd | 23 +++++-------------- man/RegimesAssign.Rd | 17 +++++--------- man/SplitDim.Rd | 1 + man/WeatherRegimes.Rd | 27 ++++++++--------------- man/areave_data.Rd | 1 + man/as.s2dv_cube.Rd | 7 +++--- man/lonlat_data.Rd | 1 + man/lonlat_prec.Rd | 1 + man/s2dv_cube.Rd | 19 +++++----------- 42 files changed, 193 insertions(+), 348 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2174452f..19d0b720 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,5 +70,4 @@ VignetteBuilder: knitr License: Apache License 2.0 Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 - +RoxygenNote: 5.0.0 diff --git a/NAMESPACE b/NAMESPACE index a7fed60c..c9e08fdc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,10 +19,8 @@ export(CST_QuantileMapping) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) -export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) -export(CST_WeatherRegimes) export(EnsClustering) export(MergeDims) export(MultiEOF) diff --git a/man/Analogs.Rd b/man/Analogs.Rd index 06107c07..ee8a737e 100644 --- a/man/Analogs.Rd +++ b/man/Analogs.Rd @@ -4,19 +4,9 @@ \alias{Analogs} \title{Analogs based on large scale fields.} \usage{ -Analogs( - expL, - obsL, - time_obsL, - expVar = NULL, - obsVar = NULL, - criteria = "Large_dist", - lonVar = NULL, - latVar = NULL, - region = NULL, - nAnalogs = NULL, - return_list = FALSE -) +Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, + criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL, + nAnalogs = NULL, return_list = FALSE) } \arguments{ \item{expL}{an array of N named dimensions containing the experimental field @@ -387,6 +377,11 @@ Local_scalecor <- Analogs(expL=expSLP, str(Local_scalecor) Local_scalecor$AnalogsInfo +} +\author{ +M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} + +Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -394,8 +389,4 @@ and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. \email{pascal.yiou@lsce.ipsl.fr} } -\author{ -M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} -Nuria Perez-Zanon \email{nuria.perez@bsc.es} -} diff --git a/man/BEI_PDFBest.Rd b/man/BEI_PDFBest.Rd index 0ba24a84..f836ab72 100644 --- a/man/BEI_PDFBest.Rd +++ b/man/BEI_PDFBest.Rd @@ -4,16 +4,9 @@ \alias{BEI_PDFBest} \title{Computing the Best Index PDFs combining Index PDFs from two SFSs} \usage{ -BEI_PDFBest( - index_obs, - index_hind1, - index_hind2, - index_fcst1 = NULL, - index_fcst2 = NULL, - method_BC = "none", - time_dim_name = "time", - na.rm = FALSE -) +BEI_PDFBest(index_obs, index_hind1, index_hind2, index_fcst1 = NULL, + index_fcst2 = NULL, method_BC = "none", time_dim_name = "time", + na.rm = FALSE) } \arguments{ \item{index_obs}{Index (e.g. 
NAO index) array from an observational database @@ -120,11 +113,12 @@ dim(res) # time statistic season # 1 2 2 } +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} + diff --git a/man/BEI_Weights.Rd b/man/BEI_Weights.Rd index 867a4eb0..61db33af 100644 --- a/man/BEI_Weights.Rd +++ b/man/BEI_Weights.Rd @@ -43,12 +43,13 @@ dim(res) # sdate dataset member season # 10 3 5 1 +} +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} } \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} + diff --git a/man/CST_Analogs.Rd b/man/CST_Analogs.Rd index d7dd5e14..7c9a1e6f 100644 --- a/man/CST_Analogs.Rd +++ b/man/CST_Analogs.Rd @@ -4,15 +4,8 @@ \alias{CST_Analogs} \title{Downscaling using Analogs based on large scale fields.} \usage{ -CST_Analogs( - expL, - obsL, - time_obsL, - expVar = NULL, - obsVar = NULL, - region = NULL, - criteria = "Large_dist" -) +CST_Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, + region = NULL, criteria = "Large_dist") } \arguments{ \item{expL}{an 's2dv_cube' object containing the experimental field on the @@ -88,6 +81,11 @@ adapted version of the method of Yiou et al 2013. \examples{ res <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs) +} +\author{ +M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} + +Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -99,8 +97,4 @@ from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and \code{\link[s2dverification]{CDORemap}} } -\author{ -M. 
Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} -Nuria Perez-Zanon \email{nuria.perez@bsc.es} -} diff --git a/man/CST_Anomaly.Rd b/man/CST_Anomaly.Rd index 07691ea7..e1c31f0c 100644 --- a/man/CST_Anomaly.Rd +++ b/man/CST_Anomaly.Rd @@ -4,7 +4,8 @@ \alias{CST_Anomaly} \title{Anomalies relative to a climatology along selected dimension with or without cross-validation} \usage{ -CST_Anomaly(exp = NULL, obs = NULL, cross = FALSE, memb = TRUE, dim_anom = 3) +CST_Anomaly(exp = NULL, obs = NULL, cross = FALSE, memb = TRUE, + dim_anom = 3) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.} @@ -52,12 +53,13 @@ str(anom3) anom4 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = FALSE) str(anom4) -} -\seealso{ -\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} } \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Pena Jesus, \email{jesus.pena@bsc.es} } +\seealso{ +\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} +} + diff --git a/man/CST_BEI_Weighting.Rd b/man/CST_BEI_Weighting.Rd index d6f65bb5..0e60a356 100644 --- a/man/CST_BEI_Weighting.Rd +++ b/man/CST_BEI_Weighting.Rd @@ -4,13 +4,8 @@ \alias{CST_BEI_Weighting} \title{Weighting SFSs of a CSTools object.} \usage{ -CST_BEI_Weighting( - var_exp, - aweights, - terciles = NULL, - type = "ensembleMean", - time_dim_name = "time" -) +CST_BEI_Weighting(var_exp, aweights, terciles = NULL, type = "ensembleMean", + time_dim_name = "time") } \arguments{ \item{var_exp}{An object of the class 's2dv_cube' containing the variable @@ -73,11 +68,12 @@ dim(res_CST$data) # time lat lon dataset # 2 3 2 2 } +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} + diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index a1b415fb..e8a82af0 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -35,9 +35,10 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_BiasCorrection(exp = exp, obs = obs) str(a) } -\references{ -Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. Davis (2017). Seasonal climate prediction: a new source of information for the management of wind energy resources. Journal of Applied Meteorology and Climatology, 56, 1231-1247, doi:10.1175/JAMC-D-16-0204.1. (CLIM4ENERGY, EUPORIAS, NEWA, RESILIENCE, SPECS) -} \author{ Verónica Torralba, \email{veronica.torralba@bsc.es} } +\references{ +Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. Davis (2017). Seasonal climate prediction: a new source of information for the management of wind energy resources. Journal of Applied Meteorology and Climatology, 56, 1231-1247, doi:10.1175/JAMC-D-16-0204.1. 
(CLIM4ENERGY, EUPORIAS, NEWA, RESILIENCE, SPECS) +} + diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 891e2e5f..ed880aab 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -4,15 +4,9 @@ \alias{CST_Calibration} \title{Forecast Calibration} \usage{ -CST_Calibration( - exp, - obs, - cal.method = "mse_min", - eval.method = "leave-one-out", - multi.model = F, - na.fill = T, - ncores = 1 -) +CST_Calibration(exp, obs, cal.method = "mse_min", + eval.method = "leave-one-out", multi.model = F, na.fill = T, + ncores = 1) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}.} @@ -50,11 +44,12 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") str(a) } -\seealso{ -\code{\link{CST_Load}} -} \author{ Verónica Torralba, \email{veronica.torralba@bsc.es} Bert Van Schaeybroeck, \email{bertvs@meteo.be} } +\seealso{ +\code{\link{CST_Load}} +} + diff --git a/man/CST_CategoricalEnsCombination.Rd b/man/CST_CategoricalEnsCombination.Rd index c23f8341..e551c3ec 100644 --- a/man/CST_CategoricalEnsCombination.Rd +++ b/man/CST_CategoricalEnsCombination.Rd @@ -4,14 +4,8 @@ \alias{CST_CategoricalEnsCombination} \title{Make categorical forecast based on a multi-model forecast with potential for calibrate} \usage{ -CST_CategoricalEnsCombination( - exp, - obs, - cat.method = "pool", - eval.method = "leave-one-out", - amt.cat = 3, - ... -) +CST_CategoricalEnsCombination(exp, obs, cat.method = "pool", + eval.method = "leave-one-out", amt.cat = 3, ...) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. The amount of forecasting models is equal to the size of the \code{dataset} dimension of the data array. The amount of members per model may be different. The size of the \code{member} dimension of the data array is equal to the maximum of the ensemble members among the models. Models with smaller ensemble sizes have residual indices of \code{member} dimension in the data array filled with NA values.} @@ -89,6 +83,9 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_CategoricalEnsCombination(exp = exp, obs = obs, amt.cat = 3, cat.method = "mmw") } } +\author{ +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} \references{ Rajagopalan, B., Lall, U., & Zebiak, S. E. (2002). Categorical climate forecasts through regularization and optimal combination of multiple GCM ensembles. Monthly Weather Review, 130(7), 1792-1811. @@ -96,6 +93,4 @@ Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combin Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). 
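A minimal usage sketch of the categorical combination interface condensed above may help reviewers check the reflowed signature against a real call; the toy 's2dv_cube' objects are built exactly as in the \examples section of this man page, and only arguments declared in the signature are used, so this is a sketch rather than new API:

# Sketch: pooled tercile (amt.cat = 3) categorical forecast from a toy
# multi-model ensemble; "mmw" (model-member weighting) is the variant
# exercised in the example above.
library(CSTools)
mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
lon <- seq(0, 30, 5)
lat <- seq(0, 25, 5)
exp <- list(data = mod1, lat = lat, lon = lon)
obs <- list(data = obs1, lat = lat, lon = lon)
attr(exp, 'class') <- 's2dv_cube'
attr(obs, 'class') <- 's2dv_cube'
a <- CST_CategoricalEnsCombination(exp = exp, obs = obs, amt.cat = 3,
                                   cat.method = "pool")
str(a$data)  # inspect the categorical output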
} -\author{ -Bert Van Schaeybroeck, \email{bertvs@meteo.be} -} + diff --git a/man/CST_EnsClustering.Rd b/man/CST_EnsClustering.Rd index 154541d5..c13bf205 100644 --- a/man/CST_EnsClustering.Rd +++ b/man/CST_EnsClustering.Rd @@ -4,18 +4,10 @@ \alias{CST_EnsClustering} \title{Ensemble clustering} \usage{ -CST_EnsClustering( - exp, - time_moment = "mean", - numclus = NULL, - lon_lim = NULL, - lat_lim = NULL, - variance_explained = 80, - numpcs = NULL, - time_percentile = 90, - cluster_dim = "member", - verbose = F -) +CST_EnsClustering(exp, time_moment = "mean", numclus = NULL, + lon_lim = NULL, lat_lim = NULL, variance_explained = 80, + numpcs = NULL, time_percentile = 90, cluster_dim = "member", + verbose = F) } \arguments{ \item{exp}{An object of the class 's2dv_cube', containing the variables to be analysed. @@ -133,3 +125,4 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } + diff --git a/man/CST_Load.Rd b/man/CST_Load.Rd index bf03ba42..1fee022c 100644 --- a/man/CST_Load.Rd +++ b/man/CST_Load.Rd @@ -47,3 +47,4 @@ obs <- CSTools::lonlat_data$obs \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} } + diff --git a/man/CST_MergeDims.Rd b/man/CST_MergeDims.Rd index 0762e83f..449e011e 100644 --- a/man/CST_MergeDims.Rd +++ b/man/CST_MergeDims.Rd @@ -4,12 +4,8 @@ \alias{CST_MergeDims} \title{Function to Merge Dimensions} \usage{ -CST_MergeDims( - data, - merge_dims = c("ftime", "monthly"), - rename_dim = NULL, - na.rm = FALSE -) +CST_MergeDims(data, merge_dims = c("ftime", "monthly"), rename_dim = NULL, + na.rm = FALSE) } \arguments{ \item{data}{a 's2dv_cube' object} @@ -42,3 +38,4 @@ dim(new_data$data) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } + diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 036a6470..fb584751 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -4,14 +4,8 @@ \alias{CST_MultiEOF} \title{EOF analysis of multiple variables} \usage{ -CST_MultiEOF( - datalist, - neof_max = 40, - neof_composed = 5, - minvar = 0.6, - lon_lim = NULL, - lat_lim = NULL -) +CST_MultiEOF(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, + lon_lim = NULL, lat_lim = NULL) } \arguments{ \item{datalist}{A list of objects of the class 's2dv_cube', containing the variables to be analysed. @@ -75,3 +69,4 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } + diff --git a/man/CST_MultiMetric.Rd b/man/CST_MultiMetric.Rd index 8e3ce593..079a5588 100644 --- a/man/CST_MultiMetric.Rd +++ b/man/CST_MultiMetric.Rd @@ -37,14 +37,15 @@ c(ano_exp, ano_obs) \%<-\% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb a <- CST_MultiMetric(exp = ano_exp, obs = ano_obs) str(a) } +\author{ +Mishra Niti, \email{niti.mishra@bsc.es} + +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} \references{ Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). 
Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{http://link.springer.com/10.1007/s00382-018-4404-z} } \seealso{ \code{\link[s2dverification]{Corr}}, \code{\link[s2dverification]{RMS}}, \code{\link[s2dverification]{RMSSS}} and \code{\link{CST_Load}} } -\author{ -Mishra Niti, \email{niti.mishra@bsc.es} -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} diff --git a/man/CST_MultivarRMSE.Rd b/man/CST_MultivarRMSE.Rd index 24af608c..685eaf77 100644 --- a/man/CST_MultivarRMSE.Rd +++ b/man/CST_MultivarRMSE.Rd @@ -56,9 +56,10 @@ weight <- c(1, 2) a <- CST_MultivarRMSE(exp = ano_exp, obs = ano_obs, weight = weight) str(a) } -\seealso{ -\code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}} -} \author{ Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} } +\seealso{ +\code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}} +} + diff --git a/man/CST_QuantileMapping.Rd b/man/CST_QuantileMapping.Rd index ad8f4b6c..1c93843e 100644 --- a/man/CST_QuantileMapping.Rd +++ b/man/CST_QuantileMapping.Rd @@ -4,16 +4,9 @@ \alias{CST_QuantileMapping} \title{Quantiles Mapping for seasonal or decadal forecast data} \usage{ -CST_QuantileMapping( - exp, - obs, - exp_cor = NULL, - sample_dims = c("sdate", "ftime", "member"), - sample_length = NULL, - method = "QUANT", - ncores = NULL, - ... -) +CST_QuantileMapping(exp, obs, exp_cor = NULL, sample_dims = c("sdate", + "ftime", "member"), sample_length = NULL, method = "QUANT", + ncores = NULL, ...) } \arguments{ \item{exp}{an object of class \code{s2dv_cube}} @@ -84,9 +77,10 @@ res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'DIST') } } -\seealso{ -\code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} -} \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } +\seealso{ +\code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +} + diff --git a/man/CST_RFSlope.Rd b/man/CST_RFSlope.Rd index 0c4e1671..d2b5aec0 100644 --- a/man/CST_RFSlope.Rd +++ b/man/CST_RFSlope.Rd @@ -50,3 +50,4 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } + diff --git a/man/CST_RFWeights.Rd b/man/CST_RFWeights.Rd index ef5ebe4d..08a7b850 100644 --- a/man/CST_RFWeights.Rd +++ b/man/CST_RFWeights.Rd @@ -47,6 +47,9 @@ nf <- 8 ww <- CST_RFWeights("./worldclim.nc", nf, lon, lat, fsmooth = TRUE) } } +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} \references{ Terzago, S., Palazzi, E., & von Hardenberg, J. (2018). Stochastic downscaling of precipitation in complex orography: @@ -54,6 +57,4 @@ A simple method to reproduce a realistic fine-scale climatology. Natural Hazards and Earth System Sciences, 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 . 
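Since the CST_QuantileMapping() reflow a few hunks above changes the most arguments at once, a short sketch of the call it encodes follows; it assumes only the lonlat_data sample cubes documented elsewhere in this series and the 'QUANT' default named in the signature:

# Sketch: empirical quantile mapping of the sample hindcast onto the
# observed distribution, pooling the default sample dimensions
# c('sdate', 'ftime', 'member').
library(CSTools)
res <- CST_QuantileMapping(exp = lonlat_data$exp, obs = lonlat_data$obs,
                           method = "QUANT")
dim(res$data)  # same dimensions as lonlat_data$exp$data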
} -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} -} + diff --git a/man/CST_RainFARM.Rd b/man/CST_RainFARM.Rd index 1c609e08..4a667f9a 100644 --- a/man/CST_RainFARM.Rd +++ b/man/CST_RainFARM.Rd @@ -4,20 +4,9 @@ \alias{CST_RainFARM} \title{RainFARM stochastic precipitation downscaling of a CSTools object} \usage{ -CST_RainFARM( - data, - nf, - weights = 1, - slope = 0, - kmin = 1, - nens = 1, - fglob = FALSE, - fsmooth = TRUE, - nprocs = 1, - time_dim = NULL, - verbose = FALSE, - drop_realization_dim = FALSE -) +CST_RainFARM(data, nf, weights = 1, slope = 0, kmin = 1, nens = 1, + fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, + verbose = FALSE, drop_realization_dim = FALSE) } \arguments{ \item{data}{An object of the class 's2dv_cube' as returned by `CST_Load`, @@ -106,12 +95,13 @@ dim(res$data) # dataset member realization sdate ftime lat lon # 1 2 3 3 4 64 64 +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } \references{ Terzago, S. et al. (2018). NHESS 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 ; D'Onofrio et al. (2014), J of Hydrometeorology 15, 830-843; Rebora et. al. (2006), JHM 7, 724. } -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} -} + diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index 2a26fd39..2606720d 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -5,13 +5,8 @@ \title{Function for matching a field of anomalies with a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function)} \usage{ -CST_RegimesAssign( - data, - ref_maps, - method = "distance", - composite = FALSE, - ncores = NULL -) +CST_RegimesAssign(data, ref_maps, method = "distance", composite = FALSE, + ncores = NULL) } \arguments{ \item{data}{a 's2dv_cube' object.} @@ -43,10 +38,11 @@ regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4 res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) } +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} + diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 0e49c119..17537205 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -29,10 +29,11 @@ destination <- "./path/" CST_SaveExp(data = data, destination = destination) } -} -\seealso{ -\code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} } \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} } +\seealso{ +\code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} +} + diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd index ee93aedc..2019ea7b 100644 --- a/man/CST_SplitDim.Rd +++ b/man/CST_SplitDim.Rd @@ -43,3 +43,4 @@ dim(new_data$data) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } + diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd index c5975537..bd811b44 100644 --- a/man/CST_WeatherRegimes.Rd +++ b/man/CST_WeatherRegimes.Rd @@ -4,17 +4,9 @@ \alias{CST_WeatherRegimes} \title{Function for Calculating the Cluster analysis} \usage{ -CST_WeatherRegimes( - data, - ncenters = NULL, - EOFS = TRUE, - neofs = 30, - varThreshold = NULL, - method = "kmeans", - iter.max = 100, - nstart = 30, - ncores = NULL -) +CST_WeatherRegimes(data, ncenters = NULL, EOFS = TRUE, neofs = 30, + varThreshold = NULL, method = "kmeans", iter.max = 100, nstart = 30, + ncores = NULL) } \arguments{ \item{data}{a 's2dv_cube' object} @@ -37,17 +29,13 @@ For more details about these methods see the hclust function documentation inclu \item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} -\item{lon}{Vector of longitudes.} - -\item{lat}{Vector of latitudes.} - \item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} } \value{ A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) - \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial), - \code{$cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), - \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), + \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence.), + \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), + \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), } \description{ @@ -61,6 +49,9 @@ included in the hclust (stats package). res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) } +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). Characterization of European wind speed variability using weather regimes. 
Climate Dynamics,53, @@ -69,6 +60,4 @@ Characterization of European wind speed variability using weather regimes. Clima Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} + diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 9f884671..4290abd7 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -4,15 +4,8 @@ \alias{Calibration} \title{Forecast Calibration} \usage{ -Calibration( - exp, - obs, - cal.method = "mse_min", - eval.method = "leave-one-out", - multi.model = F, - na.fill = T, - ncores = 1 -) +Calibration(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", + multi.model = F, na.fill = T, ncores = 1) } \arguments{ \item{exp}{an array containing the seasonal forecast experiment data.} @@ -37,6 +30,11 @@ Four types of member-by-member bias correction can be performed. The \code{bias} Both in-sample or out-of-sample (leave-one-out cross validation) calibration are possible. } +\author{ +Verónica Torralba, \email{veronica.torralba@bsc.es} + +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} \references{ Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x @@ -47,8 +45,4 @@ Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using mem \seealso{ \code{\link{CST_Load}} } -\author{ -Verónica Torralba, \email{veronica.torralba@bsc.es} -Bert Van Schaeybroeck, \email{bertvs@meteo.be} -} diff --git a/man/EnsClustering.Rd b/man/EnsClustering.Rd index 2fd8a3f1..27aca453 100644 --- a/man/EnsClustering.Rd +++ b/man/EnsClustering.Rd @@ -4,20 +4,10 @@ \alias{EnsClustering} \title{Ensemble clustering} \usage{ -EnsClustering( - data, - lat, - lon, - time_moment = "mean", - numclus = NULL, - lon_lim = NULL, - lat_lim = NULL, - variance_explained = 80, - numpcs = NULL, - time_percentile = 90, - cluster_dim = "member", - verbose = T -) +EnsClustering(data, lat, lon, time_moment = "mean", numclus = NULL, + lon_lim = NULL, lat_lim = NULL, variance_explained = 80, + numpcs = NULL, time_percentile = 90, cluster_dim = "member", + verbose = T) } \arguments{ \item{data}{A matrix of dimensions 'dataset member sdate ftime lat lon' containing the variables to be analysed.} @@ -77,3 +67,4 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } + diff --git a/man/MergeDims.Rd b/man/MergeDims.Rd index 7539ef6e..585049e8 100644 --- a/man/MergeDims.Rd +++ b/man/MergeDims.Rd @@ -4,12 +4,8 @@ \alias{MergeDims} \title{Function to Merge Dimensions} \usage{ -MergeDims( - data, - merge_dims = c("time", "monthly"), - rename_dim = NULL, - na.rm = FALSE -) +MergeDims(data, merge_dims = c("time", "monthly"), rename_dim = NULL, + na.rm = FALSE) } \arguments{ \item{data}{an n-dimensional array with named dimensions} @@ -32,3 +28,4 @@ new_data <- MergeDims(data, merge_dims = c('time', 'lat')) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } + diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index dd0fc7fe..1e822fc4 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -4,19 +4,9 @@ \alias{MultiEOF} \title{EOF analysis of multiple variables starting from an array (reduced version)} \usage{ -MultiEOF( - data, - lon, - lat, - time, 
- lon_dim = "lon", - lat_dim = "lat", - neof_max = 40, - neof_composed = 5, - minvar = 0.6, - lon_lim = NULL, - lat_lim = NULL -) +MultiEOF(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", + neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, + lat_lim = NULL) } \arguments{ \item{data}{A multidimensional array with dimension \code{"var"}, @@ -56,3 +46,4 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } + diff --git a/man/PlotCombinedMap.Rd b/man/PlotCombinedMap.Rd index 616b84f9..6857c64d 100644 --- a/man/PlotCombinedMap.Rd +++ b/man/PlotCombinedMap.Rd @@ -4,27 +4,11 @@ \alias{PlotCombinedMap} \title{Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function} \usage{ -PlotCombinedMap( - maps, - lon, - lat, - map_select_fun, - display_range, - map_dim = "map", - brks = NULL, - cols = NULL, - col_unknown_map = "white", - mask = NULL, - col_mask = "grey", - bar_titles = NULL, - legend_scale = 1, - fileout = NULL, - width = 8, - height = 5, - size_units = "in", - res = 100, - ... -) +PlotCombinedMap(maps, lon, lat, map_select_fun, display_range, + map_dim = "map", brks = NULL, cols = NULL, col_unknown_map = "white", + mask = NULL, col_mask = "grey", bar_titles = NULL, legend_scale = 1, + fileout = NULL, width = 8, height = 5, size_units = "in", res = 100, + ...) } \arguments{ \item{maps}{List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). Dimension names are required.} @@ -83,11 +67,12 @@ PlotCombinedMap(list(a, b, c), lons, lats, bar_titles = paste('\% of belonging to', c('a', 'b', 'c')), brks = 20, width = 10, height = 8) } -\seealso{ -\code{PlotCombinedMap} and \code{PlotEquiMap} -} \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} Veronica Torralba, \email{veronica.torralba@bsc.es} } +\seealso{ +\code{PlotCombinedMap} and \code{PlotEquiMap} +} + diff --git a/man/PlotForecastPDF.Rd b/man/PlotForecastPDF.Rd index c04b43c1..d7b95b08 100644 --- a/man/PlotForecastPDF.Rd +++ b/man/PlotForecastPDF.Rd @@ -4,18 +4,10 @@ \alias{PlotForecastPDF} \title{Plot one or multiple ensemble forecast pdfs for the same event} \usage{ -PlotForecastPDF( - fcst, - tercile.limits, - extreme.limits = NULL, - obs = NULL, - plotfile = NULL, - title = "Set a title", - var.name = "Varname (units)", - fcst.names = NULL, - add.ensmemb = c("above", "below", "no"), - color.set = c("ggplot", "s2s4e", "hydro") -) +PlotForecastPDF(fcst, tercile.limits, extreme.limits = NULL, obs = NULL, + plotfile = NULL, title = "Set a title", var.name = "Varname (units)", + fcst.names = NULL, add.ensmemb = c("above", "below", "no"), + color.set = c("ggplot", "s2s4e", "hydro")) } \arguments{ \item{fcst}{a dataframe or array containing all the ensemble members for each forecast. If \code{'fcst'} is an array, it should have two labelled dimensions, and one of them should be \code{'members'}. 
If \code{'fcst'} is a data.frame, each column should be a separate forecast, with the rows being the different ensemble members.} @@ -57,3 +49,4 @@ PlotForecastPDF(fcsts2, c(-0.66, 0.66), extreme.limits = c(-1.2, 1.2), \author{ Llorenç Lledó \email{llledo@bsc.es} } + diff --git a/man/PlotMostLikelyQuantileMap.Rd b/man/PlotMostLikelyQuantileMap.Rd index 4c400b18..6c92850e 100644 --- a/man/PlotMostLikelyQuantileMap.Rd +++ b/man/PlotMostLikelyQuantileMap.Rd @@ -4,15 +4,8 @@ \alias{PlotMostLikelyQuantileMap} \title{Plot Maps of Most Likely Quantiles} \usage{ -PlotMostLikelyQuantileMap( - probs, - lon, - lat, - cat_dim = "bin", - bar_titles = NULL, - col_unknown_cat = "white", - ... -) +PlotMostLikelyQuantileMap(probs, lon, lat, cat_dim = "bin", + bar_titles = NULL, col_unknown_cat = "white", ...) } \arguments{ \item{probs}{a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories. The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added.} @@ -116,10 +109,11 @@ PlotMostLikelyQuantileMap(bins, lons, lats, mask = 1 - (w1 + w2 / max(c(w1, w2))), brks = 20, width = 10, height = 8) -} -\seealso{ -\code{PlotCombinedMap} and \code{PlotEquiMap} } \author{ Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} } +\seealso{ +\code{PlotCombinedMap} and \code{PlotEquiMap} +} + diff --git a/man/RFSlope.Rd b/man/RFSlope.Rd index db3f0e10..09a24ff5 100644 --- a/man/RFSlope.Rd +++ b/man/RFSlope.Rd @@ -4,7 +4,8 @@ \alias{RFSlope} \title{RainFARM spectral slopes from an array (reduced version)} \usage{ -RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", lat_dim = "lat") +RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", + lat_dim = "lat") } \arguments{ \item{data}{Array containing the spatial precipitation fields to downscale. @@ -59,3 +60,4 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } + diff --git a/man/RainFARM.Rd b/man/RainFARM.Rd index 0db84679..984dcd42 100644 --- a/man/RainFARM.Rd +++ b/man/RainFARM.Rd @@ -4,24 +4,10 @@ \alias{RainFARM} \title{RainFARM stochastic precipitation downscaling (reduced version)} \usage{ -RainFARM( - data, - lon, - lat, - nf, - weights = 1, - nens = 1, - slope = 0, - kmin = 1, - fglob = FALSE, - fsmooth = TRUE, - nprocs = 1, - time_dim = NULL, - lon_dim = "lon", - lat_dim = "lat", - drop_realization_dim = FALSE, - verbose = FALSE -) +RainFARM(data, lon, lat, nf, weights = 1, nens = 1, slope = 0, kmin = 1, + fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, + lon_dim = "lon", lat_dim = "lat", drop_realization_dim = FALSE, + verbose = FALSE) } \arguments{ \item{data}{Precipitation array to downscale. @@ -131,3 +117,4 @@ dim(res$data) \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } + diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index 3c4360fa..0e3f4d2c 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -5,14 +5,8 @@ \title{Function for matching a field of anomalies with a set of maps used as a reference (e.g. 
clusters obtained from the WeatherRegime function).} \usage{ -RegimesAssign( - data, - ref_maps, - lat, - method = "distance", - composite = FALSE, - ncores = NULL -) +RegimesAssign(data, ref_maps, lat, method = "distance", composite = FALSE, + ncores = NULL) } \arguments{ \item{data}{an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.} @@ -44,9 +38,10 @@ regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, E res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), lat=lonlat_data$exp$lat,composite=FALSE) } -\references{ -Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} -} \author{ Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} } +\references{ +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} + diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd index f07e4756..e36aa8a5 100644 --- a/man/SplitDim.Rd +++ b/man/SplitDim.Rd @@ -35,3 +35,4 @@ new_data <- SplitDim(data, indices = time, freq = 'year') \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } + diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd index 4b0863c1..d01ed918 100644 --- a/man/WeatherRegimes.Rd +++ b/man/WeatherRegimes.Rd @@ -4,19 +4,9 @@ \alias{WeatherRegime} \title{Function for Calculating the Cluster analysis} \usage{ -WeatherRegime( - data, - ncenters = NULL, - EOFS = TRUE, - neofs = 30, - varThreshold = NULL, - lon = NULL, - lat = NULL, - method = "kmeans", - iter.max = 100, - nstart = 30, - ncores = NULL -) +WeatherRegime(data, ncenters = NULL, EOFS = TRUE, neofs = 30, + varThreshold = NULL, lon = NULL, lat = NULL, method = "kmeans", + iter.max = 100, nstart = 30, ncores = NULL) } \arguments{ \item{data}{an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'.} @@ -46,8 +36,8 @@ For more details about these methods see the hclust function documentation inclu \value{ A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial - \code{cluster} (A time series of integers (from 1:k) indicating the cluster to which each point is allocated.), - \code{persistence} (The value of the regime whose length is given in cluster_lengths (only if method=’kmeans’ has been selected)), + \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), + \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), } \description{ @@ -61,6 +51,9 @@ included in the hclust (stats package). 
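Before the \examples block below, it may help to see the full regime workflow these two reduced functions are designed for; the sketch chains WeatherRegime() into RegimesAssign() using the lonlat_data sample object, with the $composite element selected explicitly (the element names follow the \value sections of the two man pages):

# Sketch: derive 4 observed regimes, then assign each experiment time
# step to its closest regime with the default minimum-distance criterion.
library(CSTools)
regimes <- WeatherRegime(data = lonlat_data$obs$data,
                         lat = lonlat_data$obs$lat,
                         EOFS = FALSE, ncenters = 4)
assigned <- RegimesAssign(data = lonlat_data$exp$data,
                          ref_maps = drop(regimes$composite),
                          lat = lonlat_data$exp$lat,
                          method = "distance", composite = FALSE)
assigned$cluster    # regime index for each time step
assigned$frequency  # percentage of assignations per reference map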
\examples{ res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) } +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, @@ -69,6 +62,4 @@ Characterization of European wind speed variability using weather regimes. Clima Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} + diff --git a/man/areave_data.Rd b/man/areave_data.Rd index a772220a..cc79c85c 100644 --- a/man/areave_data.Rd +++ b/man/areave_data.Rd @@ -41,3 +41,4 @@ areave_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} + diff --git a/man/as.s2dv_cube.Rd b/man/as.s2dv_cube.Rd index c2b8f3a8..13a2a296 100644 --- a/man/as.s2dv_cube.Rd +++ b/man/as.s2dv_cube.Rd @@ -40,11 +40,12 @@ data <- as.s2dv_cube(data) class(data) } } -\seealso{ -\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} -} \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Nicolau Manubens, \email{nicolau.manubens@bsc.es} } +\seealso{ +\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} +} + diff --git a/man/lonlat_data.Rd b/man/lonlat_data.Rd index 0c6ee30f..eca7abac 100644 --- a/man/lonlat_data.Rd +++ b/man/lonlat_data.Rd @@ -41,3 +41,4 @@ lonlat_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} + diff --git a/man/lonlat_prec.Rd b/man/lonlat_prec.Rd index 345e3cab..69cb94e8 100644 --- a/man/lonlat_prec.Rd +++ b/man/lonlat_prec.Rd @@ -29,3 +29,4 @@ lonlat_prec <- CST_Load('prlr', exp = list(infile), obs = NULL, Jost von Hardenberg \email{j.vonhardenberg@isac.cnr.it} } \keyword{data} + diff --git a/man/s2dv_cube.Rd b/man/s2dv_cube.Rd index b0ce8966..48af7bbb 100644 --- a/man/s2dv_cube.Rd +++ b/man/s2dv_cube.Rd @@ -4,16 +4,8 @@ \alias{s2dv_cube} \title{Creation of a 's2dv_cube' object} \usage{ -s2dv_cube( - data, - lon = NULL, - lat = NULL, - Variable = NULL, - Datasets = NULL, - Dates = NULL, - when = NULL, - source_files = NULL -) +s2dv_cube(data, lon = NULL, lat = NULL, Variable = NULL, + Datasets = NULL, Dates = NULL, when = NULL, source_files = NULL) } \arguments{ \item{data}{an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon.} @@ -83,9 +75,10 @@ exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) class(exp8) } -\seealso{ -\code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} -} \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} } +\seealso{ +\code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} +} + -- GitLab From b4e2386f77b8a6f617ac740307f247f685c831e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 17 Jun 2020 19:41:11 +0200 Subject: [PATCH 42/45] RegimesAssign improvements suggested by LP --- R/CST_RegimesAssign.R | 8 ++++---- man/CST_RegimesAssign.Rd | 4 ++-- man/RegimesAssign.Rd | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff 
--git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 9886905e..0885f82d 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -17,13 +17,13 @@ #'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. #'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). #'@param ncores the number of multicore threads to use for parallel computation. #'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes #' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test #' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , +#' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #'@import s2dverification #'@import multiApply @@ -86,13 +86,13 @@ CST_RegimesAssign <- function(data, ref_maps, #'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. #'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or #' the maximum spatial correlation (method=’ACC’) between the maps. -#'@param composite a logical indicating if the composite maps are computed or not (default=FALSE). +#'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). #'@param ncores the number of multicore threads to use for parallel computation. #'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) # or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), #' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test #' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), -#' \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , +#' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) 
, #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), #' #'@import s2dverification diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index 2606720d..56120942 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -16,7 +16,7 @@ CST_RegimesAssign(data, ref_maps, method = "distance", composite = FALSE, \item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or the maximum spatial correlation (method=’ACC’) between the maps.} -\item{composite}{a logical indicating if the composite maps are computed or not (default=FALSE).} +\item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} \item{ncores}{the number of multicore threads to use for parallel computation.} } @@ -24,7 +24,7 @@ the maximum spatial correlation (method=’ACC’) between the maps.} A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), - \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , + \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), } \description{ diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index 0e3f4d2c..dd9cd7a0 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -16,7 +16,7 @@ RegimesAssign(data, ref_maps, lat, method = "distance", composite = FALSE, \item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or the maximum spatial correlation (method=’ACC’) between the maps.} -\item{composite}{a logical indicating if the composite maps are computed or not (default=FALSE).} +\item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} \item{ncores}{the number of multicore threads to use for parallel computation.} } @@ -24,7 +24,7 @@ the maximum spatial correlation (method=’ACC’) between the maps.} A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), - \code{$cluster} (array with the same dimensions that data except latitude and longitude indicating the ref_maps to which each point is allocated.) , + \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) 
, \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), } \description{ -- GitLab From 8f4489905ab3c9593d16631a8be687850a59c1e4 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Jun 2020 17:30:24 +0200 Subject: [PATCH 43/45] documentation updated automatically with devtools in R3.4.2 --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ man/Analogs.Rd | 25 ++++++++++++++------- man/BEI_PDFBest.Rd | 20 +++++++++++------ man/BEI_Weights.Rd | 7 +++--- man/CST_Analogs.Rd | 20 +++++++++++------ man/CST_Anomaly.Rd | 7 +++--- man/CST_BEI_Weighting.Rd | 16 +++++++++----- man/CST_BiasCorrection.Rd | 7 +++--- man/CST_Calibration.Rd | 7 +++--- man/CST_CategoricalEnsCombination.Rd | 17 +++++++++----- man/CST_EnsClustering.Rd | 1 - man/CST_Load.Rd | 1 - man/CST_MergeDims.Rd | 9 +++++--- man/CST_MultiEOF.Rd | 11 +++++++--- man/CST_MultiMetric.Rd | 9 ++++---- man/CST_MultivarRMSE.Rd | 7 +++--- man/CST_QuantileMapping.Rd | 20 +++++++++++------ man/CST_RFSlope.Rd | 1 - man/CST_RFWeights.Rd | 7 +++--- man/CST_RainFARM.Rd | 24 ++++++++++++++------ man/CST_RegimesAssign.Rd | 16 +++++++++----- man/CST_SaveExp.Rd | 7 +++--- man/CST_SplitDim.Rd | 1 - man/CST_WeatherRegimes.Rd | 21 ++++++++++++------ man/Calibration.Rd | 2 +- man/EnsClustering.Rd | 1 - man/MergeDims.Rd | 9 +++++--- man/MultiEOF.Rd | 17 ++++++++++---- man/PlotCombinedMap.Rd | 33 ++++++++++++++++++++-------- man/PlotForecastPDF.Rd | 17 +++++++++----- man/PlotMostLikelyQuantileMap.Rd | 18 ++++++++++----- man/RFSlope.Rd | 4 +--- man/RainFARM.Rd | 23 ++++++++++++++----- man/RegimesAssign.Rd | 17 +++++++++----- man/SplitDim.Rd | 1 - man/WeatherRegimes.Rd | 23 +++++++++++++------ man/areave_data.Rd | 1 - man/as.s2dv_cube.Rd | 7 +++--- man/lonlat_data.Rd | 1 - man/lonlat_prec.Rd | 1 - man/s2dv_cube.Rd | 19 +++++++++++----- 42 files changed, 290 insertions(+), 169 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dd1e75d0..0492186b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -71,4 +71,4 @@ VignetteBuilder: knitr License: Apache License 2.0 Encoding: UTF-8 LazyData: true -RoxygenNote: 5.0.0 +RoxygenNote: 7.0.2 diff --git a/NAMESPACE b/NAMESPACE index 085f2b0d..a27aaaa2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,8 +19,10 @@ export(CST_QuantileMapping) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) +export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) +export(CST_WeatherRegimes) export(Calibration) export(EnsClustering) export(MergeDims) diff --git a/man/Analogs.Rd b/man/Analogs.Rd index ee8a737e..06107c07 100644 --- a/man/Analogs.Rd +++ b/man/Analogs.Rd @@ -4,9 +4,19 @@ \alias{Analogs} \title{Analogs based on large scale fields.} \usage{ -Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL, - nAnalogs = NULL, return_list = FALSE) +Analogs( + expL, + obsL, + time_obsL, + expVar = NULL, + obsVar = NULL, + criteria = "Large_dist", + lonVar = NULL, + latVar = NULL, + region = NULL, + nAnalogs = NULL, + return_list = FALSE +) } \arguments{ \item{expL}{an array of N named dimensions containing the experimental field @@ -377,11 +387,6 @@ Local_scalecor <- Analogs(expL=expSLP, str(Local_scalecor) Local_scalecor$AnalogsInfo -} -\author{ -M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} - -Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -389,4 +394,8 @@ and M. 
Vrac, 2013 : Ensemble reconstruction of the atmospheric column from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. \email{pascal.yiou@lsce.ipsl.fr} } +\author{ +M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +Nuria Perez-Zanon \email{nuria.perez@bsc.es} +} diff --git a/man/BEI_PDFBest.Rd b/man/BEI_PDFBest.Rd index f836ab72..0ba24a84 100644 --- a/man/BEI_PDFBest.Rd +++ b/man/BEI_PDFBest.Rd @@ -4,9 +4,16 @@ \alias{BEI_PDFBest} \title{Computing the Best Index PDFs combining Index PDFs from two SFSs} \usage{ -BEI_PDFBest(index_obs, index_hind1, index_hind2, index_fcst1 = NULL, - index_fcst2 = NULL, method_BC = "none", time_dim_name = "time", - na.rm = FALSE) +BEI_PDFBest( + index_obs, + index_hind1, + index_hind2, + index_fcst1 = NULL, + index_fcst2 = NULL, + method_BC = "none", + time_dim_name = "time", + na.rm = FALSE +) } \arguments{ \item{index_obs}{Index (e.g. NAO index) array from an observational database @@ -113,12 +120,11 @@ dim(res) # time statistic season # 1 2 2 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/BEI_Weights.Rd b/man/BEI_Weights.Rd index 61db33af..867a4eb0 100644 --- a/man/BEI_Weights.Rd +++ b/man/BEI_Weights.Rd @@ -43,13 +43,12 @@ dim(res) # sdate dataset member season # 10 3 5 1 -} -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} } \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/CST_Analogs.Rd b/man/CST_Analogs.Rd index 7c9a1e6f..d7dd5e14 100644 --- a/man/CST_Analogs.Rd +++ b/man/CST_Analogs.Rd @@ -4,8 +4,15 @@ \alias{CST_Analogs} \title{Downscaling using Analogs based on large scale fields.} \usage{ -CST_Analogs(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - region = NULL, criteria = "Large_dist") +CST_Analogs( + expL, + obsL, + time_obsL, + expVar = NULL, + obsVar = NULL, + region = NULL, + criteria = "Large_dist" +) } \arguments{ \item{expL}{an 's2dv_cube' object containing the experimental field on the @@ -81,11 +88,6 @@ adapted version of the method of Yiou et al 2013. \examples{ res <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs) -} -\author{ -M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} - -Nuria Perez-Zanon \email{nuria.perez@bsc.es} } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -97,4 +99,8 @@ from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and \code{\link[s2dverification]{CDORemap}} } +\author{ +M. 
Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +Nuria Perez-Zanon \email{nuria.perez@bsc.es} +} diff --git a/man/CST_Anomaly.Rd b/man/CST_Anomaly.Rd index 256a94ac..11574168 100644 --- a/man/CST_Anomaly.Rd +++ b/man/CST_Anomaly.Rd @@ -65,13 +65,12 @@ anom5 <- CST_Anomaly(lonlat_data$exp) anom6 <- CST_Anomaly(obs = lonlat_data$obs) +} +\seealso{ +\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} } \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Pena Jesus, \email{jesus.pena@bsc.es} } -\seealso{ -\code{\link[s2dverification]{Ano_CrossValid}}, \code{\link[s2dverification]{Clim}} and \code{\link{CST_Load}} -} - diff --git a/man/CST_BEI_Weighting.Rd b/man/CST_BEI_Weighting.Rd index 0e60a356..d6f65bb5 100644 --- a/man/CST_BEI_Weighting.Rd +++ b/man/CST_BEI_Weighting.Rd @@ -4,8 +4,13 @@ \alias{CST_BEI_Weighting} \title{Weighting SFSs of a CSTools object.} \usage{ -CST_BEI_Weighting(var_exp, aweights, terciles = NULL, type = "ensembleMean", - time_dim_name = "time") +CST_BEI_Weighting( + var_exp, + aweights, + terciles = NULL, + type = "ensembleMean", + time_dim_name = "time" +) } \arguments{ \item{var_exp}{An object of the class 's2dv_cube' containing the variable @@ -68,12 +73,11 @@ dim(res_CST$data) # time lat lon dataset # 2 3 2 2 } -\author{ -Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} -} \references{ Regionally improved seasonal forecast of precipitation through Best estimation of winter NAO, Sanchez-Garcia, E. et al., Adv. Sci. Res., 16, 165174, 2019, https://doi.org/10.5194/asr-16-165-2019 } - +\author{ +Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es} +} diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index 3a8fa4b6..55c325a2 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -37,10 +37,9 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_BiasCorrection(exp = exp, obs = obs) str(a) } -\author{ -Verónica Torralba, \email{veronica.torralba@bsc.es} -} \references{ Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. Davis (2017). Seasonal climate prediction: a new source of information for the management of wind energy resources. Journal of Applied Meteorology and Climatology, 56, 1231-1247, doi:10.1175/JAMC-D-16-0204.1. (CLIM4ENERGY, EUPORIAS, NEWA, RESILIENCE, SPECS) } - +\author{ +Verónica Torralba, \email{veronica.torralba@bsc.es} +} diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 13e6414f..76812a43 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -50,12 +50,11 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") str(a) } +\seealso{ +\code{\link{CST_Load}} +} \author{ Verónica Torralba, \email{veronica.torralba@bsc.es} Bert Van Schaeybroeck, \email{bertvs@meteo.be} } -\seealso{ -\code{\link{CST_Load}} -} - diff --git a/man/CST_CategoricalEnsCombination.Rd b/man/CST_CategoricalEnsCombination.Rd index e551c3ec..c23f8341 100644 --- a/man/CST_CategoricalEnsCombination.Rd +++ b/man/CST_CategoricalEnsCombination.Rd @@ -4,8 +4,14 @@ \alias{CST_CategoricalEnsCombination} \title{Make categorical forecast based on a multi-model forecast with potential for calibrate} \usage{ -CST_CategoricalEnsCombination(exp, obs, cat.method = "pool", - eval.method = "leave-one-out", amt.cat = 3, ...) +CST_CategoricalEnsCombination( + exp, + obs, + cat.method = "pool", + eval.method = "leave-one-out", + amt.cat = 3, + ... 
+) } \arguments{ \item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. The amount of forecasting models is equal to the size of the \code{dataset} dimension of the data array. The amount of members per model may be different. The size of the \code{member} dimension of the data array is equal to the maximum of the ensemble members among the models. Models with smaller ensemble sizes have residual indices of \code{member} dimension in the data array filled with NA values.} @@ -83,9 +89,6 @@ attr(obs, 'class') <- 's2dv_cube' a <- CST_CategoricalEnsCombination(exp = exp, obs = obs, amt.cat = 3, cat.method = "mmw") } } -\author{ -Bert Van Schaeybroeck, \email{bertvs@meteo.be} -} \references{ Rajagopalan, B., Lall, U., & Zebiak, S. E. (2002). Categorical climate forecasts through regularization and optimal combination of multiple GCM ensembles. Monthly Weather Review, 130(7), 1792-1811. @@ -93,4 +96,6 @@ Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combin Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). } - +\author{ +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} diff --git a/man/CST_EnsClustering.Rd b/man/CST_EnsClustering.Rd index 017b8d4c..6ee79b7c 100644 --- a/man/CST_EnsClustering.Rd +++ b/man/CST_EnsClustering.Rd @@ -138,4 +138,3 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/CST_Load.Rd b/man/CST_Load.Rd index 1fee022c..bf03ba42 100644 --- a/man/CST_Load.Rd +++ b/man/CST_Load.Rd @@ -47,4 +47,3 @@ obs <- CSTools::lonlat_data$obs \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} } - diff --git a/man/CST_MergeDims.Rd b/man/CST_MergeDims.Rd index 449e011e..0762e83f 100644 --- a/man/CST_MergeDims.Rd +++ b/man/CST_MergeDims.Rd @@ -4,8 +4,12 @@ \alias{CST_MergeDims} \title{Function to Merge Dimensions} \usage{ -CST_MergeDims(data, merge_dims = c("ftime", "monthly"), rename_dim = NULL, - na.rm = FALSE) +CST_MergeDims( + data, + merge_dims = c("ftime", "monthly"), + rename_dim = NULL, + na.rm = FALSE +) } \arguments{ \item{data}{a 's2dv_cube' object} @@ -38,4 +42,3 @@ dim(new_data$data) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } - diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index fb584751..036a6470 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -4,8 +4,14 @@ \alias{CST_MultiEOF} \title{EOF analysis of multiple variables} \usage{ -CST_MultiEOF(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, - lon_lim = NULL, lat_lim = NULL) +CST_MultiEOF( + datalist, + neof_max = 40, + neof_composed = 5, + minvar = 0.6, + lon_lim = NULL, + lat_lim = NULL +) } \arguments{ \item{datalist}{A list of objects of the class 's2dv_cube', containing the variables to be analysed. 
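The datalist argument described above is easiest to grasp from its shape, sketched here; the second entry is simply a copy of the single-variable sample cube standing in for another field, since the shipped demo data contain one variable only:

# Sketch: composed EOF analysis across a two-entry list of 's2dv_cube'
# objects (in real use, two different variables on the same grid).
library(CSTools)
exp1 <- lonlat_data$exp
exp2 <- lonlat_data$exp  # placeholder for a genuinely different variable
res <- CST_MultiEOF(datalist = list(exp1, exp2),
                    neof_max = 20, neof_composed = 2, minvar = 0.6)
str(res)  # inspect the composed EOF patterns and principal components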
@@ -69,4 +75,3 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } - diff --git a/man/CST_MultiMetric.Rd b/man/CST_MultiMetric.Rd index 079a5588..8e3ce593 100644 --- a/man/CST_MultiMetric.Rd +++ b/man/CST_MultiMetric.Rd @@ -37,15 +37,14 @@ c(ano_exp, ano_obs) \%<-\% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb a <- CST_MultiMetric(exp = ano_exp, obs = ano_obs) str(a) } -\author{ -Mishra Niti, \email{niti.mishra@bsc.es} - -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} \references{ Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{http://link.springer.com/10.1007/s00382-018-4404-z} } \seealso{ \code{\link[s2dverification]{Corr}}, \code{\link[s2dverification]{RMS}}, \code{\link[s2dverification]{RMSSS}} and \code{\link{CST_Load}} } +\author{ +Mishra Niti, \email{niti.mishra@bsc.es} +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_MultivarRMSE.Rd b/man/CST_MultivarRMSE.Rd index 685eaf77..24af608c 100644 --- a/man/CST_MultivarRMSE.Rd +++ b/man/CST_MultivarRMSE.Rd @@ -56,10 +56,9 @@ weight <- c(1, 2) a <- CST_MultivarRMSE(exp = ano_exp, obs = ano_obs, weight = weight) str(a) } -\author{ -Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} -} \seealso{ \code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}} } - +\author{ +Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} +} diff --git a/man/CST_QuantileMapping.Rd b/man/CST_QuantileMapping.Rd index 1c93843e..ad8f4b6c 100644 --- a/man/CST_QuantileMapping.Rd +++ b/man/CST_QuantileMapping.Rd @@ -4,9 +4,16 @@ \alias{CST_QuantileMapping} \title{Quantiles Mapping for seasonal or decadal forecast data} \usage{ -CST_QuantileMapping(exp, obs, exp_cor = NULL, sample_dims = c("sdate", - "ftime", "member"), sample_length = NULL, method = "QUANT", - ncores = NULL, ...) +CST_QuantileMapping( + exp, + obs, + exp_cor = NULL, + sample_dims = c("sdate", "ftime", "member"), + sample_length = NULL, + method = "QUANT", + ncores = NULL, + ... +) } \arguments{ \item{exp}{an object of class \code{s2dv_cube}} @@ -77,10 +84,9 @@ res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'DIST') } } -\author{ -Nuria Perez-Zanon, \email{nuria.perez@bsc.es} -} \seealso{ \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} } - +\author{ +Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_RFSlope.Rd b/man/CST_RFSlope.Rd index d2b5aec0..0c4e1671 100644 --- a/man/CST_RFSlope.Rd +++ b/man/CST_RFSlope.Rd @@ -50,4 +50,3 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/CST_RFWeights.Rd b/man/CST_RFWeights.Rd index 08a7b850..ef5ebe4d 100644 --- a/man/CST_RFWeights.Rd +++ b/man/CST_RFWeights.Rd @@ -47,9 +47,6 @@ nf <- 8 ww <- CST_RFWeights("./worldclim.nc", nf, lon, lat, fsmooth = TRUE) } } -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} -} \references{ Terzago, S., Palazzi, E., & von Hardenberg, J. (2018). Stochastic downscaling of precipitation in complex orography: @@ -57,4 +54,6 @@ A simple method to reproduce a realistic fine-scale climatology. Natural Hazards and Earth System Sciences, 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 . 
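The weights computed by CST_RFWeights() only become meaningful once they reach the downscaling step, so the hand-off is sketched here; "./worldclim.nc" is a stand-in path exactly as in the example above, and routing the result through the weights argument of CST_RainFARM() (default weights = 1) is offered as the natural pairing rather than anything this patch prescribes:

# Sketch: fine-scale orographic weights for an nf = 8 refinement,
# then stochastic downscaling of the sample precipitation cube.
library(CSTools)
nf <- 8                  # refinement factor, as in the example above
ww <- CST_RFWeights("./worldclim.nc", nf,
                    lon = lonlat_prec$lon, lat = lonlat_prec$lat,
                    fsmooth = TRUE)
res <- CST_RainFARM(lonlat_prec, nf = nf, weights = ww, nens = 3)
dim(res$data)  # gains a 'realization' dimension of length nens;
               # lat and lon are refined by a factor of nf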
} - +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} diff --git a/man/CST_RainFARM.Rd b/man/CST_RainFARM.Rd index 4a667f9a..1c609e08 100644 --- a/man/CST_RainFARM.Rd +++ b/man/CST_RainFARM.Rd @@ -4,9 +4,20 @@ \alias{CST_RainFARM} \title{RainFARM stochastic precipitation downscaling of a CSTools object} \usage{ -CST_RainFARM(data, nf, weights = 1, slope = 0, kmin = 1, nens = 1, - fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, - verbose = FALSE, drop_realization_dim = FALSE) +CST_RainFARM( + data, + nf, + weights = 1, + slope = 0, + kmin = 1, + nens = 1, + fglob = FALSE, + fsmooth = TRUE, + nprocs = 1, + time_dim = NULL, + verbose = FALSE, + drop_realization_dim = FALSE +) } \arguments{ \item{data}{An object of the class 's2dv_cube' as returned by `CST_Load`, @@ -95,13 +106,12 @@ dim(res$data) # dataset member realization sdate ftime lat lon # 1 2 3 3 4 64 64 -} -\author{ -Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } \references{ Terzago, S. et al. (2018). NHESS 18(11), 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 ; D'Onofrio et al. (2014), J of Hydrometeorology 15, 830-843; Rebora et al. (2006), JHM 7, 724. } - +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd index 56120942..f10dc142 100644 --- a/man/CST_RegimesAssign.Rd +++ b/man/CST_RegimesAssign.Rd @@ -5,8 +5,13 @@ \title{Function for matching a field of anomalies with a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function)} \usage{ -CST_RegimesAssign(data, ref_maps, method = "distance", composite = FALSE, - ncores = NULL) +CST_RegimesAssign( + data, + ref_maps, + method = "distance", + composite = FALSE, + ncores = NULL +) } \arguments{ \item{data}{a 's2dv_cube' object.} @@ -38,11 +43,10 @@ regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4 res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis.
Available online: \url{https://eprints.ucm.es/56841/} } - +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 17537205..0e49c119 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -29,11 +29,10 @@ destination <- "./path/" CST_SaveExp(data = data, destination = destination) } -} -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} } \seealso{ \code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} } - +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd index 2019ea7b..ee93aedc 100644 --- a/man/CST_SplitDim.Rd +++ b/man/CST_SplitDim.Rd @@ -43,4 +43,3 @@ dim(new_data$data) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } - diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd index bd811b44..4ffb3c4c 100644 --- a/man/CST_WeatherRegimes.Rd +++ b/man/CST_WeatherRegimes.Rd @@ -4,9 +4,17 @@ \alias{CST_WeatherRegimes} \title{Function for Calculating the Cluster analysis} \usage{ -CST_WeatherRegimes(data, ncenters = NULL, EOFS = TRUE, neofs = 30, - varThreshold = NULL, method = "kmeans", iter.max = 100, nstart = 30, - ncores = NULL) +CST_WeatherRegimes( + data, + ncenters = NULL, + EOFS = TRUE, + neofs = 30, + varThreshold = NULL, + method = "kmeans", + iter.max = 100, + nstart = 30, + ncores = NULL +) } \arguments{ \item{data}{a 's2dv_cube' object} @@ -49,9 +57,6 @@ included in the hclust (stats package). res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, @@ -60,4 +65,6 @@ Characterization of European wind speed variability using weather regimes. Clima Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} } - +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 880cf0f7..64452279 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -37,7 +37,6 @@ Four types of member-by-member bias correction can be performed. The \code{bias} Both in-sample and out-of-sample (leave-one-out cross-validation) calibration are possible. } - \examples{ mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) @@ -58,5 +57,6 @@ Van Schaeybroeck, B., & Vannitsem, S. (2015).
Ensemble post-processing using mem } \author{ Verónica Torralba, \email{veronica.torralba@bsc.es} + Bert Van Schaeybroeck, \email{bertvs@meteo.be} } diff --git a/man/EnsClustering.Rd b/man/EnsClustering.Rd index 72912ba7..30d81f87 100644 --- a/man/EnsClustering.Rd +++ b/man/EnsClustering.Rd @@ -82,4 +82,3 @@ Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/MergeDims.Rd b/man/MergeDims.Rd index 585049e8..7539ef6e 100644 --- a/man/MergeDims.Rd +++ b/man/MergeDims.Rd @@ -4,8 +4,12 @@ \alias{MergeDims} \title{Function to Merge Dimensions} \usage{ -MergeDims(data, merge_dims = c("time", "monthly"), rename_dim = NULL, - na.rm = FALSE) +MergeDims( + data, + merge_dims = c("time", "monthly"), + rename_dim = NULL, + na.rm = FALSE +) } \arguments{ \item{data}{an n-dimensional array with named dimensions} @@ -28,4 +32,3 @@ new_data <- MergeDims(data, merge_dims = c('time', 'lat')) \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } - diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index 1e822fc4..dd0fc7fe 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -4,9 +4,19 @@ \alias{MultiEOF} \title{EOF analysis of multiple variables starting from an array (reduced version)} \usage{ -MultiEOF(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", - neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, - lat_lim = NULL) +MultiEOF( + data, + lon, + lat, + time, + lon_dim = "lon", + lat_dim = "lat", + neof_max = 40, + neof_composed = 5, + minvar = 0.6, + lon_lim = NULL, + lat_lim = NULL +) } \arguments{ \item{data}{A multidimensional array with dimension \code{"var"}, @@ -46,4 +56,3 @@ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} } - diff --git a/man/PlotCombinedMap.Rd b/man/PlotCombinedMap.Rd index d9895ca9..c45d1afb 100644 --- a/man/PlotCombinedMap.Rd +++ b/man/PlotCombinedMap.Rd @@ -4,11 +4,27 @@ \alias{PlotCombinedMap} \title{Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function} \usage{ -PlotCombinedMap(maps, lon, lat, map_select_fun, display_range, - map_dim = "map", brks = NULL, cols = NULL, col_unknown_map = "white", - mask = NULL, col_mask = "grey", bar_titles = NULL, legend_scale = 1, - fileout = NULL, width = 8, height = 5, size_units = "in", res = 100, - ...) +PlotCombinedMap( + maps, + lon, + lat, + map_select_fun, + display_range, + map_dim = "map", + brks = NULL, + cols = NULL, + col_unknown_map = "white", + mask = NULL, + col_mask = "grey", + bar_titles = NULL, + legend_scale = 1, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) } \arguments{ \item{maps}{List of matrices to plot, each with (longitude, latitude) dimensions, or a 3-dimensional array with the dimensions (longitude, latitude, map).
Dimension names are required.} @@ -77,13 +93,12 @@ PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, display_range = range(data), mask = mask, width = 12, height = 8) +} +\seealso{ +\code{PlotCombinedMap} and \code{PlotEquiMap} } \author{ Nicolau Manubens, \email{nicolau.manubens@bsc.es} Veronica Torralba, \email{veronica.torralba@bsc.es} } -\seealso{ -\code{PlotCombinedMap} and \code{PlotEquiMap} -} - diff --git a/man/PlotForecastPDF.Rd b/man/PlotForecastPDF.Rd index d7b95b08..c04b43c1 100644 --- a/man/PlotForecastPDF.Rd +++ b/man/PlotForecastPDF.Rd @@ -4,10 +4,18 @@ \alias{PlotForecastPDF} \title{Plot one or multiple ensemble forecast pdfs for the same event} \usage{ -PlotForecastPDF(fcst, tercile.limits, extreme.limits = NULL, obs = NULL, - plotfile = NULL, title = "Set a title", var.name = "Varname (units)", - fcst.names = NULL, add.ensmemb = c("above", "below", "no"), - color.set = c("ggplot", "s2s4e", "hydro")) +PlotForecastPDF( + fcst, + tercile.limits, + extreme.limits = NULL, + obs = NULL, + plotfile = NULL, + title = "Set a title", + var.name = "Varname (units)", + fcst.names = NULL, + add.ensmemb = c("above", "below", "no"), + color.set = c("ggplot", "s2s4e", "hydro") +) } \arguments{ \item{fcst}{a dataframe or array containing all the ensemble members for each forecast. If \code{'fcst'} is an array, it should have two labelled dimensions, and one of them should be \code{'members'}. If \code{'fcst'} is a data.frame, each column should be a separate forecast, with the rows being the different ensemble members.} @@ -49,4 +57,3 @@ PlotForecastPDF(fcsts2, c(-0.66, 0.66), extreme.limits = c(-1.2, 1.2), \author{ Llorenç Lledó \email{llledo@bsc.es} } - diff --git a/man/PlotMostLikelyQuantileMap.Rd b/man/PlotMostLikelyQuantileMap.Rd index 6c92850e..4c400b18 100644 --- a/man/PlotMostLikelyQuantileMap.Rd +++ b/man/PlotMostLikelyQuantileMap.Rd @@ -4,8 +4,15 @@ \alias{PlotMostLikelyQuantileMap} \title{Plot Maps of Most Likely Quantiles} \usage{ -PlotMostLikelyQuantileMap(probs, lon, lat, cat_dim = "bin", - bar_titles = NULL, col_unknown_cat = "white", ...) +PlotMostLikelyQuantileMap( + probs, + lon, + lat, + cat_dim = "bin", + bar_titles = NULL, + col_unknown_cat = "white", + ... +) } \arguments{ \item{probs}{a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories.
The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added.} @@ -109,11 +116,10 @@ PlotMostLikelyQuantileMap(bins, lons, lats, mask = 1 - (w1 + w2 / max(c(w1, w2))), brks = 20, width = 10, height = 8) -} -\author{ -Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} } \seealso{ \code{PlotCombinedMap} and \code{PlotEquiMap} } - +\author{ +Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} +} diff --git a/man/RFSlope.Rd b/man/RFSlope.Rd index 09a24ff5..db3f0e10 100644 --- a/man/RFSlope.Rd +++ b/man/RFSlope.Rd @@ -4,8 +4,7 @@ \alias{RFSlope} \title{RainFARM spectral slopes from an array (reduced version)} \usage{ -RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", - lat_dim = "lat") +RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", lat_dim = "lat") } \arguments{ \item{data}{Array containing the spatial precipitation fields to downscale. @@ -60,4 +59,3 @@ slopes \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/RainFARM.Rd b/man/RainFARM.Rd index 984dcd42..0db84679 100644 --- a/man/RainFARM.Rd +++ b/man/RainFARM.Rd @@ -4,10 +4,24 @@ \alias{RainFARM} \title{RainFARM stochastic precipitation downscaling (reduced version)} \usage{ -RainFARM(data, lon, lat, nf, weights = 1, nens = 1, slope = 0, kmin = 1, - fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, - lon_dim = "lon", lat_dim = "lat", drop_realization_dim = FALSE, - verbose = FALSE) +RainFARM( + data, + lon, + lat, + nf, + weights = 1, + nens = 1, + slope = 0, + kmin = 1, + fglob = FALSE, + fsmooth = TRUE, + nprocs = 1, + time_dim = NULL, + lon_dim = "lon", + lat_dim = "lat", + drop_realization_dim = FALSE, + verbose = FALSE +) } \arguments{ \item{data}{Precipitation array to downscale. @@ -117,4 +131,3 @@ dim(res$data) \author{ Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} } - diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd index dd9cd7a0..40daf6be 100644 --- a/man/RegimesAssign.Rd +++ b/man/RegimesAssign.Rd @@ -5,8 +5,14 @@ \title{Function for matching a field of anomalies with a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function).} \usage{ -RegimesAssign(data, ref_maps, lat, method = "distance", composite = FALSE, - ncores = NULL) +RegimesAssign( + data, + ref_maps, + lat, + method = "distance", + composite = FALSE, + ncores = NULL +) } \arguments{ \item{data}{an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.} @@ -38,10 +44,9 @@ regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, E res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), lat=lonlat_data$exp$lat,composite=FALSE) } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} \references{ Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} } - +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd index e36aa8a5..f07e4756 100644 --- a/man/SplitDim.Rd +++ b/man/SplitDim.Rd @@ -35,4 +35,3 @@ new_data <- SplitDim(data, indices = time, freq = 'year') \author{ Nuria Perez-Zanon, \email{nuria.perez@bsc.es} } - diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd index d01ed918..22cc4f08 100644 --- a/man/WeatherRegimes.Rd +++ b/man/WeatherRegimes.Rd @@ -4,9 +4,19 @@ \alias{WeatherRegime} \title{Function for Calculating the Cluster analysis} \usage{ -WeatherRegime(data, ncenters = NULL, EOFS = TRUE, neofs = 30, - varThreshold = NULL, lon = NULL, lat = NULL, method = "kmeans", - iter.max = 100, nstart = 30, ncores = NULL) +WeatherRegime( + data, + ncenters = NULL, + EOFS = TRUE, + neofs = 30, + varThreshold = NULL, + lon = NULL, + lat = NULL, + method = "kmeans", + iter.max = 100, + nstart = 30, + ncores = NULL +) } \arguments{ \item{data}{an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'.} @@ -51,9 +61,6 @@ included in the hclust (stats package). \examples{ res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) } -\author{ -Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} -} \references{ Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, @@ -62,4 +69,6 @@ Characterization of European wind speed variability using weather regimes. Clima Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. 
Available online: \url{https://eprints.ucm.es/56841/} } - +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/areave_data.Rd b/man/areave_data.Rd index cc79c85c..a772220a 100644 --- a/man/areave_data.Rd +++ b/man/areave_data.Rd @@ -41,4 +41,3 @@ areave_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} - diff --git a/man/as.s2dv_cube.Rd b/man/as.s2dv_cube.Rd index 13a2a296..c2b8f3a8 100644 --- a/man/as.s2dv_cube.Rd +++ b/man/as.s2dv_cube.Rd @@ -40,12 +40,11 @@ data <- as.s2dv_cube(data) class(data) } } +\seealso{ +\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} +} \author{ Perez-Zanon Nuria, \email{nuria.perez@bsc.es} Nicolau Manubens, \email{nicolau.manubens@bsc.es} } -\seealso{ -\code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} -} - diff --git a/man/lonlat_data.Rd b/man/lonlat_data.Rd index eca7abac..0c6ee30f 100644 --- a/man/lonlat_data.Rd +++ b/man/lonlat_data.Rd @@ -41,4 +41,3 @@ lonlat_data <- Nicolau Manubens \email{nicolau.manubens@bsc.es} } \keyword{data} - diff --git a/man/lonlat_prec.Rd b/man/lonlat_prec.Rd index 69cb94e8..345e3cab 100644 --- a/man/lonlat_prec.Rd +++ b/man/lonlat_prec.Rd @@ -29,4 +29,3 @@ lonlat_prec <- CST_Load('prlr', exp = list(infile), obs = NULL, Jost von Hardenberg \email{j.vonhardenberg@isac.cnr.it} } \keyword{data} - diff --git a/man/s2dv_cube.Rd b/man/s2dv_cube.Rd index 48af7bbb..b0ce8966 100644 --- a/man/s2dv_cube.Rd +++ b/man/s2dv_cube.Rd @@ -4,8 +4,16 @@ \alias{s2dv_cube} \title{Creation of a 's2dv_cube' object} \usage{ -s2dv_cube(data, lon = NULL, lat = NULL, Variable = NULL, - Datasets = NULL, Dates = NULL, when = NULL, source_files = NULL) +s2dv_cube( + data, + lon = NULL, + lat = NULL, + Variable = NULL, + Datasets = NULL, + Dates = NULL, + when = NULL, + source_files = NULL +) } \arguments{ \item{data}{an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon.} @@ -75,10 +83,9 @@ exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) class(exp8) } -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} \seealso{ \code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} } - +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} -- GitLab From a6669a447ee0c8ca728eb5d819d7d55ff250dac0 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Jun 2020 17:32:41 +0200 Subject: [PATCH 44/45] WeatherRegimes and RegimeAssign listed in NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index f81ad720..f3ebd2b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ + CST_BiasCorrection has na.rm parameter + CST_Anomaly allows smoothing the climatology with the filter.span parameter + PlotTriangles4Categories: new plotting function to convert any 3-d numerical array to a grid of coloured triangles.
+ + CST_WeatherRegimes/WeatherRegimes and CST_RegimesAssign/RegimesAssign - Fixes + CST_Anomaly handles exp, obs or both + PlotForecastPDF vignette displays figures correctly -- GitLab From 588fd320a1fcab70bc0dab5a442b87615a35f68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Thu, 18 Jun 2020 17:45:31 +0200 Subject: [PATCH 45/45] bugfix freqs --- R/CST_WeatherRegimes.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 82704f38..209aa1b6 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -155,9 +155,6 @@ WeatherRegime <- function(data, ncenters = NULL, ncores = ncores) if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { - output$cluster <- t(array(output$cluster,dim=c(nftimes,nsdates))) - names(dim(output$cluster)) <- c('sdate','ftime') - # The frequency and the persistency are computed as they are useful # parameters in the cluster analysis @@ -167,8 +164,11 @@ WeatherRegime <- function(data, ncenters = NULL, nsdates = nsdates, nftimes = nftimes , ncenters = ncenters) - - output <- list(composite=output$composite, + + output$cluster <- t(array(output$cluster,dim=c(nftimes,nsdates))) + names(dim(output$cluster)) <- c('sdate','ftime') + + output <- list(composite=output$composite, pvalue=output$pvalue, cluster=output$cluster, frequency=extra_output$frequency, -- GitLab
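
The reordering in the last hunk matters because the k-means step hands back the cluster assignments as a flat vector, with forecast time varying fastest within each start date (that layout is what the reshape t(array(output$cluster, dim = c(nftimes, nsdates))) implies), and the frequency/persistence computation consumes that flat layout. Before the fix, the vector was transposed into an (sdate, ftime) matrix first, so the frequencies were computed from silently reordered values. The following is a minimal sketch of the effect, not CSTools code: toy_freqs() is an invented stand-in for the package's internal helper, and the flat-vector layout is assumed from the patch itself.

nsdates  <- 3   # start dates
nftimes  <- 4   # forecast times per start date
ncenters <- 2   # number of clusters

# Flat assignments in the layout kmeans() returns, ftime varying fastest:
# sdate 1 all in cluster 1, sdate 2 all in cluster 2, sdate 3 mixed 50/50.
cluster_flat <- c(1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2)

# Toy stand-in for the internal frequency computation: per start date,
# the percentage of forecast times assigned to each cluster. It refolds
# its input column-wise, so it only works on the original flat layout.
toy_freqs <- function(cl, nftimes, nsdates, ncenters) {
  m <- matrix(as.vector(cl), nrow = nftimes, ncol = nsdates)
  sapply(seq_len(ncenters), function(k) 100 * colMeans(m == k))
}

# Post-fix order: compute the frequencies from the flat vector first ...
toy_freqs(cluster_flat, nftimes, nsdates, ncenters)
#      [,1] [,2]
# [1,]  100    0
# [2,]    0  100
# [3,]   50   50

# ... and only then reshape the assignments for the user-facing output,
# exactly as the patched code does.
cluster_out <- t(array(cluster_flat, dim = c(nftimes, nsdates)))
names(dim(cluster_out)) <- c('sdate', 'ftime')

# Pre-fix order passed the already-transposed matrix into the frequency
# step; refolding it column-wise scrambles which forecast times belong to
# which start date and yields wrong frequencies:
toy_freqs(cluster_out, nftimes, nsdates, ncenters)
#      [,1] [,2]
# [1,]   75   25
# [2,]   25   75
# [3,]   50   50

Computing the frequencies before the reshape, as the patched code now does, keeps both steps consistent with the flat layout while still returning the (sdate, ftime) cluster matrix to the user.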