diff --git a/NAMESPACE b/NAMESPACE index 3aa0e32d49da4bd96cef708c8736ebc94eccb765..a27aaaa260e57e255e7714262bc2a1be9aaa3e32 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,8 +19,10 @@ export(CST_QuantileMapping) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) +export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) +export(CST_WeatherRegimes) export(Calibration) export(EnsClustering) export(MergeDims) @@ -31,7 +33,9 @@ export(PlotMostLikelyQuantileMap) export(PlotTriangles4Categories) export(RFSlope) export(RainFARM) +export(RegimesAssign) export(SplitDim) +export(WeatherRegime) export(as.s2dv_cube) export(s2dv_cube) import(abind) @@ -43,6 +47,7 @@ import(rainfarmr) import(s2dverification) import(stats) importFrom(ClimProjDiags,SelBox) +importFrom(ClimProjDiags,Subset) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) diff --git a/NEWS.md b/NEWS.md index f81ad72009e66668673cf8c4f8f2bde9ba029386..f3ebd2b3f04785e3fb3b26dacec8004912aaf1fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ + CST_BiasCorrection has na.rm paramter + CST_Anomaly allows to smooth the climatology with filter.span parameter + PlotTriangles4Categories new plotting function to convert any 3-d numerical array to a grid of coloured triangles. + + CST_WeatherRegimes/WeatherRegimes and CST_RegimeAssign/RegimeAssign - Fixes + CST_Anomaly handles exp, obs or both + PlotForecastPDF vignette displays figures correctly diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R new file mode 100644 index 0000000000000000000000000000000000000000..0885f82d515b54776cc3c4c444014833933dec46 --- /dev/null +++ b/R/CST_RegimesAssign.R @@ -0,0 +1,349 @@ +#' @rdname CST_RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function) +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. +#' +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data a 's2dv_cube' object. + +#'@param ref_maps a 's2dv_cube' object as the output of CST_WeatherRegimes. +#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) (only when composite = 'TRUE') and \code{$statistics} that includes +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), +#' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#'@import s2dverification +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@examples +#'regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +#'res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) +#'res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) +#'@export +#' + +CST_RegimesAssign <- function(data, ref_maps, + method = "distance", + composite = FALSE, + ncores=NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + + if (!inherits(ref_maps, 's2dv_cube')) { + stop("Parameter 'ref_maps' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + + if ('lat' %in% names(data)){ + lat <- data$lat + }else { + lat <- NULL + } + result <- Apply(data=list(data=data$data, ref_maps=ref_maps$data), lat= lat, fun=RegimesAssign, + target_dims=list(names(dim(data$data)),c('lat','lon','cluster')), + method = method, composite = composite, ncores=ncores) + + if (composite){ + data$data <- result$composite + data$statistics <- result[-1] + }else{ + data <- NULL + data$statistics <- result + } + + return(data) +} + +#' @rdname RegimesAssign +#' @title Function for matching a field of anomalies with +#' a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function). +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function performs the matching between a field of anomalies and a set +#' of maps which will be used as a reference. The anomalies will be assigned to the reference map +#' for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +#' (method=‘ACC’) is obtained. +#' +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon. +#'@param ref_maps array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching. +#'@param method whether the matching will be performed in terms of minimum distance (default=’distance’) or +#' the maximum spatial correlation (method=’ACC’) between the maps. +#'@param composite a logical parameter indicating if the composite maps are computed or not (default=FALSE). +#'@param ncores the number of multicore threads to use for parallel computation. +#'@return A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) (only if composite='TRUE'), +#' \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test +#' that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), +#' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , +#' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +#' +#'@import s2dverification +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@examples +#'regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite +#'res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), +#'lat=lonlat_data$exp$lat,composite=FALSE) +#'@export + +RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = FALSE, ncores=NULL) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + if (is.null(ref_maps)) { + stop("Parameter 'ref_maps' must be specified.") + } + + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + if (is.null(names(dim(ref_maps)))) { + stop("Parameter 'ref_maps' must be an array with named dimensions.") + } + + dimData <- names(dim(data)) + + if (!all( c('lat', 'lon') %in% dimData)) { + stop("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.") + } + + dimRef <- names(dim(ref_maps)) + + if (!all( c('cluster', 'lat', 'lon') %in% dimRef)) { + stop("Parameter 'ref_maps' must contain the named dimensions + 'cluster','lat' and 'lon'.") + } + + + if (length(lat) != dim(data)['lat'] | (length(lat) != dim(ref_maps)['lat']) ) { + stop(" Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") + } + + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimensions.") + } + } + + index <- Apply( data = list(target = data), + target_dims = c('lat','lon'), + fun = .RegimesAssign, + ref = ref_maps, + lat = lat, method = method, + ncores=ncores)[[1]] + + nclust <- dim(ref_maps)['cluster'] + freqs <- rep(NA, nclust) + for (n in 1:nclust) { + freqs[n] <- (length(which(index == n)) / length(index)) * 100 + } + + if (composite){ + poslon <- which(names(dim(data)) == 'lon') + poslat <- which(names(dim(data)) == 'lat') + postime <- which(names(dim(data)) == 'time') + posdim <- setdiff(1:length(dim(data)), c(postime, poslat, poslon)) + dataComp <- aperm(data, c(poslon, poslat, postime, posdim)) + + if (any(is.na(index))) { + recon <-list( + composite = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['composite.cluster']), + pvalue = InsertDim(array(NA, dim = c(dim(dataComp)[-postime])), + postime, dim(ref_maps)['composite.cluster'])) + } else { + recon <- + Apply(data = list(var = dataComp, occ = index), + target_dims = list(c('lon', 'lat', 'time'), c('time')), + fun = Composite, + K = dim(ref_maps)['cluster']) + } + + output <- list(composite = recon$composite, + pvalue = recon$pvalue, + cluster = index, + frequency = freqs) + } else{ + + output <- list(cluster = index, + frequency = freqs) + } + + return(output) +} + +.RegimesAssign <- function(ref, target, method = 'distance', lat, composite=FALSE) { + posdim <- which(names(dim(ref)) == 'cluster') + poslat <- which(names(dim(ref)) == 'lat') + poslon <- which(names(dim(ref)) == 'lon') + + nclust <- dim(ref)[posdim] + + if (all(dim(ref)[-posdim] != dim(target))) { + stop('The target should have the same dimensions [lat,lon] that + the reference ') + } + + if (is.null(names(dim(ref))) | is.null(names(dim(target)))) { + stop( + 'The arrays should include dimensions names ref[cluster,lat,lon] + and target [lat,lon]' + ) + } + + + if (length(lat) != dim(ref)[poslat]) { + stop('latitudes do not match with the maps') + } + + if (is.na(max(target))){ + assign <- NA + + }else{ + + + # This dimensions are reorganized + ref <- aperm(ref, c(posdim, poslat, poslon)) + target <- + aperm(target, c(which(names(dim( + target + )) == 'lat'), which(names(dim( + target + )) == 'lon'))) + + # weights are defined + latWeights <- InsertDim(sqrt(cos(lat * pi / 180)), 2, dim(ref)[3]) + + + rmsdiff <- function(x, y) { + dims <- dim(x) + ndims <- length(dims) + if (ndims != 2 | ndims != length(dim(y))) { + stop('x and y should be maps') + } + map_diff <- NA * x + for (i in 1:dims[1]) { + for (j in 1:dims[2]) { + map_diff[i, j] <- (x[i, j] - y[i, j]) ^ 2 + } + } + rmsdiff <- sqrt(mean(map_diff)) + return(rmsdiff) + } + + if (method == 'ACC') { + corr <- rep(NA, nclust) + for (i in 1:nclust) { + corr[i] <- + ACC(InsertDim(InsertDim( + InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 + ), 3, 1), + InsertDim(InsertDim( + InsertDim(target * latWeights, 1, 1), 2, 1 + ), 3, 1))$ACC[2] + } + assign <- which(corr == max(corr)) + } + + if (method == 'distance') { + rms <- rep(NA, nclust) + for (i in 1:nclust) { + rms[i] <- rmsdiff(ref[i, , ] * latWeights, target * latWeights) + } + assign <- which(rms == min(rms)) + } + } + + return(assign) +} + + +Composite <- function(var, occ, lag = 0, eno = FALSE, K = NULL, fileout = NULL) { + + if ( dim(var)[3] != length(occ) ) { + stop("Temporal dimension of var is not equal to length of occ.") + } + if (is.null(K)) { + K <- max(occ) + } + composite <- array(dim = c(dim(var)[1:2], composite = K)) + tvalue <- array(dim = dim(var)[1:2]) + dof <- array(dim = dim(var)[1:2]) + pvalue <- array(dim = c(dim(var)[1:2], composite = K)) + + if (eno == TRUE) { + n_tot <- Eno(var, posdim = 3) + } else { + n_tot <- length(occ) + } + mean_tot <- Mean1Dim(var, posdim = 3, narm = TRUE) + stdv_tot <- apply(var, c(1, 2), sd, na.rm = TRUE) + + for (k in 1 : K) { + if (length(which(occ == k)) >= 1) { + indices <- which(occ == k) + lag + toberemoved <- which(0 > indices | indices > dim(var)[3]) + + if (length(toberemoved) > 0) { + indices <- indices[-toberemoved] + } + if (eno == TRUE) { + n_k <- Eno(var[, , indices], posdim = 3) + } else { + n_k <- length(indices) + } + if (length(indices) == 1) { + composite[, , k] <- var[, , indices] + warning(paste("Composite", k, "has length 1 and pvalue is NA.")) + } else { + composite[, , k] <- Mean1Dim(var[, , indices], posdim = 3, narm = TRUE) + } + stdv_k <- apply(var[, , indices], c(1, 2), sd, na.rm = TRUE) + + tvalue <- (mean_tot - composite[, , k]) / + sqrt(stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) + dof <- (stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) ^ 2 / + ((stdv_tot ^ 2 / n_tot) ^ 2 / (n_tot - 1) + + (stdv_k ^ 2 / n_k) ^ 2 / (n_k - 1)) + pvalue[, , k] <- 2 * pt(-abs(tvalue), df = dof) + } + } + if (is.null(fileout) == FALSE) { + output <- list(composite = composite, pvalue = pvalue) + save(output, file = paste(fileout, '.sav', sep = '')) + } + + invisible(list(composite = composite, pvalue = pvalue)) +} + + diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R new file mode 100644 index 0000000000000000000000000000000000000000..209aa1b67e3df6a6d611d5667428360e0f745f28 --- /dev/null +++ b/R/CST_WeatherRegimes.R @@ -0,0 +1,297 @@ +#' @rdname CST_WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. +#'It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced +#'by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. +#'The cluster analysis can be performed with the traditional k-means or those methods +#'included in the hclust (stats package). +#' +#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +#' 4961–4976, doi:10.1007/s00382-019-04839-5. +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data a 's2dv_cube' object +#'@param ncenters Number of clusters to be calculated with the clustering function. +#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. +#'@param neofs number of modes to be kept (default = 30). +#'@param varThreshold Value with the percentage of variance to be explained by the PCs. +#' Only sufficient PCs to explain this much variance will be used in the clustering. +#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +#'but the function also support the different methods included in the hclust . These methods are: +#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +#' For more details about these methods see the hclust function documentation included in the stats package. +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@param ncores The number of multicore threads to use for parallel computation. +#'@return A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)) and \code{$statistics} that includes +#' \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence.), +#' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), +#' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#'@import s2dverification +#'@import multiApply +#'@examples +#'res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +#'res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +#'@export +#' +CST_WeatherRegimes <- function(data, ncenters = NULL, + EOFS = TRUE,neofs = 30, + varThreshold = NULL, + method = "kmeans", + iter.max=100, nstart = 30, + ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if ('lon' %in% names(data)){ + lon <- data$lon + }else { + lon <- NULL + } + result <- WeatherRegime(data$data,ncenters = ncenters, + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, lon = lon, + lat = data$lat, method = method, + iter.max=iter.max, nstart = nstart, + ncores = ncores) + data$data <- result$composite + data$statistics <- result[-1] + return(data) +} + +#' @rdname WeatherRegimes +#' @title Function for Calculating the Cluster analysis +#' +#' @author Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +#' +#' @description This function computes the weather regimes from a cluster analysis. +#'It can be applied over the dataset with dimensions +#'c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the +#'EOFs analysis to filter the dataset. +#'The cluster analysis can be performed with the traditional k-means or those methods +#'included in the hclust (stats package). +#' +#'@references Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +#' Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +#' 4961–4976, doi:10.1007/s00382-019-04839-5. +#'@references Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +#' for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +#' +#'@param data an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'. +#'@param ncenters Number of clusters to be calculated with the clustering function. +#'@param EOFs Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data. +#'@param neofs number of modes to be kept only if EOFs = TRUE has been selected. (default = 30). +#'@param varThreshold Value with the percentage of variance to be explained by the PCs. +#' Only sufficient PCs to explain this much variance will be used in the clustering. +#'@param lon Vector of longitudes. +#'@param lat Vector of latitudes. +#'@param method Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +#'but the function also support the different methods included in the hclust . These methods are: +#'"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +#' For more details about these methods see the hclust function documentation included in the stats package. +#'@param iter.max Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected). +#'@param nstarts Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected). +#'@return A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) +# or only k=1 for any specific cluster, i.e., case (*2)), +#' \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial +# dependence of the data with the same structure as Composite.), +#' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), +#' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), +#' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +#'@import s2dverification +#'@import multiApply +#'@examples +#'res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +#'@export + +WeatherRegime <- function(data, ncenters = NULL, + EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30, + ncores = NULL) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + dimData <- names(dim(data)) + + if ('sdate' %in% dimData && 'ftime' %in% dimData) { + nsdates <- dim(data)['sdate'] + nftimes <- dim(data)['ftime'] + data <- MergeDims(data, + merge_dims = c('ftime','sdate'), + rename_dim = 'time') + } else if ('sdate' %in% dimData | 'ftime' %in% dimData) { + names(dim(data))[which(dimData == 'sdate' | dimData == 'ftime') ] = 'time' + } else { + if (!('time' %in% dimData)) { + stop("Parameter 'data' must have temporal dimensions.") + } + } + + + output <- Apply(data = list(data), + target_dims = c('time','lat','lon'), + fun = .WeatherRegime, + EOFS = EOFS, neofs = neofs, + varThreshold = varThreshold, + lon = lon, lat = lat, + ncenters = ncenters, + method = method, + ncores = ncores) + + if (method=='kmeans' && 'sdate' %in% dimData && 'ftime' %in% dimData) { + + # The frequency and the persistency are computed as they are useful + # parameters in the cluster analysis + extra_output <- Apply(data = output$cluster, + target_dims = 'time', + fun = .freqPer, + nsdates = nsdates, + nftimes = nftimes , + ncenters = ncenters) + + output$cluster <- t(array(output$cluster,dim=c(nftimes,nsdates))) + names(dim(output$cluster)) <- c('sdate','ftime') + + output <- list(composite=output$composite, + pvalue=output$pvalue, + cluster=output$cluster, + frequency=extra_output$frequency, + persistence=extra_output$persistence) + } + return(output) +} + +.WeatherRegime <- function(data, ncenters = NULL, EOFS = TRUE,neofs = 30, + varThreshold = NULL, lon = NULL, + lat = NULL, method = "kmeans", + iter.max=100, nstart = 30) { + + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with 'time', 'lat' and 'lon' dimensions.") + } + + if (!is.null(lat) && dim(data)['lat'] != length(lat)) { + stop("The length of the paramter 'lat' does not match with the ['lat'] dimension of + the parameter 'data'.") + } + if (is.null(ncenters)) { + stop("Parameter 'ncenters' must be specified.") + } + if (EOFS == TRUE && is.null(lon)) { + stop("Parameter 'lon' must be specified.") + } + if (is.null(lat)) { + stop("Parameter 'lat' must be specified.") + } + + nlon <- dim(data)['lat'] + nlat <- dim(data)['lon'] + + if (any(is.na(data))){ + nas_test <- MergeDims(data, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm = TRUE) + if (dim(nas_test)['space']== c(nlat*nlon)){ + stop("Parameter 'data' contains NAs in the 'time' dimensions.") + } + } + + if (EOFS == TRUE) { + if (is.null(varThreshold)) { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + cluster_input <- dataPC$PC + } else { + dataPC <- EOF(data, + lat = as.vector(lat), + lon = as.vector(lon), + neofs = neofs) + minPC <- + head(as.numeric(which(cumsum(dataPC$var) > varThreshold)), 1) + cluster_input <- dataPC$PC[, 1:minPC] + } + } else { + + dataW <- aperm(Apply(data, target_dims = 'lat', + function (x, la) { + x * cos(la * pi / 180)}, + la = lat)[[1]], c(2, 1, 3)) + + cluster_input <- MergeDims(dataW, merge_dims = c('lat','lon'), + rename_dim = 'space',na.rm = TRUE) + + } + + if (method == "kmeans") { + + clust <- kmeans( + cluster_input, + centers = ncenters, + iter.max = iter.max, + nstart = nstart, + trace = FALSE) + + result <- array(0, c(ncenters, nlat, nlon)) + # the order of the data dimensions is changed ('lat','lon','time') + result <- Composite(aperm(data,c(2, 3, 1)), clust$cluster) + + } else { + result <- hclust(dist(cluster_input), method = method) + clusterCut <- cutree(result, ncenters) + result <- Composite(aperm(data, c(2, 3, 1)), clusterCut) + } + result <- lapply(1:length(result), + function (n) { + names(dim(result[[n]])) <- c("lat", "lon", "cluster") + return (result[[n]]) + }) + + names(result) <- c('composite','pvalue') + + if (method == "kmeans") { + clust <- as.array(clust$cluster) + names(dim(clust)) <- 'time' + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } else { + clust <- as.array(clusterCut) + names(dim(clust)) <- 'time' + return(list( + composite = result$composite, + pvalue = result$pvalue, + cluster = clust)) + } +} + +.freqPer<- function (clust, nsdates, nftimes, ncenters){ + frequency <- persistence <- matrix(NA, nsdates, ncenters) + x <- as.vector(clust) + for (i in 1:nsdates) { + occurences <-rle(x[((i * nftimes) + 1 - nftimes):(i * nftimes)]) + for (j in 1:ncenters) { + frequency[i, j] <-(sum(occurences$lengths[occurences$values == j]) / nftimes) * 100 + persistence[i, j] <- mean(occurences$lengths[occurences$values == j]) + } + } + return(list(frequency = frequency, + persistence = persistence)) +} diff --git a/man/CST_RegimesAssign.Rd b/man/CST_RegimesAssign.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f10dc1427ba7f9797c37cdd7e71dda0e08b9ab49 --- /dev/null +++ b/man/CST_RegimesAssign.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_RegimesAssign.R +\name{CST_RegimesAssign} +\alias{CST_RegimesAssign} +\title{Function for matching a field of anomalies with +a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function)} +\usage{ +CST_RegimesAssign( + data, + ref_maps, + method = "distance", + composite = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{a 's2dv_cube' object.} + +\item{ref_maps}{a 's2dv_cube' object as the output of CST_WeatherRegimes.} + +\item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or +the maximum spatial correlation (method=’ACC’) between the maps.} + +\item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} + +\item{ncores}{the number of multicore threads to use for parallel computation.} +} +\value{ +A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) + \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test + that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), + \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , + \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +} +\description{ +This function performs the matching between a field of anomalies and a set +of maps which will be used as a reference. The anomalies will be assigned to the reference map +for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +(method=‘ACC’) is obtained. +} +\examples{ +regimes <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +res1 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=FALSE) +res2 <- CST_RegimesAssign(data=lonlat_data$exp, ref_maps = regimes,composite=TRUE) +} +\references{ +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/CST_WeatherRegimes.Rd b/man/CST_WeatherRegimes.Rd new file mode 100644 index 0000000000000000000000000000000000000000..4ffb3c4cd43ef8d05204f848d048d50f0cfa5b22 --- /dev/null +++ b/man/CST_WeatherRegimes.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_WeatherRegimes.R +\name{CST_WeatherRegimes} +\alias{CST_WeatherRegimes} +\title{Function for Calculating the Cluster analysis} +\usage{ +CST_WeatherRegimes( + data, + ncenters = NULL, + EOFS = TRUE, + neofs = 30, + varThreshold = NULL, + method = "kmeans", + iter.max = 100, + nstart = 30, + ncores = NULL +) +} +\arguments{ +\item{data}{a 's2dv_cube' object} + +\item{ncenters}{Number of clusters to be calculated with the clustering function.} + +\item{neofs}{number of modes to be kept (default = 30).} + +\item{varThreshold}{Value with the percentage of variance to be explained by the PCs. +Only sufficient PCs to explain this much variance will be used in the clustering.} + +\item{method}{Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +but the function also support the different methods included in the hclust . These methods are: +"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +For more details about these methods see the hclust function documentation included in the stats package.} + +\item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} + +\item{ncores}{The number of multicore threads to use for parallel computation.} + +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} + +\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} +} +\value{ +A list with two elements \code{$data} (a 's2dv_cube' object containing the composites cluster=1,..,K for case (*1) + \code{$pvalue} (array with the same structure as \code{$data} containing the pvalue of the composites obtained through a t-test that accounts for the serial dependence.), + \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), + \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), + \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +} +\description{ +This function computes the weather regimes from a cluster analysis. +It is applied on the array \code{data} in a 's2dv_cube' object. The dimensionality of this object can be also reduced +by using PCs obtained from the application of the #'EOFs analysis to filter the dataset. +The cluster analysis can be performed with the traditional k-means or those methods +included in the hclust (stats package). +} +\examples{ +res1 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = FALSE, ncenters = 4) +res2 <- CST_WeatherRegimes(data = lonlat_data$obs, EOFS = TRUE, ncenters = 3) +} +\references{ +Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +4961–4976, doi:10.1007/s00382-019-04839-5. + +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/RegimesAssign.Rd b/man/RegimesAssign.Rd new file mode 100644 index 0000000000000000000000000000000000000000..40daf6bea7152c8748d00d6d356d8a09f6996dd8 --- /dev/null +++ b/man/RegimesAssign.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_RegimesAssign.R +\name{RegimesAssign} +\alias{RegimesAssign} +\title{Function for matching a field of anomalies with +a set of maps used as a reference (e.g. clusters obtained from the WeatherRegime function).} +\usage{ +RegimesAssign( + data, + ref_maps, + lat, + method = "distance", + composite = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{an array containing anomalies with named dimensions: dataset, member, sdate, ftime, lat and lon.} + +\item{ref_maps}{array with 3-dimensions ('lon','lat', 'cluster') containing the maps/clusters that will be used as a reference for the matching.} + +\item{method}{whether the matching will be performed in terms of minimum distance (default=’distance’) or +the maximum spatial correlation (method=’ACC’) between the maps.} + +\item{composite}{a logical parameter indicating if the composite maps are computed or not (default=FALSE).} + +\item{ncores}{the number of multicore threads to use for parallel computation.} +} +\value{ +A list with elements \code{$composite} (3-d array (lon, lat, k) containing the composites k=1,..,K for case (*1) + \code{$pvalue} ( array with the same structure as \code{$composite} containing the pvalue of the composites obtained through a t-test + that accounts for the serial dependence of the data with the same structure as Composite.) (only if composite='TRUE'), + \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , + \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), +} +\description{ +This function performs the matching between a field of anomalies and a set +of maps which will be used as a reference. The anomalies will be assigned to the reference map +for which the minimum Eucledian distance (method=’distance’) or highest spatial correlation +(method=‘ACC’) is obtained. +} +\examples{ +regimes <- WeatherRegime(data = lonlat_data$obs$data, lat=lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4)$composite +res1 <- RegimesAssign(data=lonlat_data$exp$data, ref_maps = drop(regimes), +lat=lonlat_data$exp$lat,composite=FALSE) +} +\references{ +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/man/WeatherRegimes.Rd b/man/WeatherRegimes.Rd new file mode 100644 index 0000000000000000000000000000000000000000..22cc4f081b732844c045860064160851d3c35056 --- /dev/null +++ b/man/WeatherRegimes.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_WeatherRegimes.R +\name{WeatherRegime} +\alias{WeatherRegime} +\title{Function for Calculating the Cluster analysis} +\usage{ +WeatherRegime( + data, + ncenters = NULL, + EOFS = TRUE, + neofs = 30, + varThreshold = NULL, + lon = NULL, + lat = NULL, + method = "kmeans", + iter.max = 100, + nstart = 30, + ncores = NULL +) +} +\arguments{ +\item{data}{an array containing anomalies with named dimensions with at least start date 'sdate', forecast time 'ftime', latitude 'lat' and longitude 'lon'.} + +\item{ncenters}{Number of clusters to be calculated with the clustering function.} + +\item{neofs}{number of modes to be kept only if EOFs = TRUE has been selected. (default = 30).} + +\item{varThreshold}{Value with the percentage of variance to be explained by the PCs. +Only sufficient PCs to explain this much variance will be used in the clustering.} + +\item{lon}{Vector of longitudes.} + +\item{lat}{Vector of latitudes.} + +\item{method}{Different options to estimate the clusters. The most traditional approach is the k-means analysis (default=’kmeans’) +but the function also support the different methods included in the hclust . These methods are: +"ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). +For more details about these methods see the hclust function documentation included in the stats package.} + +\item{iter.max}{Parameter to select the maximum number of iterations allowed (Only if method='kmeans' is selected).} + +\item{EOFs}{Whether to compute the EOFs (default = 'TRUE') or not (FALSE) to filter the data.} + +\item{nstarts}{Parameter for the cluster analysis determining how many random sets to choose (Only if method='kmeans' is selected).} +} +\value{ +A list with elements \code{$composite} (array with at least 3-d ('lat', 'lon', 'cluster') containing the composites k=1,..,K for case (*1) + \code{pvalue} (array with at least 3-d ('lat','lon','cluster') with the pvalue of the composites obtained through a t-test that accounts for the serial + \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), + \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), + \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), +} +\description{ +This function computes the weather regimes from a cluster analysis. +It can be applied over the dataset with dimensions +c(year/month, month/day, lon, lat), or by using PCs obtained from the application of the +EOFs analysis to filter the dataset. +The cluster analysis can be performed with the traditional k-means or those methods +included in the hclust (stats package). +} +\examples{ +res <- WeatherRegime(data=lonlat_data$obs$data, lat= lonlat_data$obs$lat, EOFS = FALSE, ncenters = 4) +} +\references{ +Cortesi, N., V., Torralba, N., González-Reviriego, A., Soret, and F.J., Doblas-Reyes (2019). +Characterization of European wind speed variability using weather regimes. Climate Dynamics,53, +4961–4976, doi:10.1007/s00382-019-04839-5. + +Torralba, V. (2019) Seasonal climate prediction for the wind energy sector: methods and tools +for the development of a climate service. Thesis. Available online: \url{https://eprints.ucm.es/56841/} +} +\author{ +Verónica Torralba - BSC, \email{veronica.torralba@bsc.es} +} diff --git a/tests/testthat/test-CST_RegimesAssign.R b/tests/testthat/test-CST_RegimesAssign.R new file mode 100644 index 0000000000000000000000000000000000000000..09dad48f959ac418538e9520077b825ceafe2264 --- /dev/null +++ b/tests/testthat/test-CST_RegimesAssign.R @@ -0,0 +1,121 @@ +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_RegimesAssign(data = 1), + paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + data1 <- 1 : 20 + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps=1), + paste0("Parameter 'ref_maps' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + regimes <- 1:20 + dim(regimes) <- c(lat = 5, lon=2, cluster=2) + regimes <- list(data=regimes) + class(regimes) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'data' must be an array with named dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(lat = 5, lon=4) + data1 <- list(data = data1 , lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'data' must have temporal dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(time=20) + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'lat' must be specified.")) + + + data1 <- 1 : 20 + dim(data1) <- c(time=20) + data1 <- list(data = data1,lat=1:5) + class(data1) <- 's2dv_cube' + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + paste0("Parameter 'data' must contain the named dimensions 'lat' and 'lon'.")) + + data1 <- 1: 20 + dim(data1) <- c(lat = 2, lon=5, time=2) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_error( + CST_RegimesAssign(data = data1,ref_maps = regimes), + " Parameter 'lat' does not match with the dimension 'lat' in the + parameter 'data' or in the parameter 'ref_maps'.") + + + data1 <- 1: 20 + dim(data1) <- c(lat = 5, lon=2, time=2) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_equal(names(CST_RegimesAssign(data = data1, ref_maps = regimes)$statistics), + c('cluster', 'frequency')) + + expect_equal(names( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$statistics), c('pvalue', 'cluster', 'frequency')) + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$data)), c('lon', 'lat', 'composite.cluster')) + + data1 <- 1: 160 + dim(data1) <- c(lat = 5, lon=2, time=2, member=8) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$data)), c('lon', 'lat', 'composite.cluster', 'member')) + + expect_equal(names(dim( + CST_RegimesAssign( + data = data1, + ref_maps = regimes, + composite = TRUE)$statistics$cluster)), c('time', 'member')) + + regimes <- 1:60 + dim(regimes) <- c(lat = 5, lon=2, cluster=6) + regimes <- list(data=regimes) + class(regimes) <- 's2dv_cube' + expect_equal(max(CST_RegimesAssign(data = data1, ref_maps = regimes, + composite = FALSE)$statistics$cluster), + unname(dim(regimes$data)['cluster'])) + + + regimes <- 1:60 + dim(regimes) <- c(lat = 5, lon=2, cluster=3, member=2) + regimes <- list(data=regimes) + class(regimes) <- 's2dv_cube' + expect_equal(names(dim(CST_RegimesAssign(data = data1, ref_maps = regimes, + composite = FALSE)$statistics$cluster)),c('time','member','member')) + + + + + + + +}) diff --git a/tests/testthat/test-CST_WeatherRegimes.R b/tests/testthat/test-CST_WeatherRegimes.R new file mode 100644 index 0000000000000000000000000000000000000000..33e75e24768c839b4c3458fe06f742208f86330e --- /dev/null +++ b/tests/testthat/test-CST_WeatherRegimes.R @@ -0,0 +1,103 @@ +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_WeatherRegimes(data = 1), + paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.")) + + data1 <- 1 : 20 + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'data' must be an array with named dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(lat = 5, lon=4) + data1 <- list(data = data1 , lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'data' must have temporal dimensions.")) + + data1 <- 1 : 20 + dim(data1) <- c(time = 20) + data1 <- list(data = data1) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1) , + paste0("Parameter 'lat' must be specified.")) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1), + paste0("Parameter 'ncenters' must be specified.")) + + expect_error( + CST_WeatherRegimes(data = data1, ncenters=3), + paste0("Parameter 'lon' must be specified.")) + + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS= FALSE)$data)), + c('lat', 'lon', 'cluster')) + + data1 <- 1 : 400 + dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5) + class(data1) <- 's2dv_cube' + nclusters <- 3 + + expect_equal( + dim(CST_WeatherRegimes(data = data1 , + ncenters = nclusters, + EOFS = FALSE)$statistics$frequency),c(2, nclusters)) + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, nclusters, EOFS= FALSE)$data)), + c('lat', 'lon', 'cluster')) + + data1 <- 1 : 400 + dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon=4) + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + + expect_equal( + names(CST_WeatherRegimes(data = data1 , ncenters = 4)$statistics), + c('pvalue', 'cluster', 'frequency', 'persistence')) + + expect_equal( + names(CST_WeatherRegimes(data = data1 , ncenters = 4, method='ward.D')$statistics), + c('pvalue', 'cluster')) + + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=4)$data)), + c('lat', 'lon', 'cluster')) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1[4,,] <- NA + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + expect_error( + CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE), + paste0("Parameter 'data' contains NAs in the 'time' dimensions.")) + + data1 <- 1 : 400 + dim(data1) <- c(time = 20, lat = 5, lon=4) + data1[,2,3] <- NA + data1 <- list(data = data1, lat=1:5 ,lon=1:4) + class(data1) <- 's2dv_cube' + expect_equal( + any(is.na(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), + TRUE) + expect_equal( + names(dim(CST_WeatherRegimes(data = data1, ncenters=3, EOFS = FALSE)$data)), + c('lat', 'lon', 'cluster')) +}) + + + + +