diff --git a/DESCRIPTION b/DESCRIPTION index 30fd237a2d6be5f375e45b55a683ffec062c0d8c..f8f5567b46fba55dfbcc8a50b92ed31dd03f12fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Imports: stats, plyr, ncdf4, + NbClust, multiApply (>= 2.1.1) Suggests: easyVerification, diff --git a/NAMESPACE b/NAMESPACE index b12cd829b03c6868eb9dbedb5eb0549e5be55ddd..23ed4eaa662990d3d138c6be989f8b6af602f8a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(AMV) export(AnimateMap) export(Ano) export(Clim) +export(Cluster) export(ColorBar) export(Composite) export(ConfigAddEntry) @@ -51,6 +52,7 @@ export(Trend) export(clim.colors) export(clim.palette) import(GEOmap) +import(NbClust) import(bigmemory) import(geomapdata) import(graphics) @@ -85,6 +87,7 @@ importFrom(stats,acf) importFrom(stats,anova) importFrom(stats,confint) importFrom(stats,cor) +importFrom(stats,kmeans) importFrom(stats,lm) importFrom(stats,median) importFrom(stats,na.fail) diff --git a/R/Cluster.R b/R/Cluster.R new file mode 100644 index 0000000000000000000000000000000000000000..33527aea9b0e8f43bcce78be874cd07d5a02629a --- /dev/null +++ b/R/Cluster.R @@ -0,0 +1,267 @@ +#'K-means Clustering +#' +#'Compute cluster centers and their time series of occurrences, with the +#'K-means clustering method using Euclidean distance, of an array of input data +#'with any number of dimensions that at least contain time_dim. +#'Specifically, it partitions the array along time axis in K groups or clusters +#'in which each space vector/array belongs to (i.e., is a member of) the +#'cluster with the nearest center or centroid. This function is a wrapper of +#'kmeans() and relies on the NbClust package (Charrad et al., 2014 JSS) to +#'determine the optimal number of clusters used for K-means clustering if it is +#'not provided by users. +#' +#'@param data A numeric array with named dimensions that at least have +#' 'time_dim' corresponding to time and 'space_dim' (optional) corresponding +#' to either area-averages over a series of domains or the grid points for any +#' sptial grid structure. +#'@param weights A numeric array with named dimension of multiplicative weights +#' based on the areas covering each domain/region or grid-cell of 'data'. The +#' dimensions must be equal to the 'space_dim' in 'data'. The default value is +#' NULL which means no weighting is applied. +#'@param time_dim A character string indicating the name of time dimension in +#' 'data'. The default value is 'sdate'. +#'@param space_dim A character vector indicating the names of spatial dimensions +#' in 'data'. The default value is NULL. +#'@param nclusters A positive integer K that must be bigger than 1 indicating +#' the number of clusters to be computed, or K initial cluster centers to be +#' used in the method. The default value is NULL, which means that the number +#' of clusters will be determined by NbClust(). The parameter 'index' +#' therefore needs to be specified for NbClust() to find the optimal number of +#' clusters to be used for K-means clustering calculation. +#'@param index A character string of the validity index from NbClust package +#' that can be used to determine optimal K if K is not specified with +#' 'nclusters'. The default value is 'sdindex' (Halkidi et al. 2001, JIIS). +#' Other indices available in NBClust are "kl", "ch", "hartigan", "ccc", +#' "scott", "marriot", "trcovw", "tracew", "friedman", "rubin", "cindex", "db", +#' "silhouette", "duda", "pseudot2", "beale", "ratkowsky", "ball", +#' "ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", +#' "hubert", "sdindex", and "sdbw". +#' One can also use all of them with the option 'alllong' or almost all indices +# except gap, gamma, gplus and tau with 'all', when the optimal number of +#' clusters K is detremined by the majority rule (the maximum of histogram of +#' the results of all indices with finite solutions). Use of some indices on +#' a big and/or unstructured dataset can be computationally intense and/or +#' could lead to numerical singularity. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{$cluster}{ +#' An integer array of the occurrence of a cluster along time, i.e., when +#' certain data member in time is allocated to a specific cluster. The dimensions +#' are same as 'data' without 'space_dim'. +#'} +#'\item{$centers}{ +#' A nemeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees +#' of freedom]). The rest dimensions are same as 'data' except 'time_dim' +#' and 'space_dim'. +#'} +#'\item{$totss}{ +#' A numeric array of the total sum of squares. The dimensions are same as 'data' +#' except 'time_dim' and 'space_dim'. +#'} +#'\item{$withinss}{ +#' A numeric array of within-cluster sum of squares, one component per cluster. +#' The first dimenion is the number of cluster, and the rest dimensions are +#' same as 'data' except 'time_dim' and 'space_dim'. +#'} +#'\item{$tot.withinss}{ +#' A numeric array of the total within-cluster sum of squares, i.e., +#' sum(withinss). The dimensions are same as 'data' except 'time_dim' and +#' 'space_dim'. +#'} +#'\item{$betweenss}{ +#' A numeric array of the between-cluster sum of squares, i.e. totss-tot.withinss. +#' The dimensions are same as 'data' except 'time_dim' and 'space_dim'. +#'} +#'\item{$size}{ +#' A numeric array of the number of points in each cluster. The first dimenion +#' is the number of cluster, and the rest dimensions are same as 'data' except +#' 'time_dim' and 'space_dim'. +#'} +#'\item{$iter}{ +#' A numeric array of the number of (outer) iterations. The dimensions are +#' same as 'data' except 'time_dim' and 'space_dim'. +#'} +#'\item{$ifault}{ +#' A numeric array of an indicator of a possible algorithm problem. The +#' dimensions are same as 'data' except 'time_dim' and 'space_dim'. +#'} +#' +#'@references +#'Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevire, pp 676. +#' +#'@examples +#'# Generating synthetic data +#'a1 <- array(dim = c(200, 4)) +#'mean1 <- 0 +#'sd1 <- 0.3 +#' +#'c0 <- seq(1, 200) +#'c1 <- sort(sample(x = 1:200, size = sample(x = 50:150, size = 1), replace = FALSE)) +#'x1 <- c(1, 1, 1, 1) +#'for (i1 in c1) { +#' a1[i1, ] <- x1 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'c1p5 <- c0[!(c0 %in% c1)] +#'c2 <- c1p5[seq(1, length(c1p5), 2)] +#'x2 <- c(2, 2, 4, 4) +#'for (i2 in c2) { +#' a1[i2, ] <- x2 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'c3 <- c1p5[seq(2, length(c1p5), 2)] +#'x3 <- c(3, 3, 1, 1) +#'for (i3 in c3) { +#' a1[i3, ] <- x3 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'# Computing the clusters +#'names(dim(a1)) <- c('sdate', 'space') +#'res1 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2]), nclusters = 3) +#'res2 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2])) +#' +#'@import NbClust multiApply +#'@importFrom abind abind +#'@importFrom stats kmeans +#'@importFrom grDevices pdf dev.off +#'@export +Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, + nclusters = NULL, index = 'sdindex', ncores = NULL) { + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + + ## weights + if (!is.null(weights)) { + if (!is.numeric(weights)) { + stop("Parameter 'weights' must be a numeric array.") + } + if (is.null(dim(weights))) { #is vector + dim(weights) <- c(length(weights)) + } + if (any(is.null(names(dim(weights))))| any(nchar(names(dim(weights))) == 0)) { + stop("Parameter 'weights' must have dimension names.") + } + if (any(!names(dim(weights)) %in% names(dim(data)) | + !dim(weights) %in% dim(data))) { + stop("Parameter 'weights' must have dimensions that can be found in 'data' dimensions.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## space_dim + if (!is.null(space_dim)) { + if (!is.character(space_dim)) { + stop("Parameter 'space_dim' must be a character vector.") + } + if (any(!space_dim %in% names(dim(data)))) { + stop("Parameter 'space_dim' is not found in 'data' dimensions.") + } + if (!is.null(weights)) { + if (!(length(space_dim) == length(dim(weights)) & all(space_dim %in% names(dim(weights))))) { + stop("Parameter 'weights' must have dimension names the same as 'space_dim'.") + } + if (space_dim != names(dim(weights))) { + space_dim <- names(dim(weights)) + } + } + } + if (is.null(space_dim) & !is.null(weights)) { + space_dim <- names(dim(weights)) + .warning(paste0("Parameter 'weights' is assigned but not 'space_dim'. Define 'space_dim' ", + "by the dimensions of 'weights'.")) + } + ## nclusters + if (!is.null(nclusters)) { + if (!is.numeric(nclusters) | length(nclusters) != 1) { + stop("Parameter 'nclusters' must be an integer bigger than 1.") + } else if (nclusters <= 1) { + stop("Parameter 'nclusters' must be an integer bigger than 1.") + } + } + + ## index + if (!is.character(index) | length(index) > 1) { + stop("Parameter 'index' should be a character strings accepted as 'index' by the function NbClust::NbClust.") + } + + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Cluster + + output <- Apply(list(data), + target_dims = c(time_dim, space_dim), + fun = .Cluster, + weights = weights, nclusters = nclusters, index = index, + ncores = ncores) + + return(output) +} + +.Cluster <- function(data, weights = NULL, nclusters = NULL, index = 'sdindex') { + # data: [time, (lat, lon)] + dat_dim <- dim(data) + + if (length(dim(data)) != 1) { + # Reshape data into two dims + dim(data) <- c(dat_dim[1], prod(dat_dim[-1])) + + # weights + if (!is.null(weights)) { + dim(weights) <- prod(dim(weights)) # a vector + data_list <- lapply(1:dat_dim[1], + function(x) { data[x, ] * weights }) + data <- do.call(abind::abind, c(data_list, along = 0)) + } + } + + if (!is.null(nclusters)) { + kmeans.results <- kmeans(data, centers = nclusters, iter.max = 300, + nstart = 30) + } else { + pdf(file = NULL) + nbclust.results <- NbClust::NbClust(data, distance = 'euclidean', + min.nc = 2, max.nc = 20, + method = 'kmeans', index = index) + dev.off() + + if (index == 'all' || index == 'alllong') { + kmc <- hist(nbclust.results$Best.nc[1, ], breaks = seq(0, 20), + plot = FALSE)$counts + kmc1 <- which(kmc == max(kmc)) + } else { + kmc1 <- nbclust.results$Best.nc[1] + } + + kmeans.results <- kmeans(data, centers = kmc1, iter.max = 300, + nstart = 30) + } + invisible(kmeans.results) +} diff --git a/R/Persistence.R b/R/Persistence.R index f569e632651ddeb72fe8dc9b76ed9bfc2dcbcc52..184092756d25f8332f0a8af281a58fdd78fec940 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -39,32 +39,32 @@ #' #'@return #'A list containing: -#'\item{$persistence} { +#'\item{$persistence}{ #' A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes #' containing the persistence forecast. #'} -#'\item{$persistence.mean} { +#'\item{$persistence.mean}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the ensemble mean persistence forecast. #'} -#'\item{$persistence.predint} { +#'\item{$persistence.predint}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the prediction interval of the persistence forecast. #'} -#'\item{$AR.slope} { +#'\item{$AR.slope}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the slope coefficient of the autoregression. #'} -#'\item{$AR.intercept} { +#'\item{$AR.intercept}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the intercept coefficient of the autoregression. #'} -#'\item{$AR.lowCI} { +#'\item{$AR.lowCI}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the lower value of the confidence interval of the #' autoregression. #'} -#'\item{$AR.highCI} { +#'\item{$AR.highCI}{ #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the upper value of the confidence interval of the #' autoregression. diff --git a/man/Cluster.Rd b/man/Cluster.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7ea25de5bfe29b8f49dc2c117a220d01dc6c8236 --- /dev/null +++ b/man/Cluster.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Cluster.R +\name{Cluster} +\alias{Cluster} +\title{K-means Clustering} +\usage{ +Cluster( + data, + weights = NULL, + time_dim = "sdate", + space_dim = NULL, + nclusters = NULL, + index = "sdindex", + ncores = NULL +) +} +\arguments{ +\item{data}{A numeric array with named dimensions that at least have +'time_dim' corresponding to time and 'space_dim' (optional) corresponding +to either area-averages over a series of domains or the grid points for any +sptial grid structure.} + +\item{weights}{A numeric array with named dimension of multiplicative weights +based on the areas covering each domain/region or grid-cell of 'data'. The +dimensions must be equal to the 'space_dim' in 'data'. The default value is +NULL which means no weighting is applied.} + +\item{time_dim}{A character string indicating the name of time dimension in +'data'. The default value is 'sdate'.} + +\item{space_dim}{A character vector indicating the names of spatial dimensions +in 'data'. The default value is NULL.} + +\item{nclusters}{A positive integer K that must be bigger than 1 indicating +the number of clusters to be computed, or K initial cluster centers to be +used in the method. The default value is NULL, which means that the number +of clusters will be determined by NbClust(). The parameter 'index' +therefore needs to be specified for NbClust() to find the optimal number of +clusters to be used for K-means clustering calculation.} + +\item{index}{A character string of the validity index from NbClust package +that can be used to determine optimal K if K is not specified with +'nclusters'. The default value is 'sdindex' (Halkidi et al. 2001, JIIS). +Other indices available in NBClust are "kl", "ch", "hartigan", "ccc", +"scott", "marriot", "trcovw", "tracew", "friedman", "rubin", "cindex", "db", +"silhouette", "duda", "pseudot2", "beale", "ratkowsky", "ball", +"ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", +"hubert", "sdindex", and "sdbw". +One can also use all of them with the option 'alllong' or almost all indices +clusters K is detremined by the majority rule (the maximum of histogram of +the results of all indices with finite solutions). Use of some indices on +a big and/or unstructured dataset can be computationally intense and/or +could lead to numerical singularity.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{$cluster}{ + An integer array of the occurrence of a cluster along time, i.e., when + certain data member in time is allocated to a specific cluster. The dimensions + are same as 'data' without 'space_dim'. +} +\item{$centers}{ + A nemeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees + of freedom]). The rest dimensions are same as 'data' except 'time_dim' + and 'space_dim'. +} +\item{$totss}{ + A numeric array of the total sum of squares. The dimensions are same as 'data' + except 'time_dim' and 'space_dim'. +} +\item{$withinss}{ + A numeric array of within-cluster sum of squares, one component per cluster. + The first dimenion is the number of cluster, and the rest dimensions are + same as 'data' except 'time_dim' and 'space_dim'. +} +\item{$tot.withinss}{ + A numeric array of the total within-cluster sum of squares, i.e., + sum(withinss). The dimensions are same as 'data' except 'time_dim' and + 'space_dim'. +} +\item{$betweenss}{ + A numeric array of the between-cluster sum of squares, i.e. totss-tot.withinss. + The dimensions are same as 'data' except 'time_dim' and 'space_dim'. +} +\item{$size}{ + A numeric array of the number of points in each cluster. The first dimenion + is the number of cluster, and the rest dimensions are same as 'data' except + 'time_dim' and 'space_dim'. +} +\item{$iter}{ + A numeric array of the number of (outer) iterations. The dimensions are + same as 'data' except 'time_dim' and 'space_dim'. +} +\item{$ifault}{ + A numeric array of an indicator of a possible algorithm problem. The + dimensions are same as 'data' except 'time_dim' and 'space_dim'. +} +} +\description{ +Compute cluster centers and their time series of occurrences, with the +K-means clustering method using Euclidean distance, of an array of input data +with any number of dimensions that at least contain time_dim. +Specifically, it partitions the array along time axis in K groups or clusters +in which each space vector/array belongs to (i.e., is a member of) the +cluster with the nearest center or centroid. This function is a wrapper of +kmeans() and relies on the NbClust package (Charrad et al., 2014 JSS) to +determine the optimal number of clusters used for K-means clustering if it is +not provided by users. +} +\examples{ +# Generating synthetic data +a1 <- array(dim = c(200, 4)) +mean1 <- 0 +sd1 <- 0.3 + +c0 <- seq(1, 200) +c1 <- sort(sample(x = 1:200, size = sample(x = 50:150, size = 1), replace = FALSE)) +x1 <- c(1, 1, 1, 1) +for (i1 in c1) { + a1[i1, ] <- x1 + rnorm(4, mean = mean1, sd = sd1) +} + +c1p5 <- c0[!(c0 \%in\% c1)] +c2 <- c1p5[seq(1, length(c1p5), 2)] +x2 <- c(2, 2, 4, 4) +for (i2 in c2) { + a1[i2, ] <- x2 + rnorm(4, mean = mean1, sd = sd1) +} + +c3 <- c1p5[seq(2, length(c1p5), 2)] +x3 <- c(3, 3, 1, 1) +for (i3 in c3) { + a1[i3, ] <- x3 + rnorm(4, mean = mean1, sd = sd1) +} + +# Computing the clusters +names(dim(a1)) <- c('sdate', 'space') +res1 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2]), nclusters = 3) +res2 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2])) + +} +\references{ +Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevire, pp 676. +} diff --git a/man/Persistence.Rd b/man/Persistence.Rd index 84b846480df3f073ba876607ea94978c51397dfc..b2bd2765d529580c093a154cbfe672007717d0a2 100644 --- a/man/Persistence.Rd +++ b/man/Persistence.Rd @@ -64,32 +64,32 @@ computation. The default value is NULL.} } \value{ A list containing: -\item{$persistence} { +\item{$persistence}{ A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes containing the persistence forecast. } -\item{$persistence.mean} { +\item{$persistence.mean}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the ensemble mean persistence forecast. } -\item{$persistence.predint} { +\item{$persistence.predint}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the prediction interval of the persistence forecast. } -\item{$AR.slope} { +\item{$AR.slope}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the slope coefficient of the autoregression. } -\item{$AR.intercept} { +\item{$AR.intercept}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the intercept coefficient of the autoregression. } -\item{$AR.lowCI} { +\item{$AR.lowCI}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the lower value of the confidence interval of the autoregression. } -\item{$AR.highCI} { +\item{$AR.highCI}{ A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the upper value of the confidence interval of the autoregression. diff --git a/tests/testthat/test-Cluster.R b/tests/testthat/test-Cluster.R new file mode 100644 index 0000000000000000000000000000000000000000..9071cd8e5e479e0ef2ccee2b6b74208815e604c8 --- /dev/null +++ b/tests/testthat/test-Cluster.R @@ -0,0 +1,127 @@ +context("s2dv::Cluster tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(100), + dim = c(sdate = 50, space = 2)) + weights1 <- array(c(0.9, 1.1), dim = c(space = 2)) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(300), + dim = c(sdate = 50, lat = 2, lon = 3)) + weights2 <- array(c(0.9, 1.1), dim = c(lat = 2, lon = 3)) + +############################################## + +test_that("1. Input checks", { + # data + expect_error( + Cluster(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Cluster(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Cluster(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # weights + expect_error( + Cluster(dat1, weights = 'lat'), + "Parameter 'weights' must be a numeric array." + ) + expect_error( + Cluster(dat1, weights = 2), + "Parameter 'weights' must have dimension names." + ) + expect_error( + Cluster(dat1, weights = array(2, dim = c(lat = 2))), + "Parameter 'weights' must have dimensions that can be found in 'data' dimensions." + ) + # time_dim + expect_error( + Cluster(dat1, weights1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Cluster(array(c(1:25), dim = c(dat = 1, time = 5, space = 2)), weights1), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Cluster(dat1, weights1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Cluster(dat1, weights1, time_dim = c('a', 'sdate')), + "Parameter 'time_dim' must be a character string." + ) + # nclusters + expect_error( + Cluster(dat1, weights1, ncluster = 1), + "Parameter 'nclusters' must be an integer bigger than 1." + ) + # index + expect_error( + Cluster(dat1, weights1, index = 1), + "Parameter 'index' should be a character strings accepted as 'index' by the function NbClust::NbClust." + ) + # ncores + expect_error( + Cluster(dat1, weights1, ncore = 0), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. Output checks: dat1", { +# The output is random. Only check dimensions. + expect_equal( + length(Cluster(dat1, weights1)$cluster), + 50 + ) + expect_equal( + length(Cluster(dat1)$cluster), + 100 + ) + expect_equal( + dim(Cluster(dat1, weights1)$centers), + c(8, 2) + ) + expect_equal( + dim(Cluster(dat1, weights1, nclusters = 3)$centers), + c(3, 2) + ) + +}) + + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + length(Cluster(dat2, weights2)$cluster), + 50 + ) + expect_equal( + length(Cluster(dat2)$cluster), + 300 + ) + expect_equal( + length(Cluster(dat2, space_dim = c('lon', 'lat'))$cluster), + 50 + ) + expect_equal( + dim(Cluster(dat2, weights2)$centers), + c(7, 6) + ) + expect_equal( + dim(Cluster(dat2, weights2, nclusters = 5)$centers), + c(5, 6) + ) + +}) +