Commit 127814c3 authored by aho's avatar aho
Browse files

Set 'weights' default to NULL and add parameter 'space_dim'

parent 08ba27ce
Pipeline #5346 passed with stage
in 3 minutes and 33 seconds
......@@ -11,14 +11,17 @@
#'not provided by users.
#'
#'@param data A numeric array with named dimensions that at least have
#' 'time_dim' corresponding to time and the dimensions of 'weights'
#' corresponding to either area-averages over a series of domains or the grid
#' points for any sptial grid structure.
#' 'time_dim' corresponding to time and 'space_dim' (optional) corresponding
#' to either area-averages over a series of domains or the grid points for any
#' sptial grid structure.
#'@param weights A numeric array with named dimension of multiplicative weights
#' based on the areas covering each domain/region or grid-cell of 'data'. The
#' dimensions must also be part of the 'data' dimensions.
#' dimensions must be equal to the 'space_dim' in 'data'. The default value is
#' NULL which means no weighting is applied.
#'@param time_dim A character string indicating the name of time dimension in
#' 'data'. The default value is 'sdate'.
#'@param space_dim A character vector indicating the names of spatial dimensions
#' in 'data'. The default value is NULL.
#'@param nclusters A positive integer K that must be bigger than 1 indicating
#' the number of clusters to be computed, or K initial cluster centers to be
#' used in the method. The default value is NULL, which means that the number
......@@ -112,7 +115,7 @@
#'@importFrom stats kmeans
#'@importFrom grDevices pdf dev.off
#'@export
Cluster <- function(data, weights, time_dim = 'sdate',
Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL,
nclusters = NULL, index = 'sdindex', ncores = NULL) {
# Check inputs
## data
......@@ -131,23 +134,21 @@ Cluster <- function(data, weights, time_dim = 'sdate',
}
## weights
if (is.null(weights)) {
stop("Parameter 'weights' cannot be NULL.")
}
if (!is.numeric(weights)) {
stop("Parameter 'weights' must be a numeric array.")
}
if (is.null(dim(weights))) { #is vector
dim(weights) <- c(length(weights))
}
if(any(is.null(names(dim(weights))))| any(nchar(names(dim(weights))) == 0)) {
stop("Parameter 'weights' must have dimension names.")
}
if (any(!names(dim(weights)) %in% names(dim(data)) |
!dim(weights) %in% dim(data))) {
stop("Parameter 'weights' must have dimensions that can be found in 'data' dimensions.")
if (!is.null(weights)) {
if (!is.numeric(weights)) {
stop("Parameter 'weights' must be a numeric array.")
}
if (is.null(dim(weights))) { #is vector
dim(weights) <- c(length(weights))
}
if (any(is.null(names(dim(weights))))| any(nchar(names(dim(weights))) == 0)) {
stop("Parameter 'weights' must have dimension names.")
}
if (any(!names(dim(weights)) %in% names(dim(data)) |
!dim(weights) %in% dim(data))) {
stop("Parameter 'weights' must have dimensions that can be found in 'data' dimensions.")
}
}
## time_dim
if (!is.character(time_dim) | length(time_dim) > 1) {
stop("Parameter 'time_dim' must be a character string.")
......@@ -155,7 +156,28 @@ Cluster <- function(data, weights, time_dim = 'sdate',
if (!time_dim %in% names(dim(data))) {
stop("Parameter 'time_dim' is not found in 'data' dimensions.")
}
## space_dim
if (!is.null(space_dim)) {
if (!is.character(space_dim)) {
stop("Parameter 'space_dim' must be a character vector.")
}
if (any(!space_dim %in% names(dim(data)))) {
stop("Parameter 'space_dim' is not found in 'data' dimensions.")
}
if (!is.null(weights)) {
if (!(length(space_dim) == length(dim(weights)) & all(space_dim %in% names(dim(weights))))) {
stop("Parameter 'weights' must have dimension names the same as 'space_dim'.")
}
if (space_dim != names(dim(weights))) {
space_dim <- names(dim(weights))
}
}
}
if (is.null(space_dim) & !is.null(weights)) {
space_dim <- names(dim(weights))
.warning(paste0("Parameter 'weights' is assigned but not 'space_dim'. Define 'space_dim' ",
"by the dimensions of 'weights'."))
}
## nclusters
if (!is.null(nclusters)) {
if (!is.numeric(nclusters) | length(nclusters) != 1) {
......@@ -182,7 +204,7 @@ Cluster <- function(data, weights, time_dim = 'sdate',
# Calculate Cluster
output <- Apply(list(data),
target_dims = c(time_dim, names(dim(weights))),
target_dims = c(time_dim, space_dim),
fun = .Cluster,
weights = weights, nclusters = nclusters, index = index,
ncores = ncores)
......@@ -190,18 +212,22 @@ Cluster <- function(data, weights, time_dim = 'sdate',
return(output)
}
.Cluster <- function(data, weights, nclusters = NULL, index = 'sdindex') {
# data: [time, lat, lon]
.Cluster <- function(data, weights = NULL, nclusters = NULL, index = 'sdindex') {
# data: [time, (lat, lon)]
dat_dim <- dim(data)
# Reshape data into two dims
dim(data) <- c(dat_dim[1], prod(dat_dim[-1]))
dim(weights) <- prod(dim(weights)) # a vector
# weights
data_list <- lapply(1:dat_dim[1],
function(x) { data[x, ] * weights })
data <- do.call(abind::abind, c(data_list, along = 0))
if (length(dim(data)) != 1) {
# Reshape data into two dims
dim(data) <- c(dat_dim[1], prod(dat_dim[-1]))
# weights
if (!is.null(weights)) {
dim(weights) <- prod(dim(weights)) # a vector
data_list <- lapply(1:dat_dim[1],
function(x) { data[x, ] * weights })
data <- do.call(abind::abind, c(data_list, along = 0))
}
}
if (!is.null(nclusters)) {
kmeans.results <- kmeans(data, centers = nclusters, iter.max = 300,
......@@ -224,6 +250,5 @@ Cluster <- function(data, weights, time_dim = 'sdate',
kmeans.results <- kmeans(data, centers = kmc1, iter.max = 300,
nstart = 30)
}
invisible(kmeans.results)
}
......@@ -6,8 +6,9 @@
\usage{
Cluster(
data,
weights,
weights = NULL,
time_dim = "sdate",
space_dim = NULL,
nclusters = NULL,
index = "sdindex",
ncores = NULL
......@@ -15,17 +16,21 @@ Cluster(
}
\arguments{
\item{data}{A numeric array with named dimensions that at least have
'time_dim' corresponding to time and the dimensions of 'weights'
corresponding to either area-averages over a series of domains or the grid
points for any sptial grid structure.}
'time_dim' corresponding to time and 'space_dim' (optional) corresponding
to either area-averages over a series of domains or the grid points for any
sptial grid structure.}
\item{weights}{A numeric array with named dimension of multiplicative weights
based on the areas covering each domain/region or grid-cell of 'data'. The
dimensions must also be part of the 'data' dimensions.}
dimensions must be equal to the 'space_dim' in 'data'. The default value is
NULL which means no weighting is applied.}
\item{time_dim}{A character string indicating the name of time dimension in
'data'. The default value is 'sdate'.}
\item{space_dim}{A character vector indicating the names of spatial dimensions
in 'data'. The default value is NULL.}
\item{nclusters}{A positive integer K that must be bigger than 1 indicating
the number of clusters to be computed, or K initial cluster centers to be
used in the method. The default value is NULL, which means that the number
......
......@@ -13,7 +13,6 @@ context("s2dv::Cluster tests")
dim = c(sdate = 50, lat = 2, lon = 3))
weights2 <- array(c(0.9, 1.1), dim = c(lat = 2, lon = 3))
##############################################
test_that("1. Input checks", {
......@@ -32,10 +31,6 @@ test_that("1. Input checks", {
)
# weights
expect_error(
Cluster(dat1, weights = c()),
"Parameter 'weights' cannot be NULL."
)
expect_error(
Cluster(dat1, weights = 'lat'),
"Parameter 'weights' must be a numeric array."
)
......@@ -88,6 +83,10 @@ test_that("2. Output checks: dat1", {
length(Cluster(dat1, weights1)$cluster),
50
)
expect_equal(
length(Cluster(dat1)$cluster),
100
)
expect_equal(
dim(Cluster(dat1, weights1)$centers),
c(8, 2)
......@@ -107,6 +106,14 @@ test_that("3. Output checks: dat2", {
length(Cluster(dat2, weights2)$cluster),
50
)
expect_equal(
length(Cluster(dat2)$cluster),
300
)
expect_equal(
length(Cluster(dat2, space_dim = c('lon', 'lat'))$cluster),
50
)
expect_equal(
dim(Cluster(dat2, weights2)$centers),
c(7, 6)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment