From 7ce185b79dac9a564365cf833966caa330990a37 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 8 Jun 2020 13:01:10 +0200 Subject: [PATCH 1/2] Modify Composite() to adopt multiApply. Create unit test --- R/Composite.R | 246 ++++++++++++++++++++++++++++++++ R/Trend.R | 4 +- tests/testthat/test-Composite.R | 225 +++++++++++++++++++++++++++++ 3 files changed, 473 insertions(+), 2 deletions(-) create mode 100644 R/Composite.R create mode 100644 tests/testthat/test-Composite.R diff --git a/R/Composite.R b/R/Composite.R new file mode 100644 index 0000000..1ddbfee --- /dev/null +++ b/R/Composite.R @@ -0,0 +1,246 @@ +#'Compute composites +#' +#'Composite a multi-dimensional array which contains two spatial and one +#'temporal dimensions, e.g., (lon, lat, time), according to the indices of +#'mode/cluster occurrences in time. The p-value by t-test is also computed. +#' +#'@param data A numeric array containing two spatial and one temporal +#' dimensions. +#'@param occ A vector of the occurrence time series of mode(s)/cluster(s). +#' The length should be the same as the temporal dimension in 'data'. +#' (*1) When one wants to composite all modes, e.g., all K = 3 clusters then +#' for example occurrences could look like: 1 1 2 3 2 3 1 3 3 2 3 2 2 3 2. +#' (*2) Otherwise for compositing only the 2nd mode or cluster of the above +#' example occurrences should look like 0 0 1 0 1 0 0 0 0 1 0 1 1 0 1. +#'@param time_dim A character string indicating the name of the temporal +#' dimension in 'data'. The default value is 'time'. +#'@param space_dim A character vector indicating the names of the spatial +#' dimensions in 'data'. The default value is c('lon', 'lat'). +#'@param lag An integer indicating the lag time step. E.g., for lag = 2, +#' +2 occurrences will be used (i.e., shifted 2 time steps forward). +#' The default value is 0. +#'@param eno A logical value indicating whether to use the effective sample +#' size (TRUE) or the total sample size (FALSE) for the number of degrees of +#' freedom. The default value is FALSE. +#'@param K A numeric value indicating the maximum number of composites. The +#' default value is NULL, which means the maximum value provided in 'occ' is +#' used. +#'@param fileout A character string indicating the name of the .sav output file +#' The default value is NULL, which means not to save the output. +#' +#'@return +#'A list containing: +#'\item{$composite}{ +#' A numeric array of the spatial dimensions and new dimension 'K' first, +#' followed by the same dimensions as parameter 'data'. The length of +#' dimension 'K' is parameter 'K'. +#'} +#'\item{$p.val}{ +#' A numeric array with the same dimension as $composite. It is the p-value of +#' the composites obtained through a t-test that accounts for the serial +#' dependence of the data. +#'} +#' +#'@examples +#'blank <- array(0, dim = c(20, 10, 30)) +#'x1 <- blank +#'t1 <- blank +#'f1 <- blank +#' +#'for (i in 1:20) { +#' x1[i, , ] <- i +#'} +#' +#'for (i in 1:30) { +#' t1[, , i] <- i +#'} +#' +#'# This is 2D propagating sin wave example, where we use f1(lon, lat, time) +#'# wave field. Compositing (like using stroboscopicc light) at different +#'# time steps can lead to modification or cancelation of wave pattern. +#' +#'for (i in 1:20) { +#' for (j in 1:30) { +#' f1[i, , j] <- 3 * sin(2 * pi * x1[i, , j] / 5. - 2 * pi * t1[i, , j] / 6.) +#' } +#'} +#'names(dim(f1)) <- c('lon', 'lat', 'time') +#'occ <- rep(0, 30) +#'occ[c(2, 5, 8, 11, 14, 17, 20, 23)] <- 1 +#'res <- Composite(data = f1, occ = occ) +#'filled.contour(res$composite[, , 1]) +#' +#'occ <- rep(0, 30) +#'occ[c(3, 9, 15, 21)] <- 1 +#'res <- Composite(data = f1, occ = occ) +#'filled.contour(res$composite[, , 1]) +#' +#'# Example with one missing composite (#3) in occ: +#'data <- 1:(4 * 5 * 6) +#'dim(data) <- c(lon = 4, lat = 5, case = 6) +#'occ <- c(1, 1, 2, 2, 3, 3) +#'res <- Composite(data, occ, time_dim = 'case', K = 4) +#' +#'@importFrom stats sd pt +#'@import multiApply +#'@export +Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), + lag = 0, eno = FALSE, K = NULL, fileout = NULL, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (length(dim(data)) < 3) { + stop("Parameter 'data' must have at least three dimensions.") + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## occ + if (is.null(occ)) { + stop("Parameter 'occ' cannot be NULL.") + } + if (!is.numeric(occ) | length(dim(occ)) > 1) { + stop("Parameter 'occ' must be a numeric vector.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (dim(data)[time_dim] != length(occ)) { + stop(paste0("The length of time_dim dimension in parameter 'data' is not ", + "equal to length of parameter 'occ'.")) + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector with two elements.") + } + if (!all(space_dim %in% names(dim(data)))) { + # See if 'longitude' and 'latitude' exist + if (all(c('longitude', 'latitude') %in% names(dim(data)))) { + space_dim <- c('longitude', 'latitude') + } else { + stop("Parameter 'space_dim' is not found in 'data' dimension.") + } + } + ## lag + if (!is.null(lag)) { + if (!is.numeric(lag) | lag %% 1 != 0 | lag < 0 | length(lag) > 1) { + stop("Parameter 'lag' must be a non-negative integer.") + } + } + ## eno + if (!is.logical(eno) | length(eno) > 1) { + stop("Parameter 'eno' must be one logical value.") + } + ## K + if (!is.null(K)) { + if (!is.numeric(K) | K %% 1 != 0 | K < 1 | length(K) > 1) { + stop("Parameter 'K' must be a positive integer.") + } + } + ## fileout + if (!is.null(fileout)) { + if (!is.character(fileout) | length(fileout) > 1) { + stop("Parameter 'fileout' must be a character string.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Composite + + ## Check if any occ is only 1 case + count_k <- plyr::count(occ) + if (any(count_k$freq == 1)) { + tmp <- count_k$x[which(count_k$freq == 1)] + warning(paste0("Composite K = ", tmp, " has length 1. The p-value is NA.")) + } + + output_dims <- list(composite = c(space_dim, 'K'), + p.val = c(space_dim, 'K')) + + output <- Apply(list(data), + target_dims = list(c(space_dim, time_dim)), + fun = .Composite, + output_dims = output_dims, + occ = occ, time_dim = time_dim, space_dim = space_dim, + K = K, lag = lag, eno = eno, + ncores = ncores) + + if (!is.null(fileout)) { + save(output, file = paste(fileout, '.sav', sep = '')) + } + + return(output) +} + +.Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), + K = NULL, lag = 0, eno = FALSE) { +# data: [lon, lat, time] +# occ: [time] + if (is.null(K)) { + K <- max(occ) + } + composite <- array(dim = c(dim(data)[1:2], composite = K)) + tvalue <- array(dim = dim(data)[1:2]) + dof <- array(dim = dim(data)[1:2]) + pval <- array(dim = c(dim(data)[1:2], composite = K)) + + if (eno == TRUE) { + n_tot <- Eno(data, time_dim = time_dim) + } else { + n_tot <- length(occ) + } + + mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE) + stdv_tot <- apply(data, c(1, 2), sd, na.rm = TRUE) + + for (k in 1:K) { + + if (length(which(occ == k)) >= 1) { + indices <- which(occ == k) + lag + toberemoved <- which(0 > indices | indices > dim(data)[3]) + + if (length(toberemoved) > 0) { + indices <- indices[-toberemoved] + } + if (eno == TRUE) { + data_tmp <- data[, , indices] + names(dim(data_tmp)) <- names(dim(data)) + n_k <- Eno(data_tmp, time_dim = time_dim) + } else { + n_k <- length(indices) + } + if (length(indices) == 1) { + composite[, , k] <- data[, , indices] + } else { + composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE) + } + stdv_k <- apply(data[, , indices], c(1, 2), sd, na.rm = TRUE) + + tvalue <- (mean_tot - composite[, , k]) / + sqrt(stdv_tot^2 / n_tot + stdv_k^2 / n_k) + dof <- (stdv_tot^2 / n_tot + stdv_k^2 / n_k)^2 / + ((stdv_tot^2 / n_tot)^2 / (n_tot - 1) + + (stdv_k^2 / n_k)^2 / (n_k - 1)) + pval[, , k] <- 2 * pt(-abs(tvalue), df = dof) + } + } + + invisible(list(K = composite, p.val = pval)) +} diff --git a/R/Trend.R b/R/Trend.R index 0f20efc..cdd219b 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -92,11 +92,11 @@ Trend <- function(data, time_dim = 'sdate', interval = 1, polydeg = 1, stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## interval - if (any(!is.numeric(interval) | interval <= 0 | length(interval) > 1)) { + if (!is.numeric(interval) | interval <= 0 | length(interval) > 1) { stop("Parameter 'interval' must be a positive number.") } ## polydeg - if (!is.numeric(polydeg) | polydeg %% 1 != 0 | polydeg < 0 | + if (!is.numeric(polydeg) | polydeg %% 1 != 0 | polydeg <= 0 | length(polydeg) > 1) { stop("Parameter 'polydeg' must be a positive integer.") } diff --git a/tests/testthat/test-Composite.R b/tests/testthat/test-Composite.R new file mode 100644 index 0000000..338abf3 --- /dev/null +++ b/tests/testthat/test-Composite.R @@ -0,0 +1,225 @@ +context("s2dv::Composite tests") + +############################################## + # dat1 + x1 <- array(0, dim = c(20, 10, 30)) + t1 <- array(0, dim = c(20, 10, 30)) + dat1 <- array(0, dim = c(20, 10, 30)) + + for (i in 1:20) { + x1[i, , ] <- i + } + for (i in 1:30) { + t1[, , i] <- i + } + for (i in 1:20) { + for (j in 1:30) { + dat1[i,, j ] <- 3 * sin(2 * pi * x1[i, , j] / 5. - 2 * pi * t1[i, , j] / 6.) + } + } + names(dim(dat1)) <- c('lon', 'lat', 'time') + occ1 <- rep(0, 30) + occ1[c(2, 5, 8, 11, 14, 17, 20, 23)] <- 1 + + # dat2 + dat2 <- array(0, dim = c(dat = 2, longitude = 20, latitude = 10, sdate = 30)) + dat2[1, , , ] <- dat1 + set.seed(1) + dat2[2, , , ] <- dat1 + rnorm(n = 6000, mean = mean(dat1), sd = sd(dat1)/10) + + occ2 <- rep(0, 30) + occ2[c(2, 5, 8, 11, 14, 17, 20, 23)] <- 1 + occ2[c(1, 4, 10, 13, 19, 24, 29)] <- 2 + + # dat3 + dat3 <- array(rep(c(1, 3, 2, 1, 2, 2), 8), dim = c(x = 2, y = 4, time = 6)) + occ3 <- c(1, 1, 4, 4, 3, 3) + +############################################## + +test_that("1. Input checks", { + # data + expect_error( + Composite(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Composite(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Composite(c(1:10)), + "Parameter 'data' must have at least three dimensions." + ) + expect_error( + Composite(array(1:10, dim = c(2, 5, 2))), + "Parameter 'data' must have dimension names." + ) + # occ + expect_error( + Composite(dat1, c()), + "Parameter 'occ' cannot be NULL." + ) + expect_error( + Composite(dat1, c(NA, NA)), + "Parameter 'occ' must be a numeric vector." + ) + expect_error( + Composite(dat1, array(1:10, dim = c(2, 5, 2))), + "Parameter 'occ' must be a numeric vector." + ) + # time_dim + expect_error( + Composite(dat1, occ1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Composite(dat1, occ1, time_dim = 'sdate'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Composite(dat1, c(occ1, 1)), + paste0("The length of time_dim dimension in parameter 'data' is not ", + "equal to length of parameter 'occ'.") + ) + # space_dim + expect_error( + Composite(dat1, occ1, space_dim = 'lon'), + "Parameter 'space_dim' must be a character vector with two elements." + ) + expect_error( + Composite(dat1, occ1, space_dim = c('lon', 'level')), + "Parameter 'space_dim' is not found in 'data' dimension." + ) + # lag + expect_error( + Composite(dat1, occ1, lag = -1), + "Parameter 'lag' must be a non-negative integer." + ) + # eno + expect_error( + Composite(dat1, occ1, eno = 1), + "Parameter 'eno' must be one logical value." + ) + # K + expect_error( + Composite(dat1, occ1, K = 1.5), + "Parameter 'K' must be a positive integer." + ) + # fileout + expect_error( + Composite(dat1, occ1, fileout = TRUE), + "Parameter 'fileout' must be a character string." + ) + #ncores + expect_error( + Composite(dat1, occ1, ncores = 1.2), + "Parameter 'ncores' must be a positive integer." + ) + + expect_warning( + Composite(data = array(1:40, dim = c(lon = 2, lat = 5, time = 4)), + occ = c(1, 2, 2, 2)), + "Composite K = 1 has length 1. The p-value is NA." + ) + +}) + + +test_that("2. dat1", { + + expect_equal( + dim(Composite(dat1, occ1)$composite), + c(lon = 20, lat = 10, K = 1) + ) + expect_equal( + dim(Composite(dat1, occ1)$p.val), + c(lon = 20, lat = 10, K = 1) + ) + expect_equal( + mean(Composite(dat1, occ1)$composite)*10^17, + -3.816392, + tolerance = 0.00001 + ) + expect_equal( + Composite(dat1, occ1)$composite[5, 5, 1]*10^16, + 2.220446, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat1, occ1)$p.val), + 1, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat1, occ2, eno = TRUE)$composite[, , 1])*10^17, + -3.816392, + tolerance = 0.00001 + ) + +}) + +test_that("3. dat2", { + + expect_equal( + dim(Composite(dat2, occ2, time_dim = 'sdate')$composite), + c(longitude = 20, latitude = 10, K = 2, dat = 2) + ) + expect_equal( + dim(Composite(dat2, occ2, time_dim = 'sdate')$p.val), + c(longitude = 20, latitude = 10, K = 2, dat = 2) + ) + expect_equal( + dim(Composite(dat2, occ2, time_dim = 'sdate', K = 1)$composite), + c(longitude = 20, latitude = 10, K = 1, dat = 2) + ) + expect_equal( + mean(Composite(dat2, occ2, eno = TRUE, time_dim = 'sdate')$composite[, , 1, 1])*10^17, + -3.816392, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat2, occ2, time_dim = 'sdate')$composite[, , 2, 1])*10^17, + -3.908599, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat2, occ2, time_dim = 'sdate')$composite[, , 1, 2]), + 0.00309623, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat2, occ2, time_dim = 'sdate')$p.val[, , 2, 2]), + 0.5632136, + tolerance = 0.00001 + ) + expect_equal( + mean(Composite(dat2, occ2, time_dim = 'sdate', eno = TRUE)$p.val[, , 2, 2]), + 0.5639538, + tolerance = 0.00001 + ) +}) + +test_that("4. dat3", { + + expect_equal( + dim(Composite(dat3, occ3, space_dim = c('x', 'y'))$composite), + c(x = 2, y = 4, K = 4) + ) + expect_equal( + all(is.na(Composite(dat3, occ3, space_dim = c('x', 'y'))$composite[, , 2])), + TRUE + ) + expect_equal( + sd(Composite(dat3, occ3, space_dim = c('x', 'y'))$composite[, , 4]), + 0.4432026, + tolerance = 0.0001 + ) + expect_equal( + mean(Composite(dat3, occ3, space_dim = c('x', 'y'))$p.val[, , 1]), + 0.6954875, + tolerance = 0.0001 + ) + +}) + -- GitLab From e05890c767938d1ab82a45e67aa99137e82665a8 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 8 Jun 2020 13:12:09 +0200 Subject: [PATCH 2/2] Modify Composite() documentation --- NAMESPACE | 2 + R/Composite.R | 2 +- man/Composite.Rd | 101 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 man/Composite.Rd diff --git a/NAMESPACE b/NAMESPACE index 627bf51..cf26c00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(AnimateMap) export(Clim) export(ColorBar) +export(Composite) export(ConfigAddEntry) export(ConfigApplyMatchingEntries) export(ConfigEditDefinition) @@ -78,5 +79,6 @@ importFrom(stats,pt) importFrom(stats,qchisq) importFrom(stats,qnorm) importFrom(stats,rnorm) +importFrom(stats,sd) importFrom(stats,ts) importFrom(stats,window) diff --git a/R/Composite.R b/R/Composite.R index 1ddbfee..c79ef24 100644 --- a/R/Composite.R +++ b/R/Composite.R @@ -75,7 +75,7 @@ #'res <- Composite(data = f1, occ = occ) #'filled.contour(res$composite[, , 1]) #' -#'# Example with one missing composite (#3) in occ: +#'# Example with one missing composite in occ: #'data <- 1:(4 * 5 * 6) #'dim(data) <- c(lon = 4, lat = 5, case = 6) #'occ <- c(1, 1, 2, 2, 3, 3) diff --git a/man/Composite.Rd b/man/Composite.Rd new file mode 100644 index 0000000..25c64aa --- /dev/null +++ b/man/Composite.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Composite.R +\name{Composite} +\alias{Composite} +\title{Compute composites} +\usage{ +Composite(data, occ, time_dim = "time", space_dim = c("lon", "lat"), + lag = 0, eno = FALSE, K = NULL, fileout = NULL, ncores = NULL) +} +\arguments{ +\item{data}{A numeric array containing two spatial and one temporal +dimensions.} + +\item{occ}{A vector of the occurrence time series of mode(s)/cluster(s). +The length should be the same as the temporal dimension in 'data'. +(*1) When one wants to composite all modes, e.g., all K = 3 clusters then + for example occurrences could look like: 1 1 2 3 2 3 1 3 3 2 3 2 2 3 2. +(*2) Otherwise for compositing only the 2nd mode or cluster of the above + example occurrences should look like 0 0 1 0 1 0 0 0 0 1 0 1 1 0 1.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension in 'data'. The default value is 'time'.} + +\item{space_dim}{A character vector indicating the names of the spatial +dimensions in 'data'. The default value is c('lon', 'lat').} + +\item{lag}{An integer indicating the lag time step. E.g., for lag = 2, ++2 occurrences will be used (i.e., shifted 2 time steps forward). +The default value is 0.} + +\item{eno}{A logical value indicating whether to use the effective sample +size (TRUE) or the total sample size (FALSE) for the number of degrees of +freedom. The default value is FALSE.} + +\item{K}{A numeric value indicating the maximum number of composites. The +default value is NULL, which means the maximum value provided in 'occ' is +used.} + +\item{fileout}{A character string indicating the name of the .sav output file +The default value is NULL, which means not to save the output.} +} +\value{ +A list containing: +\item{$composite}{ + A numeric array of the spatial dimensions and new dimension 'K' first, + followed by the same dimensions as parameter 'data'. The length of + dimension 'K' is parameter 'K'. +} +\item{$p.val}{ + A numeric array with the same dimension as $composite. It is the p-value of + the composites obtained through a t-test that accounts for the serial + dependence of the data. +} +} +\description{ +Composite a multi-dimensional array which contains two spatial and one +temporal dimensions, e.g., (lon, lat, time), according to the indices of +mode/cluster occurrences in time. The p-value by t-test is also computed. +} +\examples{ +blank <- array(0, dim = c(20, 10, 30)) +x1 <- blank +t1 <- blank +f1 <- blank + +for (i in 1:20) { + x1[i, , ] <- i +} + +for (i in 1:30) { + t1[, , i] <- i +} + +# This is 2D propagating sin wave example, where we use f1(lon, lat, time) +# wave field. Compositing (like using stroboscopicc light) at different +# time steps can lead to modification or cancelation of wave pattern. + +for (i in 1:20) { + for (j in 1:30) { + f1[i, , j] <- 3 * sin(2 * pi * x1[i, , j] / 5. - 2 * pi * t1[i, , j] / 6.) + } +} +names(dim(f1)) <- c('lon', 'lat', 'time') +occ <- rep(0, 30) +occ[c(2, 5, 8, 11, 14, 17, 20, 23)] <- 1 +res <- Composite(data = f1, occ = occ) +filled.contour(res$composite[, , 1]) + +occ <- rep(0, 30) +occ[c(3, 9, 15, 21)] <- 1 +res <- Composite(data = f1, occ = occ) +filled.contour(res$composite[, , 1]) + +# Example with one missing composite in occ: +data <- 1:(4 * 5 * 6) +dim(data) <- c(lon = 4, lat = 5, case = 6) +occ <- c(1, 1, 2, 2, 3, 3) +res <- Composite(data, occ, time_dim = 'case', K = 4) + +} + -- GitLab