Composite.R 8.4 KB
Newer Older
#'Compute composites
#'
#'Composite a multi-dimensional array which contains two spatial and one 
#'temporal dimensions, e.g., (lon, lat, time), according to the indices of 
#'mode/cluster occurrences in time. The p-value by t-test is also computed. 
#'
#'@param data A numeric array containing two spatial and one temporal 
#'  dimensions.
#'@param occ A vector of the occurrence time series of mode(s)/cluster(s).
#'  The length should be the same as the temporal dimension in 'data'.
#'  (*1) When one wants to composite all modes, e.g., all K = 3 clusters then 
#'    for example occurrences could look like: 1 1 2 3 2 3 1 3 3 2 3 2 2 3 2.
#'  (*2) Otherwise for compositing only the 2nd mode or cluster of the above 
#'    example occurrences should look like 0 0 1 0 1 0 0 0 0 1 0 1 1 0 1.
#'@param time_dim A character string indicating the name of the temporal 
#'  dimension in 'data'. The default value is 'time'.
#'@param space_dim A character vector indicating the names of the spatial 
#'  dimensions in 'data'. The default value is c('lon', 'lat').
#'@param lag An integer indicating the lag time step. E.g., for lag = 2, 
#'  +2 occurrences will be used (i.e., shifted 2 time steps forward). 
#'  The default value is 0.
#'@param eno A logical value indicating whether to use the effective sample 
#'  size (TRUE) or the total sample size (FALSE) for the number of degrees of 
#'  freedom. The default value is FALSE.
#'@param K A numeric value indicating the maximum number of composites. The 
#'  default value is NULL, which means the maximum value provided in 'occ' is 
#'  used.
#'@param fileout A character string indicating the name of the .sav output file
#'  The default value is NULL, which means not to save the output.
aho's avatar
aho committed
#'@param ncores An integer indicating the number of cores to use for parallel 
#'  computation. The default value is NULL.
#'
#'@return 
#'A list containing:
#'\item{$composite}{ 
#'  A numeric array of the spatial dimensions and new dimension 'K' first, 
#'  followed by the same dimensions as parameter 'data'. The length of 
#'  dimension 'K' is parameter 'K'.
#'}
#'\item{$p.val}{
#'  A numeric array with the same dimension as $composite. It is the p-value of
#'  the composites obtained through a t-test that accounts for the serial
#'  dependence of the data.
#'}
#'
#'@examples
#'blank <- array(0, dim = c(20, 10, 30))
#'x1 <- blank
#'t1 <- blank
#'f1 <- blank
#'
#'for (i in 1:20) {
#'  x1[i, , ] <- i
#'}
#'
#'for (i in 1:30) {
#'  t1[, , i] <- i
#'}
#'
#'# This is 2D propagating sin wave example, where we use f1(lon, lat, time) 
#'# wave field. Compositing (like using stroboscopicc light) at different
#'# time steps can lead to modification or cancelation of wave pattern.
#'
#'for (i in 1:20) {
#'  for (j in 1:30) {
#'    f1[i, , j] <- 3 * sin(2 * pi * x1[i, , j] / 5. - 2 * pi * t1[i, , j] / 6.)
#'  }
#'}
#'names(dim(f1)) <- c('lon', 'lat', 'time')
#'occ <- rep(0, 30)
#'occ[c(2, 5, 8, 11, 14, 17, 20, 23)] <- 1
#'res <- Composite(data = f1, occ = occ)
#'filled.contour(res$composite[, , 1])
#'
#'occ <- rep(0, 30)
#'occ[c(3, 9, 15, 21)] <- 1
#'res <- Composite(data = f1, occ = occ)
#'filled.contour(res$composite[, , 1])
#'
aho's avatar
aho committed
#'# Example with one missing composite in occ:
#'data <- 1:(4 * 5 * 6)
#'dim(data) <- c(lon = 4, lat = 5, case = 6)
#'occ <- c(1, 1, 2, 2, 3, 3) 
#'res <- Composite(data, occ, time_dim = 'case',  K = 4)
#'
#'@importFrom stats sd pt
#'@import multiApply
#'@export
Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
                      lag = 0, eno = FALSE, K = NULL, fileout = NULL, ncores = NULL) {

  # Check inputs 
  ## data
  if (is.null(data)) {
    stop("Parameter 'data' cannot be NULL.")
  }
  if (!is.numeric(data)) {
    stop("Parameter 'data' must be a numeric array.")
  }
  if (length(dim(data)) < 3) {
    stop("Parameter 'data' must have at least three dimensions.")
  }
  if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) {
    stop("Parameter 'data' must have dimension names.")
  }
  ## occ
  if (is.null(occ)) {
    stop("Parameter 'occ' cannot be NULL.")
  }
  if (!is.numeric(occ) | length(dim(occ)) > 1) {
    stop("Parameter 'occ' must be a numeric vector.")
  }
  ## time_dim
  if (!is.character(time_dim) | length(time_dim) > 1) {
    stop("Parameter 'time_dim' must be a character string.")
  }
  if (!time_dim %in% names(dim(data))) {
    stop("Parameter 'time_dim' is not found in 'data' dimension.")
  }
  if (dim(data)[time_dim] != length(occ)) {
     stop(paste0("The length of time_dim dimension in parameter 'data' is not ",
                 "equal to length of parameter 'occ'."))
  }
  ## space_dim
  if (!is.character(space_dim) | length(space_dim) != 2) {
    stop("Parameter 'space_dim' must be a character vector with two elements.")
  }
  if (!all(space_dim %in% names(dim(data)))) {
    # See if 'longitude' and 'latitude' exist
    if (all(c('longitude', 'latitude') %in% names(dim(data)))) {
      space_dim <- c('longitude', 'latitude')
    } else {
      stop("Parameter 'space_dim' is not found in 'data' dimension.")
    }
  }
  ## lag
  if (!is.null(lag)) {
    if (!is.numeric(lag) | lag %% 1 != 0 | lag < 0 | length(lag) > 1) {
      stop("Parameter 'lag' must be a non-negative integer.")
    }
  }
  ## eno
  if (!is.logical(eno) | length(eno) > 1) {
    stop("Parameter 'eno' must be one logical value.")
  }
  ## K
  if (!is.null(K)) {
    if (!is.numeric(K) | K %% 1 != 0 | K < 1 | length(K) > 1) {
      stop("Parameter 'K' must be a positive integer.")
    }
  }
  ## fileout
  if (!is.null(fileout)) {
    if (!is.character(fileout) | length(fileout) > 1) {
      stop("Parameter 'fileout' must be a character string.")
    }
  }
  ## ncores
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
      length(ncores) > 1) {
      stop("Parameter 'ncores' must be a positive integer.")
    }
  }

  ###############################
  # Calculate Composite

  ## Check if any occ is only 1 case
  count_k <- plyr::count(occ)
  if (any(count_k$freq == 1)) {
    tmp <- count_k$x[which(count_k$freq == 1)]
    warning(paste0("Composite K = ", tmp, " has length 1. The p-value is NA."))
  }

  output_dims <- list(composite = c(space_dim, 'K'), 
                      p.val = c(space_dim, 'K'))

  output <- Apply(list(data),
                  target_dims = list(c(space_dim, time_dim)),
                  fun = .Composite,
                  output_dims = output_dims,
                  occ = occ, time_dim = time_dim, space_dim = space_dim,
                  K = K, lag = lag, eno = eno, ncores_input = ncores,
                  ncores = ncores)

  if (!is.null(fileout)) {
    save(output, file = paste(fileout, '.sav', sep = ''))
  }

  return(output)
}

.Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), 
                       K = NULL, lag = 0, eno = FALSE, ncores_input = NULL) {
# data: [lon, lat, time]
# occ: [time]
  if (is.null(K)) {
     K <- max(occ)
  }
  composite <- array(dim = c(dim(data)[1:2], composite = K))
  tvalue <- array(dim = dim(data)[1:2])
  dof <- array(dim = dim(data)[1:2])
  pval <- array(dim = c(dim(data)[1:2], composite = K))

  if (eno == TRUE) { 
    n_tot <- Eno(data, time_dim = time_dim, ncores = ncores_input)
  } else {
    n_tot <- length(occ)
  }

  mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE, ncores = ncores_input)
  stdv_tot <- apply(data, c(1, 2), sd, na.rm = TRUE) 

  for (k in 1:K) {

    if (length(which(occ == k)) >= 1) {
      indices <- which(occ == k) + lag
      toberemoved <-  which(0 > indices | indices > dim(data)[3])

    if (length(toberemoved) > 0) {
        indices <- indices[-toberemoved]
    }
    if (eno == TRUE) {
        data_tmp <- data[, , indices]
        names(dim(data_tmp)) <- names(dim(data))
        n_k <- Eno(data_tmp, time_dim = time_dim, ncores = ncores_input)
    } else {
        n_k <- length(indices)
    }
    if (length(indices) == 1) {
        composite[, , k] <- data[, , indices] 
    }  else {
        composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE, ncores = ncores_input)
    }
    stdv_k <- apply(data[, , indices], c(1, 2), sd, na.rm = TRUE)
    
    tvalue <- (mean_tot - composite[, , k]) / 
               sqrt(stdv_tot^2 / n_tot + stdv_k^2 / n_k)
    dof <- (stdv_tot^2 / n_tot + stdv_k^2 / n_k)^2 / 
           ((stdv_tot^2 / n_tot)^2 / (n_tot - 1) +
           (stdv_k^2 / n_k)^2 / (n_k - 1))
    pval[, , k] <- 2 * pt(-abs(tvalue), df = dof)
    }
  }  

  invisible(list(K = composite, p.val = pval)) 
}