Apply.R 3.76 KB
Newer Older
Alasdair Hunter's avatar
Alasdair Hunter committed
#' Wrapper For Applying Atomic Functions To Arrays.
#'
#' A wrapper for applying a function across one or more arrays. The user can input one or more arrays, and specify the dimensions of each the arrays over which the function should be looped. This is a extension of the apply paradigm to the case where the data being considered is distributed across multiple objects.
#' @param data A single object (vector, matrix or array) or a list of objects.
#' @param input_margins List of vectors containing the margins to be input to the function for each object. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects.
#' @param AtomicFun Function to be applied to the arrays.
#' @param ... Additional arguments to be used in the AtomicFun.
#' @param parallel Logical, should the function be applied in parallel.
#' @param ncores The number of cores to use for parallel computation.
#' @param margins List of vectors containing the margins for the function to be applied over for each object in the data. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects.
#' @return Array with dimensions equal to margins[[1]], and any additional dimensions resulting from AtomicFun.
#' @references Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software.
#' @export
#' @examples
#' data = list(array(rnorm(1000), c(10,10,10)),array(rnorm(1000), c(10,10,10)))
#' margins = list(c(1, 3), c(1, 3))
#' corr <- Apply(data, margins, AtomicFun = "cor")
Apply <- function(data, input_ = NULL, AtomicFun, ..., parallel = FALSE, ncores = NULL, margins = NULL) {
  if (!is.list(data)) {
    data <- list(data)
  }
  if (!is.null(input_margins)) {
    reverse <- TRUE
    margins <- input_margins
  } else {
    reverse <- FALSE
  }
  if (!is.null(margins)) {
    if (!is.list(margins)) {
      margins <- rep(list(margins), length(data))
    }
  }
  if (is.character(unlist(margins[1])) && !is.null(margins)) {
    margins2 <- margins
    for (i in 1 : length(data)) {
      margins_new <- c()
      for (j in 1 : length(margins2[[i]])) {
        margins_new[j] <- which(names(dim(data[[i]])) == margins2[[i]][[j]])
      }
      margins[[i]] <- c(margins_new)
    }
  }
  if (reverse == TRUE) {
    for (i in 1 : length(data)) {
      margins[[i]] <- c(1 :length(dim(data[[i]])))[-c(margins[[i]])]
    }
  }
  if (!is.logical(parallel)) {
    stop("parallel must be logical")
  }
  input <- list()
  f <- splat(get(AtomicFun))
  if (!is.null(margins)) {
    for (i in 1 : length(data)) {
      input[[i]] <- plyr:::splitter_a(data[[i]], .margins = margins[[i]], .expand = TRUE,
                                      .id = names(dim(data[[1]]))[margins[[1]]])
    }
    i_max <- length(input[[1]]) / dim(data[[1]])[1]
    k <- length(input[[1]]) / i_max
    if (parallel == TRUE) {
      if (is.null(ncores)) {
        ncores <- availableCores() - 1
      } else {
        ncores <- min(availableCores() - 1, ncores)
      }
      registerDoMC(ncores)
    }
    WrapperFun <- llply(1 : i_max, function(i)
      sapply((k * i - (k - 1)) : (k * i), function(x) f(lapply(input, `[[`, x),...), simplify = FALSE),
      .parallel = parallel)
    if (is.null(dim(WrapperFun[[1]][[1]]))) {
      WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length(WrapperFun[[1]][[1]])),
                                                                dim(data[[1]])[margins[[1]]]))
    } else {
      WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])),
                                                                dim(data[[1]])[margins[[1]]]))
    }
  } else {
    WrapperFun <- f(data, ...)
  }
  out <- WrapperFun
}