Commit 4a55da1d authored by Nicolau Manubens Gil's avatar Nicolau Manubens Gil
Browse files

Merge branch 'master' into 'production'

Master

See merge request !4
parents defacc51 c54d74aa
Package: multiApply
Title: Apply Functions to Multiple Multidimensional Arguments
Version: 0.0.1
Version: 1.0.0
Authors@R: c(
person("BSC-CNS", role = c("aut", "cph")),
person("Alasdair", "Hunter", , "alasdair.hunter@bsc.es", role = c("aut", "cre")),
......@@ -10,10 +10,9 @@ Depends:
R (>= 3.2.0)
Imports:
abind,
plyr,
doParallel,
future,
foreach
foreach,
plyr
License: LGPL-3
URL: https://earth.bsc.es/gitlab/ces/multiApply
BugReports: https://earth.bsc.es/gitlab/ces/multiApply/issues
......
# Generated by roxygen2: do not edit by hand
importFrom(plyr, llply, splat)
importFrom(abind, abind)
importFrom(future, availableCores)
importFrom(doParallel, registerDoParallel)
importFrom(foreach, registerDoSEQ)
importFrom(doParallel, registerDoParallel)
importFrom(plyr, splat)
importFrom(plyr, llply)
importFrom(stats, setNames)
export(Apply)
This diff is collapsed.
# Function to permute arrays of non-atomic elements (e.g. POSIXct)
.aperm2 <- function(x, new_order) {
y <- array(1:length(x), dim = dim(x))
y <- aperm(y, new_order)
old_dims <- dim(x)
x <- x[as.vector(y)]
dim(x) <- old_dims[new_order]
x
}
# This function is a helper for the function .MergeArrays.
# It expects as inputs two named numeric vectors, and it extends them
# with dimensions of length 1 until an ordered common dimension
# format is reached.
.MergeArrayDims <- function(dims1, dims2) {
new_dims1 <- c()
new_dims2 <- c()
while (length(dims1) > 0) {
if (names(dims1)[1] %in% names(dims2)) {
pos <- which(names(dims2) == names(dims1)[1])
dims_to_add <- rep(1, pos - 1)
if (length(dims_to_add) > 0) {
names(dims_to_add) <- names(dims2[1:(pos - 1)])
}
new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
new_dims2 <- c(new_dims2, dims2[1:pos])
dims1 <- dims1[-1]
dims2 <- dims2[-c(1:pos)]
} else {
new_dims1 <- c(new_dims1, dims1[1])
new_dims2 <- c(new_dims2, 1)
names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
dims1 <- dims1[-1]
}
}
if (length(dims2) > 0) {
dims_to_add <- rep(1, length(dims2))
names(dims_to_add) <- names(dims2)
new_dims1 <- c(new_dims1, dims_to_add)
new_dims2 <- c(new_dims2, dims2)
}
list(new_dims1, new_dims2)
}
# This function takes two named arrays and merges them, filling with
# NA where needed.
# dim(array1)
# 'b' 'c' 'e' 'f'
# 1 3 7 9
# dim(array2)
# 'a' 'b' 'd' 'f' 'g'
# 2 3 5 9 11
# dim(.MergeArrays(array1, array2, 'b'))
# 'a' 'b' 'c' 'e' 'd' 'f' 'g'
# 2 4 3 7 5 9 11
.MergeArrays <- function(array1, array2, along) {
if (!(is.null(array1) || is.null(array2))) {
if (!(identical(names(dim(array1)), names(dim(array2))) &&
identical(dim(array1)[-which(names(dim(array1)) == along)],
dim(array2)[-which(names(dim(array2)) == along)]))) {
new_dims <- .MergeArrayDims(dim(array1), dim(array2))
dim(array1) <- new_dims[[1]]
dim(array2) <- new_dims[[2]]
for (j in 1:length(dim(array1))) {
if (names(dim(array1))[j] != along) {
if (dim(array1)[j] != dim(array2)[j]) {
if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) {
na_array_dims <- dim(array2)
na_array_dims[j] <- dim(array1)[j] - dim(array2)[j]
na_array <- array(dim = na_array_dims)
array2 <- abind(array2, na_array, along = j)
names(dim(array2)) <- names(na_array_dims)
} else {
na_array_dims <- dim(array1)
na_array_dims[j] <- dim(array2)[j] - dim(array1)[j]
na_array <- array(dim = na_array_dims)
array1 <- abind(array1, na_array, along = j)
names(dim(array1)) <- names(na_array_dims)
}
}
}
}
}
if (!(along %in% names(dim(array2)))) {
stop("The dimension specified in 'along' is not present in the ",
"provided arrays.")
}
array1 <- abind(array1, array2, along = which(names(dim(array1)) == along))
names(dim(array1)) <- names(dim(array2))
} else if (is.null(array1)) {
array1 <- array2
}
array1
}
# Takes as input a list of arrays. The list must have named dimensions.
.MergeArrayOfArrays <- function(array_of_arrays) {
MergeArrays <- .MergeArrays
array_dims <- (dim(array_of_arrays))
dim_names <- names(array_dims)
# Merge the chunks.
for (dim_index in 1:length(dim_names)) {
dim_sub_array_of_chunks <- dim_sub_array_of_chunk_indices <- NULL
if (dim_index < length(dim_names)) {
dim_sub_array_of_chunks <- array_dims[(dim_index + 1):length(dim_names)]
names(dim_sub_array_of_chunks) <- dim_names[(dim_index + 1):length(dim_names)]
dim_sub_array_of_chunk_indices <- dim_sub_array_of_chunks
sub_array_of_chunk_indices <- array(1:prod(dim_sub_array_of_chunk_indices),
dim_sub_array_of_chunk_indices)
} else {
sub_array_of_chunk_indices <- NULL
}
sub_array_of_chunks <- vector('list', prod(dim_sub_array_of_chunks))
dim(sub_array_of_chunks) <- dim_sub_array_of_chunks
for (i in 1:prod(dim_sub_array_of_chunks)) {
if (!is.null(sub_array_of_chunk_indices)) {
chunk_sub_indices <- which(sub_array_of_chunk_indices == i, arr.ind = TRUE)[1, ]
} else {
chunk_sub_indices <- NULL
}
for (j in 1:(array_dims[dim_index])) {
new_chunk <- do.call('[[', c(list(x = array_of_arrays),
as.list(c(j, chunk_sub_indices))))
if (is.null(new_chunk)) {
stop("Chunks missing.")
}
if (is.null(sub_array_of_chunks[[i]])) {
sub_array_of_chunks[[i]] <- new_chunk
} else {
sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]],
new_chunk,
dim_names[dim_index])
}
}
}
array_of_arrays <- sub_array_of_chunks
rm(sub_array_of_chunks)
gc()
}
array_of_arrays[[1]]
}
......@@ -4,48 +4,42 @@
\alias{Apply}
\title{Wrapper for Applying Atomic Functions to Arrays.}
\usage{
Apply(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, parallel = FALSE,
ncores = NULL)
Apply(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL,
margins = NULL, ncores = NULL)
}
\arguments{
\item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.}
\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins.}
\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims.}
\item{AtomicFun}{Function to be applied to the arrays.}
\item{...}{Additional arguments to be used in the AtomicFun.}
\item{inverse_margins}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins.}
\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output).}
\item{parallel}{Logical, should the function be applied in parallel.}
\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims.}
\item{ncores}{The number of cores to use for parallel computation.}
\item{ncores}{The number of multicore threads to use for parallel computation.}
}
\value{
Array or matrix or vector resulting from AtomicFun.
List of arrays or matrices or vectors resulting from applying AtomicFun to data.
}
\description{
Takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option.
This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions.
}
\details{
A user can apply a function that receives 1 or more objects as input, each with a different number of dimensions, and returns as a result a single array with any number of dimensions.
When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'.
}
\examples{
#Change in the rate of exceedance for two arrays, with different
#dimensions, for some matrix of exceedances.
array_1 <- array(rnorm(2000), c(10,10,20)) # array with 20 timesteps
array_2 <- array(rnorm(1000), c(10, 10, 15)) # array with 15 timesteps
thresholds <- matrix(rnorm(100), 10, 10) # matrix of thresholds (no timesteps)
# Function for calculating the change in the frequency of exceedances over the
#thresholds for array_1 relative to array_2 (percentage change).
test_fun <- function(x, y, z) {(((sum(x > z) / (length(x))) /
(sum(y > z) / (length(y)))) * 100) - 100}
data = list(array_1, array_2, thresholds)
data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)),
array(rnorm(100), c(10, 10)))
test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) /
(sum(y > z) / (length(y)))) * 100}
margins = list(c(1, 2), c(1, 2), c(1,2))
test <- Apply(data = data, margins = margins, AtomicFun = "test_fun")
test <- Apply(data, margins = margins, AtomicFun = "test_fun")
}
\references{
Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment