diff --git a/.Rbuildignore b/.Rbuildignore index 834c4fa124507946a4eb8131288fe93d2ed7cf00..97c7dbee587781349aa9529b8a3b300efeb00aa6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,5 +1,6 @@ .git .gitignore +.gitlab-ci.yml .tar.gz .pdf ./.nc diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7d3f8954fc859541b2ce5e564804bf6b7624848 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,10 @@ +stages: + - build + +build: + stage: build + script: + - module load R + - R CMD build --resave-data . + - R CMD check --as-cran multiApply_*.tar.gz + - R -e 'covr::package_coverage()' diff --git a/DESCRIPTION b/DESCRIPTION index 0e1cede84b03e659c62b3b4e789d6221a2eaa6c3..31dcd942f3bb2a29681724c40d917ebc33b7c09a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,20 @@ Package: multiApply -Title: Apply Functions to Multiple Multidimensional Arguments -Version: 1.0.0 +Title: Apply Functions to Multiple Multidimensional Arrays or Vectors +Version: 2.0.0 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), - person("Alasdair", "Hunter", , "alasdair.hunter@bsc.es", role = c("aut", "cre")), - person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = "aut")) -Description: The base apply function and its variants, as well as the related functions in the 'plyr' package, typically apply user-defined functions to a single argument (or a list of vectorized arguments in the case of mapply). The 'multiApply' package extends this paradigm to functions taking a list of multiple unidimensional or multidimensional arguments (or combinations thereof) as input, which can have different numbers of dimensions as well as different dimension lengths. + person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = "aut"), + person("Alasdair", "Hunter", , "alasdair.hunter@bsc.es", role = "aut"), + person("Nuria", "Perez", , "nuria.perez@bsc.es", role = "cre")) +Description: The base apply function and its variants, as well as the related functions in the 'plyr' package, typically apply user-defined functions to a single argument (or a list of vectorized arguments in the case of mapply). The 'multiApply' package extends this paradigm with its only function, Apply, which efficiently applies functions taking one or a list of multiple unidimensional or multidimensional numeric arrays (or combinations thereof) as input. The input arrays can have different numbers of dimensions as well as different dimension lengths, and the applied function can return one or a list of unidimensional or multidimensional arrays as output. This saves development time by preventing the R user from writing often error-prone and memory-unefficient loops dealing with multiple complex arrays. Also, a remarkable feature of Apply is the transparent use of multi-core through its parameter 'ncores'. In contrast to the base apply function, this package suggests the use of 'target dimensions' as opposite to the 'margins' for specifying the dimensions relevant to the function to be applied. Depends: R (>= 3.2.0) Imports: - abind, doParallel, foreach, plyr +Suggests: + testthat License: LGPL-3 URL: https://earth.bsc.es/gitlab/ces/multiApply BugReports: https://earth.bsc.es/gitlab/ces/multiApply/issues diff --git a/NAMESPACE b/NAMESPACE index 283fbf0f0ce53da89e0116f669a2fda68535fda1..12eb9c50b1bb579c77ba3a41b70b37c9c3e783d5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,7 @@ # Generated by roxygen2: do not edit by hand -importFrom(abind, abind) -importFrom(foreach, registerDoSEQ) -importFrom(doParallel, registerDoParallel) -importFrom(plyr, splat) -importFrom(plyr, llply) -importFrom(stats, setNames) + export(Apply) +importFrom(doParallel,registerDoParallel) +importFrom(foreach,registerDoSEQ) +importFrom(plyr,llply) +importFrom(plyr,splat) diff --git a/R/Apply.R b/R/Apply.R index b49b11d46d0a1af59d8a6384389d87e6a567f7f2..876c8f44044be9e08af37690558f0ab878ac3caf 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -1,28 +1,37 @@ -#' Wrapper for Applying Atomic Functions to Arrays. +#' Apply Functions to Multiple Multidimensional Arrays or Vectors #' -#' This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions. -#' @param data A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun. -#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims. -#' @param AtomicFun Function to be applied to the arrays. -#' @param ... Additional arguments to be used in the AtomicFun. -#' @param output_dims Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output). -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. -#' @param ncores The number of multicore threads to use for parallel computation. -#' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. -#' @return List of arrays or matrices or vectors resulting from applying AtomicFun to data. +#' This function efficiently applies a given function, which takes N vectors or multi-dimensional arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N vectors or multi-dimensional arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array the function is to be applied over with the \code{margins} or \code{target_dims} parameters. The function to be applied can receive other helper parameters and return any number of numeric vectors or multidimensional arrays. The target dimensions or margins can be specified by their names, as long as the inputs are provided with dimension names (recommended). This function can also use multi-core in a transparent way if requested via the \code{ncores} parameter.\cr\cr The following steps help to understand how \code{Apply} works:\cr\cr - The function receives N arrays with Dn dimensions each.\cr - The user specifies, for each of the arrays, which of its dimensions are 'target' dimensions (dimensions which the function provided in 'fun' operates with) and which are 'margins' (dimensions to be looped over).\cr - \code{Apply} will generate an array with as many dimensions as margins in all of the input arrays. If a margin is repeated across different inputs, it will appear only once in the resulting array.\cr - For each element of this resulting array, the function provided in the parameter'fun' is applied to the corresponding sub-arrays in 'data'.\cr - If the function returns a vector or a multidimensional array, the additional dimensions will be prepended to the resulting array (in left-most positions).\cr - If the provided function returns more than one vector or array, the process above is carried out for each of the outputs, resulting in a list with multiple arrays, each with the combination of all target dimensions (at the right-most positions) and resulting dimensions (at the left-most positions). +#' +#' @param data One or a list of numeric object (vector, matrix or array). They must be in the same order as expected by the function provided in the parameter 'fun'. The dimensions do not necessarily have to be ordered. If the 'target_dims' require a different order than the provided, \code{Apply} will automatically reorder the dimensions as needed. +#' @param target_dims One or a list of vectors (or NULLs) containing the dimensions to be input into fun for each of the objects in the data. If a single vector of target dimensions is specified and multiple inputs are provided in 'data, then the single set of target dimensions is re-used for all of the inputs. These vectors can contain either integers specifying the position of the dimensions, or character strings corresponding to the dimension names. This parameter is mandatory if 'margins' are not specified. If both 'margins' and 'target_dims' are specified, 'margins' takes priority. +#' @param fun Function to be applied to the arrays. Must receive as many inputs as provided in 'data', each with as many dimensions as specified in 'target_dims' or as the total number of dimensions in 'data' minus the ones specified in 'margins'. The function can receive other additional fixed parameters (see parameter '...' of \code{Apply}). The function can return one or a list of numeric vectors or multidimensional arrays, optionally with dimension names which will be propagated to the final result. The returned list can optionally be named, with a name for each output, which will be propagated to the resulting array. The function can optionally be provided with the attributes 'target_dims' and 'output_dims'. In that case, the corresponding parameters of \code{Apply} do not need to be provided. The function can expect named dimensions for each of its inputs, in the same order as specified in 'target_dims' or, if no 'target_dims' have been provided, in the same order as provided in 'data'. +#' @param ... Additional fixed arguments expected by the function provided in the parameter 'fun'. +#' @param output_dims Optional list of vectors containing the names of the dimensions to be output from the fun for each of the objects it returns (or a single vector if the function has only one output). +#' @param margins One or a list of vectors (or NULLs) containing the 'margin' dimensions to be looped over for each input in 'data'. If a single vector of margins is specified and multiple inputs are provided in 'data', then the single set of margins is re-used for all of the inputs. These vectors can contain either integers specifying the position of the margins, or character strings corresponding to the dimension names. If both 'margins' and 'target_dims' are specified, 'margins' takes priority. +#' @param guess_dim_names Whether to automatically guess missing dimension names for dimensions of equal length across different inputs in 'data' with a warning (TRUE; default), or to crash whenever unnamed dimensions of equa length are identified across different inputs (FALSE). +#' @param ncores The number of parallel processes to spawn for the use for parallel computation in multiple cores. +#' @param split_factor Factor telling to which degree the input data should be split into smaller pieces to be processed by the available cores. By default (split_factor = 1) the data is split into 4 pieces for each of the cores (as specified in ncores). A split_factor of 2 will result in 8 pieces for each of the cores, and so on. The special value 'greatest' will split the input data into as many pieces as possible. +#' @details When using a single object as input, Apply is almost identical to the apply function (as fast or slightly slower in some cases; with equal or improved -smaller- memory footprint). +#' @return List of arrays or matrices or vectors resulting from applying 'fun' to 'data'. #' @references Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. #' @export #' @examples #' #Change in the rate of exceedance for two arrays, with different #' #dimensions, for some matrix of exceedances. -#' data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)), -#' array(rnorm(100), c(10, 10))) -#' test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / -#' (sum(y > z) / (length(y)))) * 100} -#' margins = list(c(1, 2), c(1, 2), c(1,2)) -#' test <- Apply(data, margins = margins, AtomicFun = "test_fun") -Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, - margins = NULL, ncores = NULL) { +#' data <- list(array(rnorm(1000), c(5, 10, 20)), +#' array(rnorm(500), c(5, 10, 10)), +#' array(rnorm(50), c(5, 10))) +#' test_fun <- function(x, y, z) { +#' ((sum(x > z) / (length(x))) / +#' (sum(y > z) / (length(y)))) * 100 +#' } +#' test <- Apply(data, target = list(3, 3, NULL), test_fun) +#' @importFrom foreach registerDoSEQ +#' @importFrom doParallel registerDoParallel +#' @importFrom plyr splat llply +Apply <- function(data, target_dims = NULL, fun, ..., + output_dims = NULL, margins = NULL, guess_dim_names = TRUE, + ncores = NULL, split_factor = 1) { # Check data if (!is.list(data)) { data <- list(data) @@ -32,7 +41,12 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } is_vector <- rep(FALSE, length(data)) is_unnamed <- rep(FALSE, length(data)) + unnamed_dims <- c() + guessed_any_dimnames <- FALSE for (i in 1 : length(data)) { + if (length(data[[i]]) < 1) { + stop("Arrays in 'data' must be of length > 0.") + } if (is.null(dim(data[[i]]))) { is_vector[i] <- TRUE is_unnamed[i] <- TRUE @@ -43,41 +57,89 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, stop("Dimension names of arrays in 'data' must be at least ", "one character long.") } + if (length(unique(names(dim(data[[i]])))) != length(names(dim(data[[i]])))) { + stop("Arrays in 'data' must not have repeated dimension names.") + } + if (any(is.na(names(dim(data[[i]]))))) { + stop("Arrays in 'data' must not have NA as dimension names.") + } } else { is_unnamed[i] <- TRUE + new_unnamed_dims <- c() + unnamed_dims_copy <- unnamed_dims + for (j in 1 : length(dim(data[[i]]))) { + len_of_dim_j <- dim(data[[i]])[j] + found_match <- which(unnamed_dims_copy == len_of_dim_j) + if (!guess_dim_names && (length(found_match) > 0)) { + stop("Arrays in 'data' have multiple unnamed dimensions of the ", + "same length. Please provide dimension names.") + } + if (length(found_match) > 0) { + found_match <- found_match[1] + names(dim(data[[i]]))[j] <- names(unnamed_dims_copy[found_match]) + unnamed_dims_copy <- unnamed_dims_copy[-found_match] + guessed_any_dimnames <- TRUE + } else { + new_dim <- len_of_dim_j + names(new_dim) <- paste0('_unnamed_dim_', length(unnamed_dims) + + length(new_unnamed_dims) + 1, '_') + new_unnamed_dims <- c(new_unnamed_dims, new_dim) + names(dim(data[[i]]))[j] <- names(new_dim) + } + } + unnamed_dims <- c(unnamed_dims, new_unnamed_dims) } } + if (guessed_any_dimnames) { + dim_names_string <- "" + for (i in 1:length(data)) { + dim_names_string <- c(dim_names_string, "\n\tInput ", i, ":", + sapply(capture.output(print(dim(data[[i]]))), + function(x) paste0('\n\t\t', x))) + } + warning("Guessed names for some unnamed dimensions of equal length ", + "found across different inputs in 'data'. Please check ", + "carefully the assumed names below are correct, or provide ", + "dimension names for safety, or disable the parameter ", + "'guess_dim_names'.", dim_names_string) + } - # Check AtomicFun - if (is.character(AtomicFun)) { - try({AtomicFun <- get(AtomicFun)}, silent = TRUE) - if (!is.function(AtomicFun)) { - stop("Could not find the function '", AtomicFun, "'.") + # Check fun + if (is.character(fun)) { + fun_name <- fun + err <- try({ + fun <- get(fun) + }, silent = TRUE) + if (!is.function(fun)) { + stop("Could not find the function '", fun_name, "'.") } } - if (!is.function(AtomicFun)) { - stop("Parameter 'AtomicFun' must be a function or a character string ", + if (!is.function(fun)) { + stop("Parameter 'fun' must be a function or a character string ", "with the name of a function.") } - if ('startR_step' %in% class(AtomicFun)) { + if (!is.null(attributes(fun))) { if (is.null(target_dims)) { - target_dims <- attr(AtomicFun, 'target_dims') + if ('target_dims' %in% names(attributes(fun))) { + target_dims <- attr(fun, 'target_dims') + } } if (is.null(output_dims)) { - output_dims <- attr(AtomicFun, 'output_dims') + if ('output_dims' %in% names(attributes(fun))) { + output_dims <- attr(fun, 'output_dims') + } } } # Check target_dims and margins - if (is.null(margins) && is.null(target_dims)) { + arglist <- as.list(match.call()) + if (!any(c('margins', 'target_dims') %in% names(arglist)) && + is.null(target_dims)) { stop("One of 'margins' or 'target_dims' must be specified.") } - if (!is.null(margins)) { - target_dims <- NULL - } margins_names <- vector('list', length(data)) target_dims_names <- vector('list', length(data)) - if (!is.null(margins)) { + if ('margins' %in% names(arglist)) { # Check margins and build target_dims accordingly if (!is.list(margins)) { margins <- rep(list(margins), length(data)) @@ -87,6 +149,9 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, stop("Parameter 'margins' must be one or a list of numeric or ", "character vectors.") } + if (any(sapply(margins, function(x) is.character(x) && (length(x) == 0)))) { + stop("Parameter 'margins' must not contain length-0 character vectors.") + } duplicate_dim_specs <- sapply(margins, function(x) { length(unique(x)) != length(x) @@ -117,10 +182,15 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, margins_names[[i]] <- margins[[i]] margins[[i]] <- margins2_new_num } - if (!is.null(names(dim(data[[i]])))) { + if (length(margins[[i]]) == length(dim(data[[i]]))) { + target_dims_names[i] <- list(NULL) + target_dims[i] <- list(NULL) + margins_names[[i]] <- names(dim(data[[i]])) + } else { + margins_names[[i]] <- names(dim(data[[i]]))[margins[[i]]] target_dims_names[[i]] <- names(dim(data[[i]]))[- margins[[i]]] + target_dims[[i]] <- (1 : length(dim(data[[i]])))[- margins[[i]]] } - target_dims[[i]] <- (1 : length(dim(data[[i]])))[- margins[[i]]] } else { target_dims[[i]] <- 1 : length(dim(data[[i]])) if (!is.null(names(dim(data[[i]])))) { @@ -134,12 +204,12 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, target_dims <- rep(list(target_dims), length(data)) } if (any(!sapply(target_dims, - function(x) is.character(x) || is.numeric(x)))) { + function(x) is.character(x) || is.numeric(x) || is.null(x)))) { stop("Parameter 'target_dims' must be one or a list of numeric or ", "character vectors.") } - if (any(sapply(target_dims, length) == 0)) { - stop("Parameter 'target_dims' must not contain length-0 vectors.") + if (any(sapply(target_dims, function(x) is.character(x) && (length(x) == 0)))) { + stop("Parameter 'target_dims' must not contain length-0 character vectors.") } duplicate_dim_specs <- sapply(target_dims, function(x) { @@ -151,29 +221,41 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } margins <- vector('list', length(data)) for (i in 1 : length(data)) { - if (is.character(unlist(target_dims[i]))) { - if (is.null(names(dim(data[[i]])))) { - stop("Parameter 'target_dims' contains dimension names, but ", - "some of the corresponding objects in 'data' do not have ", - "dimension names.") - } - targs2 <- target_dims[[i]] - targs2_new_num <- c() - for (j in 1 : length(targs2)) { - matches <- which(names(dim(data[[i]])) == targs2[j]) - if (length(matches) < 1) { - stop("Could not find dimension '", targs2[j], "' in ", i, - "th object provided in 'data'.") + if (length(target_dims[[i]]) > 0) { + if (is.character(unlist(target_dims[i]))) { + if (is.null(names(dim(data[[i]])))) { + stop("Parameter 'target_dims' contains dimension names, but ", + "some of the corresponding objects in 'data' do not have ", + "dimension names.") + } + targs2 <- target_dims[[i]] + targs2_new_num <- c() + for (j in 1 : length(targs2)) { + matches <- which(names(dim(data[[i]])) == targs2[j]) + if (length(matches) < 1) { + stop("Could not find dimension '", targs2[j], "' in ", i, + "th object provided in 'data'.") + } + targs2_new_num[j] <- matches[1] } - targs2_new_num[j] <- matches[1] + target_dims_names[[i]] <- target_dims[[i]] + target_dims[[i]] <- targs2_new_num + } + if (length(target_dims[[i]]) == length(dim(data[[i]]))) { + margins_names[i] <- list(NULL) + margins[i] <- list(NULL) + target_dims_names[[i]] <- names(dim(data[[i]])) + } else { + target_dims_names[[i]] <- names(dim(data[[i]]))[target_dims[[i]]] + margins_names[[i]] <- names(dim(data[[i]]))[- target_dims[[i]]] + margins[[i]] <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] + } + } else { + margins[[i]] <- 1 : length(dim(data[[i]])) + if (!is.null(names(dim(data[[i]])))) { + margins_names[[i]] <- names(dim(data[[i]])) } - target_dims_names[[i]] <- target_dims[[i]] - target_dims[[i]] <- targs2_new_num - } - if (!is.null(names(dim(data[[i]])))) { - margins_names[[i]] <- names(dim(data[[i]]))[- target_dims[[i]]] } - margins[[i]] <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] } } # Reorder dimensions of input data for target dims to be left-most @@ -185,7 +267,11 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, marg_dims <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] data[[i]] <- .aperm2(data[[i]], c(target_dims[[i]], marg_dims)) target_dims[[i]] <- 1 : length(target_dims[[i]]) - margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) + target_dims_names[[i]] <- names(dim(data[[i]]))[target_dims[[i]]] + if (length(target_dims[[i]]) < length(dim(data[[i]]))) { + margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) + margins_names[[i]] <- names(dim(data[[i]]))[margins[[i]]] + } } } } @@ -218,16 +304,11 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, # Consistency checks of margins of all input objects # for each data array, add its margins to the list if not present. - # if there are unnamed margins in the list, check their size matches the margins being added - # and simply assing them a name # those margins present, check that they match - # if unnamed margins, check consistency with found margins - # if more mrgins than found, add numbers to the list, without names # with this we end up with a named list of margin sizes - # for data arrays with unnamed margins, we can assume their margins names are those of the first entries in the resulting list all_found_margins_lengths <- afml <- list() for (i in 1:length(data)) { - if (!is.null(margins_names[[i]])) { + #if (!is.null(margins_names[[i]])) { if (length(afml) > 0) { matches <- which(margins_names[[i]] %in% names(afml)) if (length(matches) > 0) { @@ -239,72 +320,11 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } else { margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) } - unnamed_margins <- which(sapply(names(afml), nchar) == 0) - if (length(unnamed_margins) > 0) { - stop_with_error <- FALSE - if (length(unnamed_margins) <= length(margs_to_add)) { - if (any(unlist(afml[unnamed_margins]) != unlist(margs_to_add[1:length(unnamed_margins)]))) { - stop_with_error <- TRUE - } - names(afml)[unnamed_margins] <- names(margs_to_add)[1:length(unnamed_margins)] - margs_to_add <- margs_to_add[- (1:length(margs_to_add))] - } else { - if (any(unlist(afml[unnamed_margins[1:length(margs_to_add)]]) != unlist(margs_to_add))) { - stop_with_error <- TRUE - } - names(afml)[unnamed_margins[1:length(margs_to_add)]] <- names(margs_to_add) - margs_to_add <- list() - } - if (stop_with_error) { - stop("Found unnamed margins (for some objects in parameter ", - "'data') that have been associated by their position to ", - "named margins in other objects in 'data' and do not have ", - "matching length. It could also be that the unnamed ", - "margins don not follow the same order as the named ", - "margins. In that case, either put the corresponding names ", - "to the dimensions of the objects in 'data', or put them ", - "in a consistent order.") - } - } afml <- c(afml, margs_to_add) } else { afml <- as.list(dim(data[[i]])[margins[[i]]]) } - } else { - margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) - names(margs_to_add) <- rep('', length(margs_to_add)) - if (length(afml) > 0) { - stop_with_error <- FALSE - if (length(afml) >= length(margs_to_add)) { - if (any(unlist(margs_to_add) != unlist(afml[1:length(margs_to_add)]))) { - stop_with_error <- TRUE - } - } else { - if (any(unlist(margs_to_add)[1:length(afml)] != unlist(afml))) { - stop_with_error <- TRUE - } - margs_to_add <- margs_to_add[- (1:length(afml))] - afml <- c(afml, margs_to_add) - } - if (stop_with_error) { - stop("Found unnamed margins (for some objects in parameter ", - "'data') that have been associated by their position to ", - "named margins in other objects in 'data' and do not have ", - "matching length. It could also be that the unnamed ", - "margins don not follow the same order as in other ", - "objects. In that case, either put the corresponding names ", - "to the dimensions of the objects in 'data', or put them ", - "in a consistent order.") - } - } else { - afml <- margs_to_add - } - } - } - missing_margin_names <- which(names(afml) == '') - if (length(missing_margin_names) > 0) { - names(afml)[missing_margin_names] <- paste0('_unnamed_margin_', - 1:length(missing_margin_names), '_') + #} } # afml is now a named list with the lenghts of all margins. Each margin # appears once only. If some names are not provided, they are set automatically @@ -316,8 +336,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, # across them, and use data arrays repeatedly as needed. margins_afml <- margins for (i in 1:length(data)) { - if (!is.null(margins_names[[i]])) { - + if (length(margins[[i]]) > 0) { margins_afml[[i]] <- sapply(margins_names[[i]], function(x) { sapply(x, @@ -327,11 +346,6 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, ) } ) - } else if (length(margins_afml[[i]]) > 0) { - margins_afml[[i]] <- margins_afml[[i]] - min(margins_afml[[i]]) + 1 - # The missing margin and dim names are filled in. - margins_names[[i]] <- names(afml)[margins_afml[[i]]] - names(dim(data[[i]]))[margins[[i]]] <- margins_names[[i]] } } common_margs <- margins_afml[[1]] @@ -347,79 +361,41 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } } } - non_common_margs <- 1:length(afml) - if (length(common_margs) > 0) { - non_common_margs <- non_common_margs[- common_margs] + if (length(afml) > 0) { + non_common_margs <- 1:length(afml) + if (length(common_margs) > 0) { + non_common_margs <- non_common_margs[- common_margs] + } + } else { + non_common_margs <- NULL } # common_margs is now a numeric vector with the indices of the common # margins (i.e. their position in afml) # non_common_margs is now a numeric vector with the indices of the # non-common margins (i.e. their position in afml) - - .isolate <- function(data, margin_length, drop = FALSE) { - eval(dim(environment()$data)) - structure(list(env = environment(), index = margin_length, - drop = drop, subs = as.name("[")), - class = c("indexed_array")) - } - .consolidate <- function(subsets, dimnames, out_dims) { - lapply(setNames(1:length(subsets), names(subsets)), - function(x) { - if (length(out_dims[[x]]) > 0) { - dims <- dim(subsets[[x]]) - if (!is_unnamed[x]) { - names(dims) <- dimnames[[x]] - } - dims <- dims[out_dims[[x]]] - array(subsets[[x]], dim = dims) - } else { - as.vector(subsets[[x]]) - } - }) - } - - data_indexed <- vector('list', length(data)) - data_indexed_indices <- vector('list', length(data)) - for (i in 1 : length(data)) { - margs_i <- which(names(dim(data[[i]])) %in% names(afml[c(non_common_margs, common_margs)])) - false_margs_i <- which(margs_i %in% target_dims[[i]]) - margs_i <- setdiff(margs_i, false_margs_i) - if (length(margs_i) > 0) { - margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) - margin_length[- margs_i] <- "" - } else { - margin_length <- as.list(rep("", length(dim(data[[i]])))) - } - margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - data_indexed[[i]] <- .isolate(data[[i]], margin_length) - if (length(margs_i) > 0) { - data_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[margs_i]), - dim = dim(data[[i]])[margs_i]) - } else { - data_indexed_indices[[i]] <- array(1, dim = 1) - } - } - - splatted_f <- splat(AtomicFun) - - # Iterate along all non-common margins if (length(c(non_common_margs, common_margs)) > 0) { marg_inds_ordered <- sort(c(non_common_margs, common_margs)) - margins_array <- ma <- array(1:prod(unlist(afml[marg_inds_ordered])), - dim = unlist(afml[marg_inds_ordered])) + margins_array_dims <- mad <- unlist(afml[marg_inds_ordered]) } else { - ma <- array(1) + margins_array_dims <- mad <- NULL } - arrays_of_results <- NULL - found_first_result <- FALSE - total_size <- prod(dim(ma)) - if (!is.null(ncores)) { - chunk_size <- round(total_size / (ncores * 4)) + # Sharing workload across cores. Each core will run 4 chunks if possible. + # the larger the split factor, the smaller the amount of data that + # will be processed at once and the finer the granules to be distributed + # across cores, but the larger the overhead for granule startup, etc. + total_size <- prod(mad) + if (split_factor == 'greatest') { + chunks_per_core <- ceiling(total_size / ncores) } else { - chunk_size <- 4 + chunks_per_core <- 4 * split_factor + } + if (!is.null(ncores)) { + chunk_size <- round(total_size / (ncores * chunks_per_core)) } + #} else { + # chunk_size <- 4 + #} if (chunk_size < 1) { chunk_size <- 1 } @@ -429,32 +405,89 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, chunk_sizes <- c(chunk_sizes, total_size %% chunk_size) } -# need to add progress bar + input_margin_weights <- vector('list', length(data)) + for (i in 1:length(data)) { + marg_sizes <- dim(data[[i]])[margins[[i]]] + input_margin_weights[[i]] <- sapply(1:length(marg_sizes), + function(k) prod(c(1, marg_sizes)[1:k])) + } + + # TODO: need to add progress bar + splatted_f <- splat(fun) + # For a selected use case, these are the timings: + # - total: 17 s + # - preparation + post: 1 s + # - llply (40 iterations): 16 s + # - one iteration: 1.5s with profiling of 50 sub-iterations (0.4 without) + # - intro: 0 s + # - for loop with profiling of 50 sub-iterations (5000 sub-iterations): 1.5 s + # - one sub-iteration: 0.0003 s + # - intro: 0.000125 s + # - splatted_f: 0.000125 s + # - outro: 0.00005 iteration <- function(m) { + # INTRO + n <- 1 + first_index <- n + (m - 1) * chunk_size + first_marg_indices <- arrayInd(first_index, mad) + names(first_marg_indices) <- names(mad) sub_arrays_of_results <- list() found_first_sub_result <- FALSE - for (n in 1:chunk_sizes[m]) { - # j is the index of the data piece to load in data_indexed - j <- n + (m - 1) * chunk_size - marg_indices <- arrayInd(j, dim(ma)) - names(marg_indices) <- names(dim(ma)) - input <- list() - # Each iteration of n, the variable input is populated with sub-arrays for - # each object in data (if possible). For each set of 'input's, the - # splatted_f is applied in parallel if possible. - for (i in 1:length(data_indexed)) { - inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) - if (length(inds_to_take) > 0) { - marg_inds_to_take <- marg_indices[inds_to_take][names(dim(data_indexed_indices[[i]]))] - input[[i]] <- data_indexed[[i]][[do.call("[", - c(list(x = data_indexed_indices[[i]]), marg_inds_to_take, - list(drop = TRUE)))]] + iteration_indices_to_take <- list() + for (i in 1:length(data)) { + iteration_indices_to_take[[i]] <- as.list(rep(TRUE, length(dim(data[[i]])))) + names(iteration_indices_to_take[[i]]) <- names(dim(data[[i]])) + } + + add_one_multidim <- function(index, dims) { + stop_iterating <- FALSE + check_dim <- 1 + ndims <- length(index) + while (!stop_iterating) { + index[check_dim] <- index[check_dim] + 1 + if (index[check_dim] > dims[check_dim]) { + index[check_dim] <- 1 + check_dim <- check_dim + 1 + if (check_dim > ndims) { + check_dim <- rep(1, ndims) + stop_iterating <- TRUE + } } else { - input[[i]] <- data_indexed[[i]][[1]] + stop_iterating <- TRUE + } + } + index + } + + # FOR LOOP + for (n in 1:chunk_sizes[m]) { + # SUB-ITERATION INTRO + iteration_input <- list() + for (i in 1:length(data)) { + input_margin_dim_index <- first_marg_indices[margins_names[[i]]] + iteration_indices_to_take[[i]][margins_names[[i]]] <- input_margin_dim_index + iteration_input[[i]] <- do.call('[', c(list(x = data[[i]]), + iteration_indices_to_take[[i]], + list(drop = FALSE))) + num_margins <- length(margins_names[[i]]) + if (num_margins > 0) { + if (num_margins == length(dim(iteration_input[[i]]))) { + dim(iteration_input[[i]]) <- NULL + } else { + dims_to_remove <- 1:num_margins + length(target_dims[[i]]) + dim(iteration_input[[i]]) <- dim(iteration_input[[i]])[-dims_to_remove] + #if only one dim remains, make as.vector + } } } - result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), - target_dims), ...) + if (!is.null(mad)) { + first_marg_indices <- add_one_multidim(first_marg_indices, mad) + } + + # SPLATTED_F + result <- splatted_f(iteration_input, ...) + + # SUB-ITERATION OUTRO if (!is.list(result)) { result <- list(result) } @@ -462,7 +495,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, sub_arrays_of_results <- vector('list', length(result)) if (!is.null(output_dims)) { if (length(output_dims) != length(sub_arrays_of_results)) { - stop("The 'AtomicFun' returns ", length(sub_arrays_of_results), + stop("The 'fun' returns ", length(sub_arrays_of_results), " elements, but ", length(output_dims), " elements were expected.") } @@ -472,6 +505,10 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } else { names(sub_arrays_of_results) <- paste0('output', 1:length(result)) } + len0_names <- which(nchar(names(sub_arrays_of_results)) == 0) + if (length(len0_names) > 0) { + names(sub_arrays_of_results)[len0_names] <- paste0('output', len0_names) + } } atomic_fun_out_dims <- vector('list', length(result)) for (component in 1:length(result)) { @@ -499,13 +536,13 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, if (!is.null(output_dims)) { # Check number of outputs. if (length(output_dims) != length(result)) { - stop("Expected AtomicFun to return ", length(output_dims), " components, ", + stop("Expected fun to return ", length(output_dims), " components, ", "but ", length(result), " found.") } # Check number of output dimensions is correct. for (component in 1:length(result)) { if (length(atomic_fun_out_dims[[component]]) != length(output_dims[[component]])) { - stop("Expected ", component, "st returned element by 'AtomicFun' ", + stop("Expected ", component, "st returned element by 'fun' ", "to have ", length(output_dims[[component]]), " dimensions, ", "but ", length(atomic_fun_out_dims[[component]]), " found.") } @@ -520,16 +557,17 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, if (parallel) registerDoParallel(ncores) result <- llply(1:length(chunk_sizes), iteration, .parallel = parallel) if (parallel) registerDoSEQ() - # Merge the results - chunk_length <- NULL + arrays_of_results <- NULL + found_first_result <- FALSE + result_chunk_lengths <- vector('list', length(result[[1]])) fun_out_dims <- vector('list', length(result[[1]])) for (m in 1:length(result)) { if (!found_first_result) { arrays_of_results <- vector('list', length(result[[1]])) if (!is.null(output_dims)) { if (length(output_dims) != length(arrays_of_results)) { - stop("The 'AtomicFun' returns ", length(arrays_of_results), " elements, but ", + stop("The 'fun' returns ", length(arrays_of_results), " elements, but ", length(output_dims), " elements were expected.") } names(arrays_of_results) <- names(output_dims) @@ -542,23 +580,25 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, for (component in 1:length(result[[m]])) { component_dims <- dim(result[[m]][[component]]) if (!found_first_result) { + result_chunk_lengths[[component]] <- prod(component_dims) if (length(component_dims) > 1) { fun_out_dims[[component]] <- component_dims[- length(component_dims)] } - arrays_of_results[[component]] <- array(dim = c(fun_out_dims[[component]], - dim(ma))) - dimnames_to_remove <- which(grepl('^_unnamed_margin_', - names(dim(arrays_of_results[[component]])))) - if (length(dimnames_to_remove) > 0) { - names(dim(arrays_of_results[[component]]))[dimnames_to_remove] <- rep('', length(dimnames_to_remove)) - } - if (all(names(dim(arrays_of_results[[component]])) == '')) { - names(dim(arrays_of_results[[component]])) <- NULL + if (length(fun_out_dims[[component]]) + length(mad) > 0) { + arrays_of_results[[component]] <- array(dim = c(fun_out_dims[[component]], + mad)) + dimnames_to_remove <- which(grepl('^_unnamed_dim_', + names(dim(arrays_of_results[[component]])))) + if (length(dimnames_to_remove) > 0) { + names(dim(arrays_of_results[[component]]))[dimnames_to_remove] <- rep('', length(dimnames_to_remove)) + } + if (all(names(dim(arrays_of_results[[component]])) == '')) { + names(dim(arrays_of_results[[component]])) <- NULL + } } - chunk_length <- prod(component_dims) } arrays_of_results[[component]][(1:prod(component_dims)) + - (m - 1) * chunk_length] <- result[[m]][[component]] + (m - 1) * result_chunk_lengths[[component]]] <- result[[m]][[component]] } if (!found_first_result) { found_first_result <- TRUE diff --git a/R/zzz.R b/R/zzz.R index 3e04077786b652e7a1db6acb4d375fc853115fbd..2bf747e7d38039995b33abe6636e771e6f2f30f5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,143 +1,13 @@ # Function to permute arrays of non-atomic elements (e.g. POSIXct) .aperm2 <- function(x, new_order) { - y <- array(1:length(x), dim = dim(x)) - y <- aperm(y, new_order) old_dims <- dim(x) - x <- x[as.vector(y)] + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } dim(x) <- old_dims[new_order] x } - -# This function is a helper for the function .MergeArrays. -# It expects as inputs two named numeric vectors, and it extends them -# with dimensions of length 1 until an ordered common dimension -# format is reached. -.MergeArrayDims <- function(dims1, dims2) { - new_dims1 <- c() - new_dims2 <- c() - while (length(dims1) > 0) { - if (names(dims1)[1] %in% names(dims2)) { - pos <- which(names(dims2) == names(dims1)[1]) - dims_to_add <- rep(1, pos - 1) - if (length(dims_to_add) > 0) { - names(dims_to_add) <- names(dims2[1:(pos - 1)]) - } - new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) - new_dims2 <- c(new_dims2, dims2[1:pos]) - dims1 <- dims1[-1] - dims2 <- dims2[-c(1:pos)] - } else { - new_dims1 <- c(new_dims1, dims1[1]) - new_dims2 <- c(new_dims2, 1) - names(new_dims2)[length(new_dims2)] <- names(dims1)[1] - dims1 <- dims1[-1] - } - } - if (length(dims2) > 0) { - dims_to_add <- rep(1, length(dims2)) - names(dims_to_add) <- names(dims2) - new_dims1 <- c(new_dims1, dims_to_add) - new_dims2 <- c(new_dims2, dims2) - } - list(new_dims1, new_dims2) -} - -# This function takes two named arrays and merges them, filling with -# NA where needed. -# dim(array1) -# 'b' 'c' 'e' 'f' -# 1 3 7 9 -# dim(array2) -# 'a' 'b' 'd' 'f' 'g' -# 2 3 5 9 11 -# dim(.MergeArrays(array1, array2, 'b')) -# 'a' 'b' 'c' 'e' 'd' 'f' 'g' -# 2 4 3 7 5 9 11 -.MergeArrays <- function(array1, array2, along) { - if (!(is.null(array1) || is.null(array2))) { - if (!(identical(names(dim(array1)), names(dim(array2))) && - identical(dim(array1)[-which(names(dim(array1)) == along)], - dim(array2)[-which(names(dim(array2)) == along)]))) { - new_dims <- .MergeArrayDims(dim(array1), dim(array2)) - dim(array1) <- new_dims[[1]] - dim(array2) <- new_dims[[2]] - for (j in 1:length(dim(array1))) { - if (names(dim(array1))[j] != along) { - if (dim(array1)[j] != dim(array2)[j]) { - if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { - na_array_dims <- dim(array2) - na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] - na_array <- array(dim = na_array_dims) - array2 <- abind(array2, na_array, along = j) - names(dim(array2)) <- names(na_array_dims) - } else { - na_array_dims <- dim(array1) - na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] - na_array <- array(dim = na_array_dims) - array1 <- abind(array1, na_array, along = j) - names(dim(array1)) <- names(na_array_dims) - } - } - } - } - } - if (!(along %in% names(dim(array2)))) { - stop("The dimension specified in 'along' is not present in the ", - "provided arrays.") - } - array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) - names(dim(array1)) <- names(dim(array2)) - } else if (is.null(array1)) { - array1 <- array2 - } - array1 -} - -# Takes as input a list of arrays. The list must have named dimensions. -.MergeArrayOfArrays <- function(array_of_arrays) { - MergeArrays <- .MergeArrays - array_dims <- (dim(array_of_arrays)) - dim_names <- names(array_dims) - - # Merge the chunks. - for (dim_index in 1:length(dim_names)) { - dim_sub_array_of_chunks <- dim_sub_array_of_chunk_indices <- NULL - if (dim_index < length(dim_names)) { - dim_sub_array_of_chunks <- array_dims[(dim_index + 1):length(dim_names)] - names(dim_sub_array_of_chunks) <- dim_names[(dim_index + 1):length(dim_names)] - dim_sub_array_of_chunk_indices <- dim_sub_array_of_chunks - sub_array_of_chunk_indices <- array(1:prod(dim_sub_array_of_chunk_indices), - dim_sub_array_of_chunk_indices) - } else { - sub_array_of_chunk_indices <- NULL - } - sub_array_of_chunks <- vector('list', prod(dim_sub_array_of_chunks)) - dim(sub_array_of_chunks) <- dim_sub_array_of_chunks - for (i in 1:prod(dim_sub_array_of_chunks)) { - if (!is.null(sub_array_of_chunk_indices)) { - chunk_sub_indices <- which(sub_array_of_chunk_indices == i, arr.ind = TRUE)[1, ] - } else { - chunk_sub_indices <- NULL - } - for (j in 1:(array_dims[dim_index])) { - new_chunk <- do.call('[[', c(list(x = array_of_arrays), - as.list(c(j, chunk_sub_indices)))) - if (is.null(new_chunk)) { - stop("Chunks missing.") - } - if (is.null(sub_array_of_chunks[[i]])) { - sub_array_of_chunks[[i]] <- new_chunk - } else { - sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]], - new_chunk, - dim_names[dim_index]) - } - } - } - array_of_arrays <- sub_array_of_chunks - rm(sub_array_of_chunks) - gc() - } - - array_of_arrays[[1]] -} diff --git a/README.md b/README.md index a6cfa357043700e60f636fe03485f60a44609ccd..99d3eab088df8157893d0ebba69966af6c8269aa 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,91 @@ ---- -title: "Apply a Function Taking Multiple Numeric Objects as Input Across Multiple Arrays" -author: "Alasdair" -date: "14 July 2017" -output: html_document ---- +## multiApply [![build status](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/build.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) [![CRAN version](http://www.r-pkg.org/badges/version/multiApply)](https://cran.r-project.org/package=multiApply) [![coverage report](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/coverage.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) [![License: LGPL v3](https://img.shields.io/badge/License-LGPL%20v3-blue.svg)](https://www.gnu.org/licenses/lgpl-3.0) [![CRAN RStudio Downloads](https://cranlogs.r-pkg.org/badges/multiApply)](https://cran.rstudio.com/web/packages/multiApply/index.html) -This package extends the apply and plyr families of functions to applications which involve the use of multiple arrays as input. +This package includes the function `Apply` as its only function. It extends the `apply` function to applications in which a function needs to be applied simultaneously over multiple input arrays. Although this can be done manually with for loops and calls to the base `apply` function, it can often be a challenging task which can easily result in error-prone or memory-unefficient code. -This is especially useful for climate data related applications, where data is often distributed across multiple arrays with different dimensions (e.g experimental array 1, experimental array 2 and the observations). The multiApply:Apply function reduces the need write loops for every application. +A very simple example follows showing the kind of situation where `Apply` can be useful: imagine you have two arrays, each containing five 2x2 matrices, and you want to perform the multiplication of each of the five pairs of matrices. Next, one of the best ways to do this with base R (plus some helper libraries): +```r +library(plyr) +library(abind) +A <- array(1:20, c(5, 2, 2)) +B <- array(1:20, c(5, 2, 2)) + +D <- aaply(X = abind(A, B, along = 4), + MARGINS = 1, + FUN = function(x) x[,,1] %*% x[,,2]) +``` + +Since the choosen use case is very simple, this solution is not excessively complex, but the complexity would increase as the function to apply required additional dimensions or inputs, and would be unapplicable if multiple outputs were to be returned. In addition, the function to apply (matrix multiplication) had to be redefined for this particular case (multiplication of the first matrix along the third dimension by the second along the third dimension). + +Next, an example of how to reach the same results using `Apply`: + +```r +library(multiApply) + +A <- array(1:20, c(5, 2, 2)) +B <- array(1:20, c(5, 2, 2)) + +D <- Apply(data = list(A, B), + target_dims = c(2, 3), + fun = "%*%")$output1 +``` + +This solution takes half the time to complete (as measured with `microbenchmark` with inputs of different sizes), and is cleaner and extensible to functions receiving any number of inputs with any number of dimensions, or returning any number of outputs. Although the peak RAM usage (as measured with `peakRAM`) of both solutions in this example is about the same, it is challenging to avoid memory duplications when using custom code in more complex applications, and can usually require hours of dedication. `Apply` scales well to large inputs and has been designed to be fast and avoid memory duplications. + +Additionally, multi-code computation can be enabled via the `ncores` parameter, as shown next. Although in this minimalist example using multi-core would make the execution slower, in applications where the inputs are larger the wall-clock time is reduced dramatically. + +```r +D <- Apply(data = list(A, B), + target_dims = c(2, 3), + fun = "%*%", + ncores = 4)$output1 +``` + +In contrast to `apply` and variants, this package suggests the use of 'target dimensions' as opposite to the 'margins' for specifying the dimensions relevant to the function to be applied. Additionally, it supports functions returning multiple vector or arrays, and can transparently uses multi-core. + +### Installation + +In order to install and load the latest published version of the package on CRAN, you can run the following lines in your R session: + +```r +install.packages('multiApply') +library(multiApply) +``` + +Also, you can install the latest stable version from the GitHub repository as follows: + +```r +devtools::install_git('https://earth.bsc.es/gitlab/ces/multiApply') +``` + +### How to use + +This package consistis in a single function, `Apply`, which is used in a similar fashion as the base `apply`. Full documentation can be found in `?Apply`. + +A simple example is provided next. In this example, we have two data arrays. The first, with information on the number of items sold in 5 different stores (located in different countries) during the past 1000 days, for 200 different items. The second, with information on the price point for each item in each store. + +The example shows how to compute the total income for each of the stores, straightforwardly combining the input data arrays, by automatically applying repeatedly the 'atomic' function that performs only the essential calculations for a single case. + +```r +dims <- c(store = 5, item = 200, day = 1000) +sales_amount <- array(rnorm(prod(dims)), dims) + +dims <- c(store = 5, item = 200) +sales_price <- array(rnorm(prod(dims)), dims) + +income_function <- function(x, y) { + # Expected inputs: + # x: array with dimensions (item, day) + # y: price point vector with dimension (item) + sum(rowSums(x) * y) +} + +income <- Apply(data = list(sales_amount, sales_price), + target_dims = list(c('item', 'day'), 'item'), + income_function) + +dim(income$output1) +# store +# 5 +``` diff --git a/man/Apply.Rd b/man/Apply.Rd index 2f69b6c3345ea6ed004379bc886368ce9fdcc546..4fe37e951ce203ef6ca4b605a45d0b86cd41c5c7 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -2,44 +2,51 @@ % Please edit documentation in R/Apply.R \name{Apply} \alias{Apply} -\title{Wrapper for Applying Atomic Functions to Arrays.} +\title{Apply Functions to Multiple Multidimensional Arrays or Vectors} \usage{ -Apply(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, - margins = NULL, ncores = NULL) +Apply(data, target_dims = NULL, fun, ..., output_dims = NULL, + margins = NULL, guess_dim_names = TRUE, ncores = NULL, + split_factor = 1) } \arguments{ -\item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.} +\item{data}{One or a list of numeric object (vector, matrix or array). They must be in the same order as expected by the function provided in the parameter 'fun'. The dimensions do not necessarily have to be ordered. If the 'target_dims' require a different order than the provided, \code{Apply} will automatically reorder the dimensions as needed.} -\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims.} +\item{target_dims}{One or a list of vectors (or NULLs) containing the dimensions to be input into fun for each of the objects in the data. If a single vector of target dimensions is specified and multiple inputs are provided in 'data, then the single set of target dimensions is re-used for all of the inputs. These vectors can contain either integers specifying the position of the dimensions, or character strings corresponding to the dimension names. This parameter is mandatory if 'margins' are not specified. If both 'margins' and 'target_dims' are specified, 'margins' takes priority.} -\item{AtomicFun}{Function to be applied to the arrays.} +\item{fun}{Function to be applied to the arrays. Must receive as many inputs as provided in 'data', each with as many dimensions as specified in 'target_dims' or as the total number of dimensions in 'data' minus the ones specified in 'margins'. The function can receive other additional fixed parameters (see parameter '...' of \code{Apply}). The function can return one or a list of numeric vectors or multidimensional arrays, optionally with dimension names which will be propagated to the final result. The returned list can optionally be named, with a name for each output, which will be propagated to the resulting array. The function can optionally be provided with the attributes 'target_dims' and 'output_dims'. In that case, the corresponding parameters of \code{Apply} do not need to be provided. The function can expect named dimensions for each of its inputs, in the same order as specified in 'target_dims' or, if no 'target_dims' have been provided, in the same order as provided in 'data'.} -\item{...}{Additional arguments to be used in the AtomicFun.} +\item{...}{Additional fixed arguments expected by the function provided in the parameter 'fun'.} -\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output).} +\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the fun for each of the objects it returns (or a single vector if the function has only one output).} -\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims.} +\item{margins}{One or a list of vectors (or NULLs) containing the 'margin' dimensions to be looped over for each input in 'data'. If a single vector of margins is specified and multiple inputs are provided in 'data', then the single set of margins is re-used for all of the inputs. These vectors can contain either integers specifying the position of the margins, or character strings corresponding to the dimension names. If both 'margins' and 'target_dims' are specified, 'margins' takes priority.} -\item{ncores}{The number of multicore threads to use for parallel computation.} +\item{guess_dim_names}{Whether to automatically guess missing dimension names for dimensions of equal length across different inputs in 'data' with a warning (TRUE; default), or to crash whenever unnamed dimensions of equa length are identified across different inputs (FALSE).} + +\item{ncores}{The number of parallel processes to spawn for the use for parallel computation in multiple cores.} + +\item{split_factor}{Factor telling to which degree the input data should be split into smaller pieces to be processed by the available cores. By default (split_factor = 1) the data is split into 4 pieces for each of the cores (as specified in ncores). A split_factor of 2 will result in 8 pieces for each of the cores, and so on. The special value 'greatest' will split the input data into as many pieces as possible.} } \value{ -List of arrays or matrices or vectors resulting from applying AtomicFun to data. +List of arrays or matrices or vectors resulting from applying 'fun' to 'data'. } \description{ -This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions. +This function efficiently applies a given function, which takes N vectors or multi-dimensional arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N vectors or multi-dimensional arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array the function is to be applied over with the \code{margins} or \code{target_dims} parameters. The function to be applied can receive other helper parameters and return any number of numeric vectors or multidimensional arrays. The target dimensions or margins can be specified by their names, as long as the inputs are provided with dimension names (recommended). This function can also use multi-core in a transparent way if requested via the \code{ncores} parameter.\cr\cr The following steps help to understand how \code{Apply} works:\cr\cr - The function receives N arrays with Dn dimensions each.\cr - The user specifies, for each of the arrays, which of its dimensions are 'target' dimensions (dimensions which the function provided in 'fun' operates with) and which are 'margins' (dimensions to be looped over).\cr - \code{Apply} will generate an array with as many dimensions as margins in all of the input arrays. If a margin is repeated across different inputs, it will appear only once in the resulting array.\cr - For each element of this resulting array, the function provided in the parameter'fun' is applied to the corresponding sub-arrays in 'data'.\cr - If the function returns a vector or a multidimensional array, the additional dimensions will be prepended to the resulting array (in left-most positions).\cr - If the provided function returns more than one vector or array, the process above is carried out for each of the outputs, resulting in a list with multiple arrays, each with the combination of all target dimensions (at the right-most positions) and resulting dimensions (at the left-most positions). } \details{ -When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. +When using a single object as input, Apply is almost identical to the apply function (as fast or slightly slower in some cases; with equal or improved -smaller- memory footprint). } \examples{ #Change in the rate of exceedance for two arrays, with different #dimensions, for some matrix of exceedances. -data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)), - array(rnorm(100), c(10, 10))) -test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / - (sum(y > z) / (length(y)))) * 100} -margins = list(c(1, 2), c(1, 2), c(1,2)) -test <- Apply(data, margins = margins, AtomicFun = "test_fun") +data <- list(array(rnorm(1000), c(5, 10, 20)), + array(rnorm(500), c(5, 10, 10)), + array(rnorm(50), c(5, 10))) +test_fun <- function(x, y, z) { + ((sum(x > z) / (length(x))) / + (sum(y > z) / (length(y)))) * 100 +} +test <- Apply(data, target = list(3, 3, NULL), test_fun) } \references{ Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. diff --git a/multiApply-manual.pdf b/multiApply-manual.pdf index bf1c86e977e001ee0b102d7b749e031b158ac0d0..c1ab41cb14670fb7d596548390f3070814cc0963 100644 Binary files a/multiApply-manual.pdf and b/multiApply-manual.pdf differ diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000000000000000000000000000000000000..b948b35786c30acb128038bb59fb95c62e8a3a7e --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(multiApply) + +test_check("multiApply") diff --git a/tests/testthat/test-sanity-checks.R b/tests/testthat/test-sanity-checks.R new file mode 100644 index 0000000000000000000000000000000000000000..77fdb95be41843f0b77023f9604c94dac40da7e1 --- /dev/null +++ b/tests/testthat/test-sanity-checks.R @@ -0,0 +1,39 @@ +context("Sanity checks") + +test_that("required arguments are provided", { + expect_error( + Apply(), + "missing, with no default" + ) + expect_error( + Apply(1:10), + "missing, with no default" + ) + expect_error( + Apply(1:10, fun = mean), + "must be specified" + ) +}) + +test_that("arguments have the right type", { + expect_equal( + Apply(1:10, NULL, mean), + list(output1 = array(1:10, dim = 10)) + ) + expect_error( + Apply(numeric(0), NULL, mean), + "must be of length > 0" + ) + #expect_error( + #) +}) + + +#context("Parameter order") +# +#test_that("", { +# expect_that( +# Apply(), +# throws_error("") +# ) +#}) diff --git a/tests/testthat/test-use-cases.R b/tests/testthat/test-use-cases.R new file mode 100644 index 0000000000000000000000000000000000000000..22aa8cad62affb72f0622a9a54c5db5fa933229a --- /dev/null +++ b/tests/testthat/test-use-cases.R @@ -0,0 +1,1276 @@ +context("Use cases") + +test_that("in1: 1 val; targ. dims: 0; out1: 0 val", { + f <- function(x) NULL + expect_equal( + Apply(1, NULL, f), + list(output1 = array(logical(0), dim = c(0, 1))) + ) + f <- function(x) numeric(0) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(logical(0), dim = c(0, 1))) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 0 val", { + f <- function(x) NULL + expect_equal( + Apply(1, 1, f), + list(output1 = array(logical(0))) + ) + f <- function(x) numeric(0) + expect_equal( + Apply(1, 1, f), + list(output1 = array(logical(0))) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 1 val", { + expect_equal( + Apply(1, NULL, mean), + list(output1 = array(1)) + ) +}) + +test_that("in1: 1 val; targ. dims: 1; out1: 1 val", { + expect_equal( + Apply(1, 1, mean), + list(output1 = 1) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 3 val", { + f <- function(x) x:(x + 2) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(3, 1))) + ) +}) + +test_that("in1: 1 val; targ. dims: 1; out1: 3 val", { + f <- function(x) x:(x + 2) + expect_equal( + Apply(1, 1, f), + list(output1 = array(1:3)) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 1 dim", { + # unnamed output dim + f <- function(x) array(x:(x + 2)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(3, 1))) + ) + # named output dim + f <- function(x) array(x:(x + 2), dim = c(time = 3)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(time = 3, 1))) + ) +}) + +test_that("in1: 1 val; targ. dims: 1; out1: 1 dim", { + # unnamed output dim + f <- function(x) array(x:(x + 2)) + expect_equal( + Apply(1, 1, f), + list(output1 = array(1:3)) + ) + # named output dim + f <- function(x) array(x:(x + 2), dim = c(time = 3)) + expect_equal( + Apply(1, 1, f), + list(output1 = array(1:3, dim = c(time = 3))) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 2 dim", { + # unnamed output dims + f <- function(x) array(x:(x + 2), dim = c(3, 4)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(3, 4, 1))) + ) + # named output dim 1 + f <- function(x) array(x:(x + 2), dim = c(time = 3, 4)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(time = 3, 4, 1))) + ) + # named output dim 2 + f <- function(x) array(x:(x + 2), dim = c(3, region = 4)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(3, region = 4, 1))) + ) + # named output dims + f <- function(x) array(x:(x + 2), dim = c(time = 3, region = 4)) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1:3, dim = c(time = 3, region = 4, 1))) + ) +}) + +test_that("in1: 1 val; targ. dims: 1; out1: 2 dim", { + # unnamed output dims + f <- function(x) array(x:(x + 2), dim = c(3, 4)) + expect_equal( + Apply(1, 1, f), + list(output1 = array(1:3, dim = c(3, 4))) + ) + # named output dims + f <- function(x) array(x:(x + 2), dim = c(time = 3, region = 4)) + expect_equal( + Apply(1, 1, f), + list(output1 = array(1:3, dim = c(time = 3, region = 4))) + ) +}) + +test_that("in1: 1 val; targ. dims: 0; out1: 1 val; out 2: 1 val", { + # unnamed outputs + f <- function(x) list(mean(x), mean(x) + 1) + expect_equal( + Apply(1, NULL, f), + list(output1 = array(1), + output2 = array(2)) + ) + # named outputs + f <- function(x) list(a = mean(x), b = mean(x) + 1) + expect_equal( + Apply(1, NULL, f), + list(a = array(1), + b = array(2)) + ) +}) + +test_that("in1: 1 val; targ. dims: 1; out1: 3 val; out 2: 2 dim", { + f <- function(x) list(a = x:(x + 2), array(x:(x + 2), dim = c(3, region = 4))) + expect_equal( + Apply(1, 1, f), + list(a = array(1:3), + output2 = array(1:3, dim = c(3, region = 4))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 0 val", { + # unnamed dim + # unnamed output + f <- function(x) NULL + expect_equal( + Apply(array(1:10), NULL, f), + list(output1 = array(logical(0), dim = c(0, 10))) + ) + # named dim + # named output + f <- function(x) list(out1 = NULL) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(out1 = array(logical(0), dim = c(0, a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 0 val", { + # unnamed dim + # unnamed output + f <- function(x) NULL + expect_equal( + Apply(array(1:10), 1, f), + list(output1 = array(logical(0))) + ) + # named dim + # named output + f <- function(x) list(out1 = NULL) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(out1 = array(logical(0), dim = c(0))) + ) + # named target dim + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 'a', f), + list(out1 = array(logical(0), dim = c(0))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 1 val", { + # unnamed dim + # unnamed output + expect_equal( + Apply(array(1:10), NULL, mean), + list(output1 = array(1:10)) + ) + # named dim + # named output + f <- function(x) list(out1 = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(out1 = array(1:10, dim = c(a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 1 val", { + # unnamed dim + # unnamed output + expect_equal( + Apply(array(1:10), 1, mean), + list(output1 = 5.5) + ) + # named dim + # named output + f <- function(x) list(out1 = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(out1 = 5.5) + ) + # named target dim + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 'a', f), + list(out1 = 5.5) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 3 vals", { + # unnamed dim + # unnamed output + f <- function(x) x:(x + 2) + expect_equal( + Apply(array(1:10), NULL, f), + list(output1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(3, 10))) + ) + # named dim + # named output + f <- function(x) list(out1 = x:(x + 2)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(out1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(3, a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 3 vals", { + # unnamed dim + # unnamed output + f <- function(x) x[1]:(x[1] + 2) + expect_equal( + Apply(array(1:10), 1, f), + list(output1 = array(1:3)) + ) + # named dim + # named output + f <- function(x) list(out1 = x[1]:(x[1] + 2)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(out1 = array(1:3)) + ) + # named target dim + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 'a', f), + list(out1 = array(1:3)) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 1 dim", { + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) array(x:(x + 2)) + expect_equal( + Apply(array(1:10), NULL, f), + list(output1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(3, 10))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(out1 = array(x:(x + 2), dim = c(b = 3))) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(out1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(b = 3, a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 1 dim", { + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) array(x[1]:(x[1] + 2)) + expect_equal( + Apply(array(1:10), 1, f), + list(output1 = array(1:3)) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(out1 = array(x[1]:(x[1] + 2), dim = c(b = 3))) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(out1 = array(1:3, dim = c(b = 3))) + ) + # named target dim + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 'a', f), + list(out1 = array(1:3, dim = c(b = 3))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 2 dims", { + # unnamed input dim + # unnamed output + # unnamed output dims + f <- function(x) array(x:(x + 2), dim = c(3, 4)) + expect_equal( + Apply(array(1:10), NULL, f), + list(output1 = array(sapply(1:10, function(x) rep(x:(x + 2), 4)), dim = c(3, 4, 10))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(out1 = array(x:(x + 2), dim = c(b = 3, c = 4))) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(out1 = array(sapply(1:10, function(x) rep(x:(x + 2), 4)), dim = c(b = 3, c = 4, a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 2 dims", { + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) array(x[1]:(x[1] + 2), dim = c(3, 4)) + expect_equal( + Apply(array(1:10), 1, f), + list(output1 = array(sapply(1:10, function(x) rep(x:(x + 2), 4)), dim = c(3, 4))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(out1 = array(x[1]:(x[1] + 2), dim = c(b = 3, c = 4))) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(out1 = array(sapply(1:10, function(x) rep(x:(x + 2), 4)), dim = c(b = 3, c = 4))) + ) + # named target dim + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 'a', f), + list(out1 = array(sapply(1:10, function(x) rep(x:(x + 2), 4)), dim = c(b = 3, c = 4))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 0; out1: 1 dim; out2: 2 dims; out3: 1 val", { + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x:(x + 2), dim = c(3)), + array(x:(x + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10), NULL, f), + list(output1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(3, 10)), + output2 = array(sapply(1:10, function(x) rep(x:(x + 3), 5)), dim = c(4, 5, 10)), + output3 = array(1:10)) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x:(x + 2), dim = c(b = 3)), + b = array(x:(x + 3), dim = c(c = 4, d = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), NULL, f), + list(a = array(sapply(1:10, function(x) x:(x + 2)), dim = c(b = 3, a = 10)), + b = array(sapply(1:10, function(x) rep(x:(x + 3), 5)), + dim = c(c = 4, d = 5, a = 10)), + c = array(1:10, dim = c(a = 10))) + ) +}) + +test_that("in1: 1 dim; targ. dims: 1; out1: 1 dim; out2: 2 dims; out3: 1 val", { + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x[1]:(x[1] + 2), dim = c(3)), + array(x[1]:(x[1] + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10), 1, f), + list(output1 = array(sapply(1, function(x) x:(x + 2)), dim = c(3)), + output2 = array(sapply(1, function(x) rep(x:(x + 3), 5)), dim = c(4, 5)), + output3 = 5.5) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x[1]:(x[1] + 2), dim = c(b = 3)), + b = array(x[1]:(x[1] + 3), dim = c(c = 4, d = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10)), 1, f), + list(a = array(sapply(1, function(x) x:(x + 2)), dim = c(b = 3)), + b = array(sapply(1, function(x) rep(x:(x + 3), 5)), dim = c(c = 4, d = 5)), + c = 5.5) + ) +}) + +test_that("in1: 2 dim; targ. dims: 0-2; out1: 1 dim; out2: 2 dims; out3: 1 val", { +# no target dim + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x[1]:(x[1] + 2), dim = c(3)), + array(x[1]:(x[1] + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10, dim = c(10, 3)), NULL, f), + list(output1 = array(rep(sapply(1:10, function(x) x:(x + 2)), 3), dim = c(3, 10, 3)), + output2 = array(rep(sapply(1:10, function(x) rep(x:(x + 3), 5)), 3), + dim = c(4, 5, 10, 3)), + output3 = array(1:10, dim = c(10, 3))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x[1]:(x[1] + 2), dim = c(c = 3)), + b = array(x[1]:(x[1] + 3), dim = c(d = 4, e = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10, b = 3)), NULL, f), + list(a = array(rep(sapply(1:10, function(x) x:(x + 2)), 3), + dim = c(c = 3, a = 10, b = 3)), + b = array(rep(sapply(1:10, function(x) rep(x:(x + 3), 5)), 3), + dim = c(d = 4, e = 5, a = 10, b = 3)), + c = array(1:10, dim = c(a = 10, b = 3))) + ) +# first target dim + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x[1]:(x[1] + 2), dim = c(3)), + array(x[1]:(x[1] + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10, dim = c(10, 3)), 1, f), + list(output1 = array(rep(sapply(1, function(x) x:(x + 2)), 3), dim = c(3, 3)), + output2 = array(rep(sapply(1, function(x) rep(x:(x + 3), 5)), 3), + dim = c(4, 5, 3)), + output3 = array(5.5, dim = c(3))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x[1]:(x[1] + 2), dim = c(c = 3)), + b = array(x[1]:(x[1] + 3), dim = c(d = 4, e = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10, b = 3)), 'a', f), + list(a = array(rep(sapply(1, function(x) x:(x + 2)), 3), dim = c(c = 3, b = 3)), + b = array(rep(sapply(1, function(x) rep(x:(x + 3), 5)), 3), + dim = c(d = 4, e = 5, b = 3)), + c = array(5.5, dim = c(b = 3))) + ) +# second target dim + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x[1]:(x[1] + 2), dim = c(3)), + array(x[1]:(x[1] + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10, dim = c(10, 3)), 2, f), + list(output1 = array(sapply(1:10, function(x) x:(x + 2)), dim = c(3, 10)), + output2 = array(sapply(1:10, function(x) rep(x:(x + 3), 5)), + dim = c(4, 5, 10)), + output3 = array(1:10, dim = c(10))) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x[1]:(x[1] + 2), dim = c(c = 3)), + b = array(x[1]:(x[1] + 3), dim = c(d = 4, e = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10, b = 3)), 'b', f), + list(a = array(sapply(1:10, function(x) x:(x + 2)), + dim = c(c = 3, a = 10)), + b = array(sapply(1:10, function(x) rep(x:(x + 3), 5)), + dim = c(d = 4, e = 5, a = 10)), + c = array(1:10, dim = c(a = 10))) + ) +# two target dims + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x) list(array(x[1]:(x[1] + 2), dim = c(3)), + array(x[1]:(x[1] + 3), dim = c(4, 5)), + mean(x)) + expect_equal( + Apply(array(1:10, dim = c(10, 3)), c(1, 2), f), + list(output1 = array(sapply(1, function(x) x:(x + 2)), dim = c(3)), + output2 = array(sapply(1, function(x) rep(x:(x + 3), 5)), + dim = c(4, 5)), + output3 = 5.5) + ) + # named input dim + # named output + # named output dim + f <- function(x) list(a = array(x[1]:(x[1] + 2), dim = c(c = 3)), + b = array(x[1]:(x[1] + 3), dim = c(d = 4, e = 5)), + c = mean(x)) + expect_equal( + Apply(array(1:10, dim = c(a = 10, b = 3)), c('a', 'b'), f), + list(a = array(sapply(1, function(x) x:(x + 2)), dim = c(c = 3)), + b = array(sapply(1, function(x) rep(x:(x + 3), 5)), + dim = c(d = 4, e = 5)), + c = 5.5) + ) +}) + + + + +#test_that("in1: 3 dim; targ. dims: 0-3; out1: 1 dim; out2: 2 dims; out3: 1 val", { +#}) + + + + +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 0; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 0; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 1; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 1; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 0; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 0; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 1; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 1; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 0; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 0; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 1; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 1; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 0; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 0; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 1; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 1; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 0; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 0; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 0, 1; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 val; targ. dims: 1, 1; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +# +# +# +# +# +# +# +# +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 0; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 0; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 1; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 1; out1: 0 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 0; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 0; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 1; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 1; out1: 1 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 0; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 0; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 1; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 1; out1: 3 val", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 0; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 0; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 1; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 1; out1: 1 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 0; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 0; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 0, 1; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) +# +#test_that("in1: 1 val; in2: 1 dim; targ. dims: 1, 1; out1: 1 dim; out2: 1 val; out3: 3 dim", { +#}) + + + + + + + + + + +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 0; out1: 0 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 0; out1: 0 val", { +##first in first dim +##first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 1; out1: 0 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 1; out1: 0 val", { +##shared target dims +###first in first dim +###first in second dim +##not shared target dims +###first in first dim +###first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 2, 1; out1: 0 val", { +## shared first target dim +## shared second target dim +## not shared target dims +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 0; out1: 1 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 0; out1: 1 val", { +##first in first dim +##first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 1; out1: 1 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 1; out1: 1 val", { +##shared target dims +###first in first dim +###first in second dim +##not shared target dims +###first in first dim +###first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 2, 1; out1: 1 val", { +## shared first target dim +## shared second target dim +## not shared target dims +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 0; out1: 3 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 0; out1: 3 val", { +##first in first dim +##first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 1; out1: 3 val", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 1; out1: 3 val", { +##shared target dims +###first in first dim +###first in second dim +##not shared target dims +###first in first dim +###first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 2, 1; out1: 3 val", { +## shared first target dim +## shared second target dim +## not shared target dims +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 0; out1: 1 dim", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 0; out1: 1 dim", { +##first in first dim +##first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0, 1; out1: 1 dim", { +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 1, 1; out1: 1 dim", { +##shared target dims +###first in first dim +###first in second dim +##not shared target dims +###first in first dim +###first in second dim +#}) +# +#test_that("in1: 2 dim; in2: 1 dim; targ. dims: 2, 1; out1: 1 dim", { +## shared first target dim +## shared second target dim +## not shared target dims +#}) + +test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0-2, 0-1; out1: 1 dim; out2: 1 val; out3: 3 dim", { +# no target dims +## no shared margins + # unnamed input dim + # unnamed output + # unnamed output dim + # equal dimensions -> crash + f <- function(x, y) { + z <- mean(x) + mean(y) + list(array(z:(z + 3)), + z, + array(z:(z + 4), dim = c(5, 6, 7))) + } + expect_error( + Apply(list(array(1:10, dim = c(10, 3)), + array(1:3 * 10, dim = c(3))), + NULL, f, guess_dim_names = FALSE), + "multiple unnamed dimensions of the same length" + ) + expect_warning( + Apply(list(array(1:10, dim = c(10, 3)), + array(1:3 * 10, dim = c(3))), + NULL, f), + "Guessed names for some unnamed dimensions" + ) + expect_equal( + Apply(list(array(1:10, dim = c(10, 3)), + array(1:3 * 10, dim = c(3))), + NULL, f), + list(output1 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + y:(y + 3) + }), 1) + }), + dim = c(4, 10, 3)), + output2 = array(sapply(c(10, 20, 30), function (x) x + rep(1:10, 1)), + dim = c(10, 3)), + output3 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + rep(y:(y + 4), 6 * 7) + }), 1) + }), + dim = c(5, 6, 7, 10, 3))) + ) + # unnamed input dim + # unnamed output + # unnamed output dim + expect_equal( + Apply(list(array(1:10, dim = c(10, 4)), + array(1:3 * 10, dim = c(3))), + NULL, f), + list(output1 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + y:(y + 3) + }), 4) + }), + dim = c(4, 10, 4, 3)), + output2 = array(sapply(c(10, 20, 30), function (x) x + rep(1:10, 4)), + dim = c(10, 4, 3)), + output3 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + rep(y:(y + 4), 6 * 7) + }), 4) + }), + dim = c(5, 6, 7, 10, 4, 3))) + ) + # named input dim + # named output + # named output dim + f <- function(x, y) { + z <- mean(x) + mean(y) + list(a = array(z:(z + 3), dim = c(d = 4)), + b = z, + c = array(z:(z + 4), dim = c(e = 5, f = 6, g = 7))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:3 * 10, dim = c(c = 3))), + NULL, f), + list(a = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + y:(y + 3) + }), 4) + }), + dim = c(d = 4, a = 10, b = 4, c = 3)), + b = array(sapply(c(10, 20, 30), function (x) x + rep(1:10, 4)), + dim = c(a = 10, b = 4, c = 3)), + c = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + rep(y:(y + 4), 6 * 7) + }), 4) + }), + dim = c(e = 5, f = 6, g = 7, a = 10, b = 4, c = 3))) + ) +## one shared margin + # named input dim + # named output + # named output dim + f <- function(x, y) { + z <- mean(x) + mean(y) + list(a = array(z:(z + 3), dim = c(d = 4)), + b = z, + c = array(z:(z + 4), dim = c(e = 5, f = 6, g = 7))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 3)), + array(1:3 * 10, dim = c(b = 3))), + NULL, f), + list(a = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + y:(y + 3) + }), 1) + }), + dim = c(d = 4, a = 10, b = 3)), + b = array(sapply(c(10, 20, 30), function (x) x + rep(1:10, 1)), + dim = c(a = 10, b = 3)), + c = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + rep(y:(y + 4), 6 * 7) + }), 1) + }), + dim = c(e = 5, f = 6, g = 7, a = 10, b = 3))) + ) + +#one target dim only +##no shared margins +###first in first dim as target + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x, y) { + list(array(rev(x)), + mean(x) + mean(y), + array(rep(x + y - 1, 30), dim = c(10, 5, 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(10, 4)), + array(1:3 * 10, dim = c(3))), + list(1, NULL), f), + list(output1 = array(sapply(c(10, 20, 30), function(x) { + rep(10:1, 4) + }), + dim = c(10, 4, 3)), + output2 = array(sapply(c(10, 20, 30), function(x) { + x + rep(5.5, 4) + }), + dim = c(4, 3)), + output3 = array(sapply(c(10, 20, 30), function(x) { + rep(1:10 + x - 1, 30 * 4) + }), + dim = c(10, 5, 6, 4, 3))) + ) + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(rev(x), dim = c(d = 10)), + b = mean(x) + mean(y), + c = array(rep(x + y - 1, 30), dim = c(a = 10, e = 5, f = 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:3 * 10, dim = c(c = 3))), + list('a', NULL), f), + list(a = array(sapply(c(10, 20, 30), function(x) { + rep(10:1, 4) + }), + dim = c(d = 10, b = 4, c = 3)), + b = array(sapply(c(10, 20, 30), function(x) { + x + rep(5.5, 4) + }), + dim = c(b = 4, c = 3)), + c = array(sapply(c(10, 20, 30), function(x) { + rep(1:10 + x - 1, 30 * 4) + }), + dim = c(a = 10, e = 5, f = 6, b = 4, c = 3))) + ) +###first in second dim as target + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x, y) { + list(array(x[1]:(x[1] + length(x) - 1)), + mean(x) + mean(y), + array(rep(x + y - 1, 30), dim = c(4, 5, 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(10, 4)), + array(1:3 * 10, dim = c(3))), + list(2, NULL), f), + list(output1 = array(sapply(c(10, 20, 30), function(x) { + sapply(1:10, function(y) { + y:(y + 3) + }) + }), + dim = c(4, 10, 3)), + output2 = array(sapply(c(10, 20, 30), function(x) { + 1:10 + x + }), + dim = c(10, 3)), + output3 = array(sapply(c(10, 20, 30), function(x) { + sapply(1:10, function(y) { + rep(rep(y, 4) + x - 1, 30) + }) + }), + dim = c(4, 5, 6, 10, 3))) + ) + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(x[1]:(x[1] + length(x) - 1), dim = c(d = 4)), + b = mean(x) + mean(y), + c = array(rep(x + y - 1, 30), dim = c(b = 4, e = 5, f = 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:3 * 10, dim = c(c = 3))), + list('b', NULL), f), + list(a = array(sapply(c(10, 20, 30), function(x) { + sapply(1:10, function(y) { + y:(y + 3) + }) + }), + dim = c(d = 4, a = 10, c = 3)), + b = array(sapply(c(10, 20, 30), function(x) { + 1:10 + x + }), + dim = c(a = 10, c = 3)), + c = array(sapply(c(10, 20, 30), function(x) { + sapply(1:10, function(y) { + rep(rep(y, 4) + x - 1, 30) + }) + }), + dim = c(b = 4, e = 5, f = 6, a = 10, c = 3))) + ) +###second in first dim as target + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x, y) { + list(array(rev(y)), + mean(x) + mean(y), + array(rep(x + y - 1, 30), dim = c(3, 5, 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(10, 4)), + array(1:3 * 10, dim = c(3))), + list(NULL, 1), f), + list(output1 = array(rep(sapply(1:10, + function(x) { + c(30, 20, 10) + }), 4), + dim = c(3, 10, 4)), + output2 = array(rep(sapply(1:10, + function(x) { + 20 + x + }), 4), + dim = c(10, 4)), + output3 = array(rep(sapply(1:10, + function(x) { + rep(c(10, 20, 30) + x - 1, 30) + }), 4), + dim = c(3, 5, 6, 10, 4))) + ) + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(rev(y), dim = c(d = 3)), + b = mean(x) + mean(y), + c = array(rep(x + y - 1, 30), dim = c(c = 3, e = 5, f = 6))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:3 * 10, dim = c(c = 3))), + list(NULL, 'c'), f), + list(a = array(rep(sapply(1:10, + function(x) { + c(30, 20, 10) + }), 4), + dim = c(d = 3, a = 10, b = 4)), + b = array(rep(sapply(1:10, + function(x) { + 20 + x + }), 4), + dim = c(a = 10, b = 4)), + c = array(rep(sapply(1:10, + function(x) { + rep(c(10, 20, 30) + x - 1, 30) + }), 4), + dim = c(c = 3, e = 5, f = 6, a = 10, b = 4))) + ) +##one shared margin. the remaining dim in first in is the target + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(x[1]:(x[1] + length(x) - 1), dim = c(d = 4)), + b = mean(x) + mean(y), + c = array(rep(x + y - 1, 30), dim = c(b = 4, e = 5, f = 6))) + } + expect_equal( + Apply(list(array(1:3, dim = c(a = 3, b = 4)), + array(1:3 * 10, dim = c(a = 3))), + list('b', NULL), f), + list(a = array(sapply(c(10, 20, 30), function(x) { + sapply(1:4, function(y) { + y[1]:(y[1] + 3) + }) + }), + dim = c(d = 4, a = 3)), + b = array(sapply(c(1:3), function(x) { + x + x * 10 + }), + dim = c(a = 3)), + c = array(sapply(c(1:3), function(x) { + rep(x * 10 + rep(x, 4) - 1, 30) + }), + dim = c(b = 4, e = 5, f = 6, a = 3))) + ) + +#one target dim from each in +##no shared target +##shared target + +#two target dims first in, no target dim second in + +#all target dims +##no shared target + # unnamed input dim + # unnamed output + # unnamed output dim + f <- function(x, y) { + list(array(rowMeans(x)), + mean(x) + mean(y), + array(sapply(y, function(z) z + x - 1), dim = c(10, 4, 3))) + } + expect_equal( + Apply(list(array(1:10, dim = c(10, 4)), + array(1:3 * 10, dim = c(3))), + list(c(1, 2), 1), f), + list(output1 = array(1:10), + output2 = 25.5, + output3 = array(sapply(c(10, 20, 30), + function(x) { + rep(1:10, 4) + x - 1 + }), + dim = c(10, 4, 3))) + ) + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(rowMeans(x), c(a = 10)), + b = mean(x) + mean(y), + c = array(sapply(y, function(z) z + x - 1), + dim = c(a = 10, b = 4, c = 3))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:3 * 10, dim = c(c = 3))), + list(c('a', 'b'), 'c'), f), + list(a = array(1:10, dim = c(a = 10)), + b = 25.5, + c = array(sapply(c(10, 20, 30), + function(x) { + rep(1:10, 4) + x - 1 + }), + dim = c(a = 10, b = 4, c = 3))) + ) +##shared target + # named input dim + # named output + # named output dim + f <- function(x, y) { + list(a = array(rowMeans(x), c(a = 10)), + b = mean(x) + mean(y), + c = array(rep(t(apply(x, 1, function(z) z * y)), 3), + dim = c(a = 10, b = 4, c = 3))) + } + expect_equal( + Apply(list(array(1:10, dim = c(a = 10, b = 4)), + array(1:4 * 10, dim = c(b = 4))), + list(c('a', 'b'), 'b'), f), + list(a = array(1:10, dim = c(a = 10)), + b = 30.5, + c = array(rep(sapply(c(10, 20, 30, 40), + function(x) { + 1:10 * x + }), + 3), + dim = c(a = 10, b = 4, c = 3))) + ) +}) + +##test_that("in1: 2 dim; in2: 3 dim; targ. dims: 0-2, 0-3; out1: 2 dim", { +### shared first target dim +### shared second target dim +### shared two target dims (first two in second in) +### shared two target dims (last two in second in) +### shared two target dims (extreme two in second in) +### not shared target dims +##}) +# +#test_that("in1: 2 dim; in2: 3 dim; targ. dims: 0-2, 0-3; out1: 1 dim; out2: 1 val; out3: 3 dim", { +## shared first target dim +## shared second target dim +## shared two target dims (first two in second in) +## shared two target dims (last two in second in) +## shared two target dims (extreme two in second in) +## not shared target dims +#}) +# +#test_that("in1: 2 dim; in2: 3 dim; in3: 1 dim; targ. dims: 0-2, 0-3, 0-1; out1: 2 dim", { +## shared first target dim +## shared second target dim +## shared two target dims (first two in second in) +## shared two target dims (last two in second in) +## shared two target dims (extreme two in second in) +## not shared target dims +#}) + +# Real cases +test_that("real use case - standardization", { + standardization <- function(x, mean, deviation){ + (x - mean) / deviation + } + + x <- array(1:(2*3*4), dim = c(mod = 2, lon = 3, lat = 4)) + y <- array(1:12, dim = c(lon = 3, lat = 4)) + z <- array(1:12, dim = c(lon = 3, lat = 4)) + + expected_result <- array(c(0:11 / z, rep(1, 3 * 4)), dim = c(3, 4, mod = 2)) + + expect_equal( + Apply(data = list(x,y,z), + target_dims = list(c('lon', 'lat'), + c('lon', 'lat'), + c('lon', 'lat')), + fun = standardization)$output1, + expected_result + ) + + names(dim(expected_result)) <- c('lon', 'lat', 'mod') + + expect_equal( + Apply(data = list(x,y,z), + margins = list('mod', NULL, NULL), + fun = standardization, + output_dims = c('lon', 'lat') + )$output1, + expected_result + ) + + expect_equal( + Apply(data = list(x,y,z), + margins = list(c('mod', 'lat'), 'lat', 'lat'), + fun = standardization, + output_dims = c('lon') + )$output1, + multiApply:::.aperm2(expected_result, c(1, 3, 2)) + ) + + x <- multiApply:::.aperm2(x, c(3, 2, 1)) + + expect_equal( + Apply(data = list(x,y,z), + target_dims = list(c('lon', 'lat'), + c('lon', 'lat'), + c('lon', 'lat')), + fun = standardization, + output_dims = c('lon', 'lat') + )$output1, + expected_result + ) + +}) + +# Test .aperm2 +test_that(".aperm2", { + data <- seq(as.POSIXct('1990-11-01'), + length.out = 6, + by = as.difftime(1, units = 'days')) + dim(data) <- c(3, 2) + expect_equal( + class(multiApply:::.aperm2(data, c(2, 1))), + c('POSIXct', 'POSIXt') + ) +}) + +# TODOS: +# TESTS FOR MARGINS +# TESTS FOR DISORDERED TARGET_DIMS +# TESTS FOR FUN WITH TARGET_DIMS AND OUTPUT_DIMS ATTACHED +# TESTS FOR FUNCTIONS RECEIVING ADDITIONAL PARAMETERS +# TESTS FOR SPLIT FACTOR +# TESTS FOR NCORES +# TESTS OF WALLCLOCK TIME +# TESTS OF MEMORY FOOTPRINT