From d038362897d34157b3efadb735dfc4804828ccf4 Mon Sep 17 00:00:00 2001 From: ahunter Date: Tue, 26 Sep 2017 10:49:02 +0200 Subject: [PATCH 01/25] Add functionality to replicate margins --- R/Apply.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/R/Apply.R b/R/Apply.R index cafa3b5..f3a688d 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -73,6 +73,21 @@ Apply <- function(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, structure(list(env = environment(), index = margin_length, subs = as.name("[")), class = c("indexed_array")) } + #Check margins match for input objects + all_dims <- c(unlist(lapply(1 : length(data), + function(x) sum(dim(data[[x]])[margins[[x]]])))) + pos_dim <- min(which(all_dims == max(all_dims))) + dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] + for (i in 1 : length(data)) { + if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { + for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { + if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { + data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) + data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) + } + } + } + } for (i in 1 : length(data)) { margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) margin_length[-margins[[i]]] <- "" -- GitLab From 556c811fba87b45558cbb07c74c07a0c90145561 Mon Sep 17 00:00:00 2001 From: ahunter Date: Tue, 26 Sep 2017 10:53:52 +0200 Subject: [PATCH 02/25] Add s2dv dependency for InsertDim function --- DESCRIPTION | 3 ++- NAMESPACE | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e31050c..131c6d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,8 @@ Imports: plyr, doParallel, future, - foreach + foreach, + s2dverification License: LGPL-3 URL: https://earth.bsc.es/gitlab/ces/multiApply BugReports: https://earth.bsc.es/gitlab/ces/multiApply/issues diff --git a/NAMESPACE b/NAMESPACE index 3b439b3..a6a7711 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,4 +4,5 @@ importFrom(abind, abind) importFrom(future, availableCores) importFrom(doParallel, registerDoParallel) importFrom(foreach, registerDoSEQ) +importFrom(s2dverification, InsertDim) export(Apply) -- GitLab From 4289df9b9d633485abdcef61f2c1a069bd32acc6 Mon Sep 17 00:00:00 2001 From: ahunter Date: Tue, 26 Sep 2017 12:32:54 +0200 Subject: [PATCH 03/25] Small change to allow functions to be provided directly --- R/Apply.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/Apply.R b/R/Apply.R index f3a688d..879ecb3 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -66,7 +66,11 @@ Apply <- function(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, } names <- names(dim(data[[1]]))[margins[[1]]] input <- list() - splatted_f <- splat(get(AtomicFun)) + if (is.charcter(AtomicFun)) { + splatted_f <- splat(get(AtomicFun)) + } else { + splatted_f <- splat(AtomicFun) + } if (!is.null(margins)) { .isolate <- function(data, margin_length, drop = TRUE) { eval(dim(environment()$data)) -- GitLab From 3f603641cf928d57644cf9322892da5d3aa6fac6 Mon Sep 17 00:00:00 2001 From: ahunter Date: Tue, 26 Sep 2017 13:36:00 +0200 Subject: [PATCH 04/25] Small bugfix for the previous commit --- R/Apply.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Apply.R b/R/Apply.R index 879ecb3..c5255f9 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -66,7 +66,7 @@ Apply <- function(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, } names <- names(dim(data[[1]]))[margins[[1]]] input <- list() - if (is.charcter(AtomicFun)) { + if (is.character(AtomicFun)) { splatted_f <- splat(get(AtomicFun)) } else { splatted_f <- splat(AtomicFun) -- GitLab From 180a6efbd46d91c67edaac839baf8b4de6641cea Mon Sep 17 00:00:00 2001 From: ahunter Date: Tue, 26 Sep 2017 13:42:19 +0200 Subject: [PATCH 05/25] Reverse prioritization of margins and inverse_marginas --- R/Apply.R | 7 ++++--- man/Apply.Rd | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index c5255f9..6f5032a 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -2,10 +2,11 @@ #' #' The Apply function is an extension of the mapply function, which instead of taking lists of unidimensional objects as input, takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option. #' @param data A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun. -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins. +#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins. +#' @param inverse_margins List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins. #' @param AtomicFun Function to be applied to the arrays. #' @param ... Additional arguments to be used in the AtomicFun. -#' @param inverse_margins List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins. +#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins. #' @param parallel Logical, should the function be applied in parallel. #' @param ncores The number of cores to use for parallel computation. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. @@ -21,7 +22,7 @@ #' (sum(y > z) / (length(y)))) * 100} #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins, AtomicFun = "test_fun") -Apply <- function(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, parallel = FALSE, ncores = NULL) { +Apply <- function(data, inverse_margins = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { if (!is.list(data)) { data <- list(data) } diff --git a/man/Apply.Rd b/man/Apply.Rd index 4a09caf..e884db1 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -4,20 +4,19 @@ \alias{Apply} \title{Wrapper for Applying Atomic Functions to Arrays.} \usage{ -Apply(data, margins = NULL, AtomicFun, ..., inverse_margins = NULL, parallel = FALSE, +Apply(data, inverse_margins = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) } \arguments{ \item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.} -\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins.} +\item{inverse_margins}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins.} \item{AtomicFun}{Function to be applied to the arrays.} \item{...}{Additional arguments to be used in the AtomicFun.} -\item{inverse_margins}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins.} - +\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins.} \item{parallel}{Logical, should the function be applied in parallel.} \item{ncores}{The number of cores to use for parallel computation.} -- GitLab From b20cd33be42708b1c8f3bcece54d7068a05b8b2c Mon Sep 17 00:00:00 2001 From: ahunter Date: Thu, 28 Sep 2017 16:20:59 +0200 Subject: [PATCH 06/25] Change inverse_margins to target_dims --- R/Apply.R | 24 ++++++++++++------------ man/Apply.Rd | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 6f5032a..0fb2966 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -2,11 +2,11 @@ #' #' The Apply function is an extension of the mapply function, which instead of taking lists of unidimensional objects as input, takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option. #' @param data A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun. -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins. -#' @param inverse_margins List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, margins takes priority over inverse_margins. +#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins. +#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. #' @param AtomicFun Function to be applied to the arrays. #' @param ... Additional arguments to be used in the AtomicFun. -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins. +#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins. #' @param parallel Logical, should the function be applied in parallel. #' @param ncores The number of cores to use for parallel computation. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. @@ -22,29 +22,29 @@ #' (sum(y > z) / (length(y)))) * 100} #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins, AtomicFun = "test_fun") -Apply <- function(data, inverse_margins = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { +Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { if (!is.list(data)) { data <- list(data) } if (!is.null(margins)) { - inverse_margins <- NULL + target_dims <- NULL } - if (!is.null(inverse_margins)) { - if (!is.list(inverse_margins)) { - inverse_margins <- rep(list(inverse_margins), length(data)) + if (!is.null(target_dims)) { + if (!is.list(target_dims)) { + target_dims <- rep(list(target_dims), length(data)) } - if (is.character(unlist(inverse_margins[1]))) { - margins2 <- inverse_margins + if (is.character(unlist(target_dims[1]))) { + margins2 <- target_dims for (i in 1 : length(data)) { margins_new <- c() for (j in 1 : length(margins2[[i]])) { margins_new[j] <- which(names(dim(data[[i]])) == margins2[[i]][[j]]) } - inverse_margins[[i]] <- c(margins_new) + target_dims[[i]] <- c(margins_new) } } for (i in 1 : length(data)) { - margins[[i]] <- c(1 :length(dim(data[[i]])))[-c(inverse_margins[[i]])] + margins[[i]] <- c(1 :length(dim(data[[i]])))[-c(target_dims[[i]])] } } if (!is.null(margins)) { diff --git a/man/Apply.Rd b/man/Apply.Rd index e884db1..699facd 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -4,19 +4,19 @@ \alias{Apply} \title{Wrapper for Applying Atomic Functions to Arrays.} \usage{ -Apply(data, inverse_margins = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, +Apply(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) } \arguments{ \item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.} -\item{inverse_margins}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins.} +\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins.} \item{AtomicFun}{Function to be applied to the arrays.} \item{...}{Additional arguments to be used in the AtomicFun.} -\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and inverse_margins are specified, inverse_margins takes priority over margins.} +\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins.} \item{parallel}{Logical, should the function be applied in parallel.} \item{ncores}{The number of cores to use for parallel computation.} -- GitLab From be6bf905519f558863ccf8a1371802cca951b915 Mon Sep 17 00:00:00 2001 From: ahunter Date: Mon, 2 Oct 2017 08:55:15 +0200 Subject: [PATCH 07/25] I've introduced a bug somewhere --- R/Apply.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/Apply.R b/R/Apply.R index 0fb2966..8c9eaba 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -62,6 +62,10 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para margins[[i]] <- c(margins_new) } } + # if (is.unsorted(margins)) { + # unordered_dims <- margins + # margins <- sort(margins) + # } if (!is.logical(parallel)) { stop("parallel must be logical") } @@ -89,6 +93,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) + print("OK") } } } @@ -129,6 +134,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para } if (!is.null(dim(WrapperFun))) { names(dim(WrapperFun)) <- c(AtomicFun, names) + print(dim(WrapperFun)) } out <- WrapperFun } -- GitLab From 224579be0874decda5fb5fc40671669b85c2004e Mon Sep 17 00:00:00 2001 From: ahunter Date: Mon, 2 Oct 2017 15:29:21 +0200 Subject: [PATCH 08/25] Bugfixed, but still can't handle POSXIct objects when replicating margins --- R/Apply.R | 75 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 8c9eaba..679cd8d 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -2,11 +2,10 @@ #' #' The Apply function is an extension of the mapply function, which instead of taking lists of unidimensional objects as input, takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option. #' @param data A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun. -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins. -#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. +#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. #' @param AtomicFun Function to be applied to the arrays. #' @param ... Additional arguments to be used in the AtomicFun. -#' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins. +#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. #' @param parallel Logical, should the function be applied in parallel. #' @param ncores The number of cores to use for parallel computation. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. @@ -22,7 +21,7 @@ #' (sum(y > z) / (length(y)))) * 100} #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins, AtomicFun = "test_fun") -Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { +Apply <- function(data, margins = NULL, AtomicFun, ..., target_dims = NULL, parallel = FALSE, ncores = NULL) { if (!is.list(data)) { data <- list(data) } @@ -43,7 +42,13 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para target_dims[[i]] <- c(margins_new) } } - for (i in 1 : length(data)) { + for (i in 1 : length(data)) { + if (is.unsorted(target_dims[[i]])) { + targ_dims <- sort(target_dims[[i]]) + marg_dims <- c(1 : length(dim(data[[i]])))[- target_dims[[i]]] + data[[i]] <- aperm(data[[i]], c(targ_dims, marg_dims)) + target_dims[[i]] <- 1 : length(targ_dims) + } margins[[i]] <- c(1 :length(dim(data[[i]])))[-c(target_dims[[i]])] } } @@ -62,42 +67,48 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para margins[[i]] <- c(margins_new) } } - # if (is.unsorted(margins)) { - # unordered_dims <- margins - # margins <- sort(margins) - # } if (!is.logical(parallel)) { stop("parallel must be logical") } names <- names(dim(data[[1]]))[margins[[1]]] input <- list() - if (is.character(AtomicFun)) { - splatted_f <- splat(get(AtomicFun)) - } else { + if (is.function(AtomicFun)) { splatted_f <- splat(AtomicFun) + } else { + splatted_f <- splat(get(AtomicFun)) } + + splatted_f <- splat(get(AtomicFun)) + if (!is.null(margins)) { + #Check margins match for input objects + all_dims <- c(unlist(lapply(1 : length(data), + function(x) sum(dim(data[[x]])[margins[[x]]])))) + print(all_dims) + pos_dim <- min(which(all_dims == max(all_dims))) + dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] + print(pos_dim) + print(dim_template) + for (i in 1 : length(data)) { + if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { + for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { + print("OK") + print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) + if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { + print(class(data[[i]])) + data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) + data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) + print(class(data[[i]])) + } + } + } + } + print(dim(data)[[2]]) .isolate <- function(data, margin_length, drop = TRUE) { eval(dim(environment()$data)) structure(list(env = environment(), index = margin_length, subs = as.name("[")), class = c("indexed_array")) } - #Check margins match for input objects - all_dims <- c(unlist(lapply(1 : length(data), - function(x) sum(dim(data[[x]])[margins[[x]]])))) - pos_dim <- min(which(all_dims == max(all_dims))) - dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] - for (i in 1 : length(data)) { - if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { - for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { - if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { - data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) - data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) - print("OK") - } - } - } - } for (i in 1 : length(data)) { margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) margin_length[-margins[[i]]] <- "" @@ -133,11 +144,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para WrapperFun <- splatted_f(data, ...) } if (!is.null(dim(WrapperFun))) { - names(dim(WrapperFun)) <- c(AtomicFun, names) - print(dim(WrapperFun)) + # names(dim(WrapperFun))[(length(dim(WrapperFun)) - length(names) + 1) : length(dim(WrapperFun))] <- c(names) } out <- WrapperFun -} - - - +} \ No newline at end of file -- GitLab From f43f566d1df7cc05cc97e28a5dc1cbf8b811ed01 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 16 Oct 2017 14:54:00 +0200 Subject: [PATCH 09/25] Some enhancements ongoing. --- R/Apply.R | 186 ++++++++++++++++++++++++++++++++++++------------------ R/Utils.R | 9 +++ 2 files changed, 132 insertions(+), 63 deletions(-) create mode 100644 R/Utils.R diff --git a/R/Apply.R b/R/Apply.R index 3cbe6d1..5cb9e22 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -22,88 +22,153 @@ #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins, AtomicFun = "test_fun") Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { + # Check data if (!is.list(data)) { data <- list(data) } + if (any(!sapply(data, is.numeric))) { + stop("Parameter 'data' must be one or a list of numeric objects.") + } + # Check target_dims and margins + if (is.null(margins) && is.null(target_dims)) { + stop("One of 'margins' or 'target_dims' must be specified.") + } if (!is.null(margins)) { target_dims <- NULL } - if (!is.null(target_dims)) { - if (!is.list(target_dims)) { - target_dims <- rep(list(target_dims), length(data)) + if (!is.null(margins)) { + # Check margins and build target_dims accordingly + if (!is.list(margins)) { + margins <- rep(list(margins), length(data)) + } + if (any(!sapply(margins, + function(x) is.character(x) || is.numeric(x)))) { + stop("Parameter 'margins' must be one or a list of numeric or ", + "character vectors.") + } + if (any(sapply(margins, length) == 0)) { + stop("Parameter 'margins' must not contain length-0 vectors.") } - if (is.character(unlist(target_dims[1]))) { - margins2 <- target_dims - for (i in 1 : length(data)) { - margins_new <- c() - for (j in 1 : length(margins2[[i]])) { - margins_new[j] <- which(names(dim(data[[i]])) == margins2[[i]][[j]]) + duplicate_dim_specs <- sapply(margins, + function(x) { + length(unique(x)) != length(x) + }) + if (any(duplicate_dim_specs)) { + stop("Parameter 'margins' must not contain duplicated dimension ", + "specifications.") + } + target_dims <- vector('list', length(data)) + for (i in 1 : length(data)) { + if (is.character(unlist(margins[i]))) { + margins_new <- margins[[i]] + for (j in 1 : length(margins_new)) { + margins_new[j] <- which(names(dim(data[[i]])) == margins_new[j]) } - target_dims[[i]] <- c(margins_new) + margins[[i]] <- margins_new } + target_dims[[i]] <- c(1 : length(dim(data[[i]])))[-c(margins[[i]])] + } + } else { + # Check target_dims and build margins accordingly + if (!is.list(target_dims)) { + target_dims <- rep(list(target_dims), length(data)) + } + if (any(!sapply(target_dims, + function(x) is.character(x) || is.numeric(x)))) { + stop("Parameter 'target_dims' must be one or a list of numeric or ", + "character vectors.") + } + if (any(sapply(target_dims, length) == 0)) { + stop("Parameter 'target_dims' must not contain length-0 vectors.") } + duplicate_dim_specs <- sapply(target_dims, + function(x) { + length(unique(x)) != length(x) + }) + if (any(duplicate_dim_specs)) { + stop("Parameter 'target_dims' must not contain duplicated dimension ", + "specifications.") + } + margins <- vector('list', length(data)) for (i in 1 : length(data)) { - if (is.unsorted(target_dims[[i]])) { - targ_dims <- sort(target_dims[[i]]) - marg_dims <- c(1 : length(dim(data[[i]])))[- target_dims[[i]]] - data[[i]] <- aperm(data[[i]], c(targ_dims, marg_dims)) - target_dims[[i]] <- 1 : length(targ_dims) + if (is.character(unlist(target_dims[i]))) { + margins2 <- target_dims[[i]] + margins2_new <- c() + for (j in 1 : length(margins2)) { + margins2_new[j] <- which(names(dim(data[[i]])) == margins2[j]) + } + target_dims[[i]] <- margins2_new } - margins[[i]] <- c(1 :length(dim(data[[i]])))[-c(target_dims[[i]])] + margins[[i]] <- c(1 : length(dim(data[[i]]))[-c(target_dims[[i]])]) } } - if (!is.null(margins)) { - if (!is.list(margins)) { - margins <- rep(list(margins), length(data)) + # Reorder dimensions of input data for target dims to be left-most + for (i in 1 : length(data)) { + if (is.unsorted(target_dims[[i]]) || + (max(target_dims[[i]]) > length(target_dims[[i]]))) { + targ_dims <- sort(target_dims[[i]]) + marg_dims <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] + data[[i]] <- .aperm2(data[[i]], c(targ_dims, marg_dims)) + target_dims[[i]] <- 1 : length(targ_dims) + margins[[i]] <- c(1 : length(dim(data[[i]])))[-c(target_dims[[i]])] } } - if (is.character(unlist(margins[1])) && !is.null(margins)) { - margins2 <- margins - for (i in 1 : length(data)) { - margins_new <- c() - for (j in 1 : length(margins2[[i]])) { - margins_new[j] <- which(names(dim(data[[i]])) == margins2[[i]][[j]]) - } - margins[[i]] <- c(margins_new) + # Check AtomicFun + if (is.character(AtomicFun)) { + try({AtomicFun <- get(AtomicFun)}, silent = TRUE) + if (!is.function(AtomicFun)) { + stop("Could not find the function '", AtomicFun, "'.") } } + if (!is.function(AtomicFun)) { + stop("Parameter 'AtomicFun' must be a function or a character string ", + "with the name of a function.") + } + # Check parallel if (!is.logical(parallel)) { - stop("parallel must be logical") + stop("Parameter 'parallel' must be logical.") } + # Check ncores + if (parallel) { + if (is.null(ncores)) { + ncores <- availableCores() - 1 + } + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be numeric.") + } + ncores <- round(ncores) + ncores <- min(availableCores() - 1, ncores) + } + names <- names(dim(data[[1]]))[margins[[1]]] input <- list() - if (is.function(AtomicFun)) { - splatted_f <- splat(AtomicFun) - } else { - splatted_f <- splat(get(AtomicFun)) - } - - splatted_f <- splat(get(AtomicFun)) - + + splatted_f <- splat(AtomicFun) + if (!is.null(margins)) { #Check margins match for input objects - all_dims <- c(unlist(lapply(1 : length(data), - function(x) sum(dim(data[[x]])[margins[[x]]])))) - print(all_dims) - pos_dim <- min(which(all_dims == max(all_dims))) - dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] - print(pos_dim) - print(dim_template) - for (i in 1 : length(data)) { - if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { - for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { - print("OK") - print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) - if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { - print(class(data[[i]])) - data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) - data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) - print(class(data[[i]])) - } - } - } - } - print(dim(data)[[2]]) + all_dims <- c(unlist(lapply(1 : length(data), + function(x) sum(dim(data[[x]])[margins[[x]]])))) + print(all_dims) + pos_dim <- min(which(all_dims == max(all_dims))) + dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] + print(pos_dim) + print(dim_template) + for (i in 1 : length(data)) { + if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { + for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { + print("OK") + print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) + if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { + print(class(data[[i]])) + data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) + data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) + print(class(data[[i]])) + } + } + } + } + print(dim(data)[[2]]) .isolate <- function(data, margin_length, drop = TRUE) { eval(dim(environment()$data)) structure(list(env = environment(), index = margin_length, subs = as.name("[")), @@ -120,11 +185,6 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para i_max <- length(input[[1]])[1] / dims[[1]] k <- length(input[[1]]) / i_max if (parallel == TRUE) { - if (is.null(ncores)) { - ncores <- availableCores() - 1 - } else { - ncores <- min(availableCores() - 1, ncores) - } registerDoParallel(ncores) } WrapperFun <- llply(1 : i_max, function(i) diff --git a/R/Utils.R b/R/Utils.R new file mode 100644 index 0000000..4404d32 --- /dev/null +++ b/R/Utils.R @@ -0,0 +1,9 @@ +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + old_dims <- dim(x) + x <- x[as.vector(y)] + dim(x) <- old_dims[new_order] + x +} -- GitLab From 79bdaf3f98d92e0b0739bf9d65083a795e324eb1 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 16 Oct 2017 21:46:12 +0200 Subject: [PATCH 10/25] Some progress. --- R/Apply.R | 88 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 5cb9e22..cffc871 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -29,6 +29,13 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para if (any(!sapply(data, is.numeric))) { stop("Parameter 'data' must be one or a list of numeric objects.") } + is_vector <- rep(FALSE, length(data)) + for (i in 1 : length(data)) { + if (is.null(dim(data[[i]]))) { + is_vector[i] <- TRUE + dim(data[[i]]) <- length(data[[i]]) + } + } # Check target_dims and margins if (is.null(margins) && is.null(target_dims)) { stop("One of 'margins' or 'target_dims' must be specified.") @@ -36,19 +43,18 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para if (!is.null(margins)) { target_dims <- NULL } + margins_names <- vector('list', length(data)) + target_dims_names <- vector('list', length(data)) if (!is.null(margins)) { # Check margins and build target_dims accordingly if (!is.list(margins)) { margins <- rep(list(margins), length(data)) } if (any(!sapply(margins, - function(x) is.character(x) || is.numeric(x)))) { + function(x) is.character(x) || is.numeric(x) || is.null(x)))) { stop("Parameter 'margins' must be one or a list of numeric or ", "character vectors.") } - if (any(sapply(margins, length) == 0)) { - stop("Parameter 'margins' must not contain length-0 vectors.") - } duplicate_dim_specs <- sapply(margins, function(x) { length(unique(x)) != length(x) @@ -59,14 +65,35 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para } target_dims <- vector('list', length(data)) for (i in 1 : length(data)) { - if (is.character(unlist(margins[i]))) { - margins_new <- margins[[i]] - for (j in 1 : length(margins_new)) { - margins_new[j] <- which(names(dim(data[[i]])) == margins_new[j]) + if (length(margins[[i]]) > 0) { + if (is.character(unlist(margins[i]))) { + if (is.null(names(dim(data[[i]])))) { + stop("Parameter 'margins' contains dimension names, but ", + "some of the corresponding objects in 'data' do not have ", + "dimension names.") + } + margins_new <- margins[[i]] + for (j in 1 : length(margins_new)) { + matches <- which(names(dim(data[[i]])) == margins_new[j]) + if (length(matches) < 1) { + stop("Could not find dimension '", margins_new[j], "' in ", i, + "th object provided in 'data'.") + } + margins_new[j] <- matches[1] + } + margins_names[[i]] <- margins[[i]] + margins[[i]] <- margins_new + } + if (!is.null(names(dim(data[[i]])))) { + target_dims_names[[i]] <- names(dim(data[[i]]))[- margins[[i]]] + } + target_dims[[i]] <- c(1 : length(dim(data[[i]])))[- margins[[i]]] + } else { + target_dims[[i]] <- 1 : length(dim(data[[i]])) + if (!is.null(names(dim(data[[i]])))) { + target_dims_names[[i]] <- names(dim(data[[i]])) } - margins[[i]] <- margins_new } - target_dims[[i]] <- c(1 : length(dim(data[[i]])))[-c(margins[[i]])] } } else { # Check target_dims and build margins accordingly @@ -92,25 +119,39 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para margins <- vector('list', length(data)) for (i in 1 : length(data)) { if (is.character(unlist(target_dims[i]))) { - margins2 <- target_dims[[i]] - margins2_new <- c() - for (j in 1 : length(margins2)) { - margins2_new[j] <- which(names(dim(data[[i]])) == margins2[j]) + if (is.null(names(dim(data[[i]])))) { + stop("Parameter 'target_dims' contains dimension names, but ", + "some of the corresponding objects in 'data' do not have ", + "dimension names.") + } + margins2_new <- target_dims[[i]] + for (j in 1 : length(margins2_new)) { + matches <- which(names(dim(data[[i]])) == margins2_new[j]) + if (length(matches) < 1) { + stop("Could not find dimension '", margins2_new[j], "' in ", i, + "th object provided in 'data'.") + } + margins2_new[j] <- matches[1] } + target_dims_names[[i]] <- target_dims[[i]] target_dims[[i]] <- margins2_new } - margins[[i]] <- c(1 : length(dim(data[[i]]))[-c(target_dims[[i]])]) + if (!is.null(names(dim(data[[i]])))) { + margins_names[[i]] <- names(dim(data[[i]]))[- target_dims[[i]]] + } + margins[[i]] <- c(1 : length(dim(data[[i]]))[- target_dims[[i]]]) + } } # Reorder dimensions of input data for target dims to be left-most + # and in the required order. for (i in 1 : length(data)) { if (is.unsorted(target_dims[[i]]) || (max(target_dims[[i]]) > length(target_dims[[i]]))) { - targ_dims <- sort(target_dims[[i]]) marg_dims <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] - data[[i]] <- .aperm2(data[[i]], c(targ_dims, marg_dims)) - target_dims[[i]] <- 1 : length(targ_dims) - margins[[i]] <- c(1 : length(dim(data[[i]])))[-c(target_dims[[i]])] + data[[i]] <- .aperm2(data[[i]], c(target_dims[[i]], marg_dims)) + target_dims[[i]] <- 1 : length(target_dims[[i]]) + margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) } } # Check AtomicFun @@ -145,12 +186,12 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para splatted_f <- splat(AtomicFun) - if (!is.null(margins)) { + if (any(!sapply(margins, is.null))) { #Check margins match for input objects - all_dims <- c(unlist(lapply(1 : length(data), - function(x) sum(dim(data[[x]])[margins[[x]]])))) + all_dims <- c(sapply(1 : length(data), + function(x) sum(dim(data[[x]])[margins[[x]]]))) print(all_dims) - pos_dim <- min(which(all_dims == max(all_dims))) + pos_dim <- which.max(all_dims)[1] dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] print(pos_dim) print(dim_template) @@ -206,5 +247,6 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para if (!is.null(dim(WrapperFun))) { names(dim(WrapperFun))[(length(dim(WrapperFun)) - length(names) + 1) : length(dim(WrapperFun))] <- c(names) } + out <- WrapperFun } -- GitLab From ebe226fe66b5800683c74f0cd2aab7d97d35f456 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 17 Oct 2017 11:36:55 +0200 Subject: [PATCH 11/25] Some progress. --- R/Apply.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/R/Apply.R b/R/Apply.R index cffc871..7c5fac7 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -181,6 +181,36 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para ncores <- min(availableCores() - 1, ncores) } + # Consistency checks of margins of all arrays +# accumulated_found_margins <- list() +# steps: +# for each data array, add its margins to the list if not present. +# if there are unnamed margins in the list, check their size matches the margins being added +# and simply assing them a name +# those margins present, check that they match +# if unnamed margins, check consistency with found margins +# if more mrgins than found, add numbers to the list, without names +# with this we end up with a named list of margin sizes +# for data arrays with unnamed margins, we can assume their margins names are those of the first entries in the resulting list +# then need to check which margins are common for all the data arrays. Those will be used by llply. +# For the margins that are not common, we will need to iterate manually across them, and use data arrays repeatedly as needed +# Extra: take into account in this process that there can be empty margin vectors +# +# for (i in 1 : length(data)) { +# if (!is.null(names(dim(data[[i]])))) { +# matches <- which(names(dim(data[[i]])) %in% names(accumulated_found_sizes)) +# no_match_names <- names(dim(data[[i]])) +# if (length(matches) > 0) { +# match_names <- names(dim(data[[i]]))[matches] +# if (any(unlist(accumulated_found_sizes[match_names]) != dim(data[[i]])[match_names])) { +# stop("Found dimensions with the same name and different size in 'data'.") +# } +# no_match_names <- no_match_names[- matches] +# } +# accumulated_found_sizes <- c(accumulated_found_sizes, as.list(dim(data[[i]])[no_match_names])) +# } +# + names <- names(dim(data[[1]]))[margins[[1]]] input <- list() -- GitLab From af732610045ec806bd80b3bef2a95f97d3280fa3 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Wed, 18 Oct 2017 20:55:28 +0200 Subject: [PATCH 12/25] Progress with checks. --- R/Apply.R | 286 ++++++++++++++++++++++++++----------------- R/{Utils.R => zzz.R} | 0 2 files changed, 177 insertions(+), 109 deletions(-) rename R/{Utils.R => zzz.R} (100%) diff --git a/R/Apply.R b/R/Apply.R index 7c5fac7..8c7dd7c 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -46,6 +46,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para margins_names <- vector('list', length(data)) target_dims_names <- vector('list', length(data)) if (!is.null(margins)) { + # Check margins and build target_dims accordingly if (!is.list(margins)) { margins <- rep(list(margins), length(data)) @@ -72,22 +73,23 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para "some of the corresponding objects in 'data' do not have ", "dimension names.") } - margins_new <- margins[[i]] - for (j in 1 : length(margins_new)) { - matches <- which(names(dim(data[[i]])) == margins_new[j]) + margins2 <- margins[[i]] + margins2_new_num <- c() + for (j in 1 : length(margins2)) { + matches <- which(names(dim(data[[i]])) == margins2[j]) if (length(matches) < 1) { - stop("Could not find dimension '", margins_new[j], "' in ", i, + stop("Could not find dimension '", margins2[j], "' in ", i, "th object provided in 'data'.") } - margins_new[j] <- matches[1] + margins2_new_num[j] <- matches[1] } margins_names[[i]] <- margins[[i]] - margins[[i]] <- margins_new + margins[[i]] <- margins2_new_num } if (!is.null(names(dim(data[[i]])))) { target_dims_names[[i]] <- names(dim(data[[i]]))[- margins[[i]]] } - target_dims[[i]] <- c(1 : length(dim(data[[i]])))[- margins[[i]]] + target_dims[[i]] <- (1 : length(dim(data[[i]])))[- margins[[i]]] } else { target_dims[[i]] <- 1 : length(dim(data[[i]])) if (!is.null(names(dim(data[[i]])))) { @@ -124,25 +126,26 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para "some of the corresponding objects in 'data' do not have ", "dimension names.") } - margins2_new <- target_dims[[i]] - for (j in 1 : length(margins2_new)) { - matches <- which(names(dim(data[[i]])) == margins2_new[j]) + targs2 <- target_dims[[i]] + targs2_new_num <- c() + for (j in 1 : length(targs2)) { + matches <- which(names(dim(data[[i]])) == targs2[j]) if (length(matches) < 1) { - stop("Could not find dimension '", margins2_new[j], "' in ", i, + stop("Could not find dimension '", targs2[j], "' in ", i, "th object provided in 'data'.") } - margins2_new[j] <- matches[1] + targs2_new_num[j] <- matches[1] } target_dims_names[[i]] <- target_dims[[i]] - target_dims[[i]] <- margins2_new + target_dims[[i]] <- targs2_new_num } if (!is.null(names(dim(data[[i]])))) { margins_names[[i]] <- names(dim(data[[i]]))[- target_dims[[i]]] } - margins[[i]] <- c(1 : length(dim(data[[i]]))[- target_dims[[i]]]) - + margins[[i]] <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] } } + # Reorder dimensions of input data for target dims to be left-most # and in the required order. for (i in 1 : length(data)) { @@ -154,6 +157,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) } } + # Check AtomicFun if (is.character(AtomicFun)) { try({AtomicFun <- get(AtomicFun)}, silent = TRUE) @@ -165,10 +169,12 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para stop("Parameter 'AtomicFun' must be a function or a character string ", "with the name of a function.") } + # Check parallel if (!is.logical(parallel)) { stop("Parameter 'parallel' must be logical.") } + # Check ncores if (parallel) { if (is.null(ncores)) { @@ -181,102 +187,164 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para ncores <- min(availableCores() - 1, ncores) } - # Consistency checks of margins of all arrays -# accumulated_found_margins <- list() -# steps: -# for each data array, add its margins to the list if not present. -# if there are unnamed margins in the list, check their size matches the margins being added -# and simply assing them a name -# those margins present, check that they match -# if unnamed margins, check consistency with found margins -# if more mrgins than found, add numbers to the list, without names -# with this we end up with a named list of margin sizes -# for data arrays with unnamed margins, we can assume their margins names are those of the first entries in the resulting list -# then need to check which margins are common for all the data arrays. Those will be used by llply. -# For the margins that are not common, we will need to iterate manually across them, and use data arrays repeatedly as needed -# Extra: take into account in this process that there can be empty margin vectors -# -# for (i in 1 : length(data)) { -# if (!is.null(names(dim(data[[i]])))) { -# matches <- which(names(dim(data[[i]])) %in% names(accumulated_found_sizes)) -# no_match_names <- names(dim(data[[i]])) -# if (length(matches) > 0) { -# match_names <- names(dim(data[[i]]))[matches] -# if (any(unlist(accumulated_found_sizes[match_names]) != dim(data[[i]])[match_names])) { -# stop("Found dimensions with the same name and different size in 'data'.") -# } -# no_match_names <- no_match_names[- matches] -# } -# accumulated_found_sizes <- c(accumulated_found_sizes, as.list(dim(data[[i]])[no_match_names])) -# } -# - - names <- names(dim(data[[1]]))[margins[[1]]] - input <- list() - - splatted_f <- splat(AtomicFun) - - if (any(!sapply(margins, is.null))) { - #Check margins match for input objects - all_dims <- c(sapply(1 : length(data), - function(x) sum(dim(data[[x]])[margins[[x]]]))) - print(all_dims) - pos_dim <- which.max(all_dims)[1] - dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] - print(pos_dim) - print(dim_template) - for (i in 1 : length(data)) { - if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { - for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { - print("OK") - print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) - if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { - print(class(data[[i]])) - data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) - data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) - print(class(data[[i]])) - } - } - } - } - print(dim(data)[[2]]) - .isolate <- function(data, margin_length, drop = TRUE) { - eval(dim(environment()$data)) - structure(list(env = environment(), index = margin_length, subs = as.name("[")), - class = c("indexed_array")) - } - for (i in 1 : length(data)) { - margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) - margin_length[-margins[[i]]] <- "" - margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - input[[i]] <- .isolate(data[[i]], margin_length) - } - dims <- dim(data[[1]])[margins[[1]]] - i_max <- length(input[[1]])[1] / dims[[1]] - k <- length(input[[1]]) / i_max - if (parallel == TRUE) { - registerDoParallel(ncores) - } - WrapperFun <- llply(1 : i_max, function(i) - sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x),...), simplify = FALSE), - .parallel = parallel) - if (parallel == TRUE) { - registerDoSEQ() - } - if (is.null(dim(WrapperFun[[1]][[1]]))) { - WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length((WrapperFun[[1]])[[1]])), - dim(data[[1]])[margins[[1]]])) + # Consistency checks of margins of all input objects + # for each data array, add its margins to the list if not present. + # if there are unnamed margins in the list, check their size matches the margins being added + # and simply assing them a name + # those margins present, check that they match + # if unnamed margins, check consistency with found margins + # if more mrgins than found, add numbers to the list, without names + # with this we end up with a named list of margin sizes + # for data arrays with unnamed margins, we can assume their margins names are those of the first entries in the resulting list + accumulated_found_margins <- afm <- list() + for (i in 1:length(data)) { + if (!is.null(margins_names[[i]])) { + if (length(afm) > 0) { + matches <- which(margins_names[[i]] %in% names(afm)) + if (length(matches) > 0) { + margs_to_add <- as.list(dim(data[[i]])[margins[[i]]][- matches]) + if (any(dim(data[[i]])[margins[[i]][matches]] != unlist(afm[margins_names[[i]][matches]]))) { + stop("Found one or more margin dimensions with the same name and ", + "different length in some of the input objects in 'data'.") + } + } else { + margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) + } + unnamed_margins <- which(sapply(names(afm), nchar) == 0) + if (length(unnamed_margins) > 0) { + stop_with_error <- FALSE + if (length(unnamed_margins) <= length(margs_to_add)) { + if (any(unlist(afm[unnamed_margins]) != unlist(margs_to_add[1:length(unnamed_margins)]))) { + stop_with_error <- TRUE + } + names(afm)[unnamed_margins] <- names(margs_to_add)[1:length(unnamed_margins)] + margs_to_add <- margs_to_add[- (1:length(margs_to_add))] + } else { + if (any(unlist(afm[unnamed_margins[1:length(margs_to_add)]]) != unlist(margs_to_add))) { + stop_with_error <- TRUE + } + names(afm)[unnamed_margins[1:length(margs_to_add)]] <- names(margs_to_add) + margs_to_add <- list() + } + if (stop_with_error) { + stop("Found unnamed margins (for some objects in parameter ", + "'data') that have been associated by their position to ", + "named margins in other objects in 'data' and do not have ", + "matching length. It could also be that the unnamed ", + "margins don not follow the same order as the named ", + "margins. In that case, either put the corresponding names ", + "to the dimensions of the objects in 'data', or put them ", + "in a consistent order.") + } + } + afm <- c(afm, margs_to_add) + } else { + afm <- as.list(dim(data[[i]])[margins[[i]]]) + } } else { - WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])), - dim(data[[1]])[margins[[1]]])) + margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) + names(margs_to_add) <- rep('', length(margs_to_add)) + if (length(afm) > 0) { + stop_with_error <- FALSE + if (length(afm) >= length(margs_to_add)) { + if (any(unlist(margs_to_add) != unlist(afm[1:length(margs_to_add)]))) { + stop_with_error <- TRUE + } + } else { + if (any(unlist(margs_to_add)[1:length(afm)] != unlist(afm))) { + stop_with_error <- TRUE + } + margs_to_add <- margs_to_add[- (1:length(afm))] + afm <- c(afm, margs_to_add) + } + if (stop_with_error) { + stop("Found unnamed margins (for some objects in parameter ", + "'data') that have been associated by their position to ", + "named margins in other objects in 'data' and do not have ", + "matching length. It could also be that the unnamed ", + "margins don not follow the same order as in other ", + "objects. In that case, either put the corresponding names ", + "to the dimensions of the objects in 'data', or put them ", + "in a consistent order.") + } + } else { + afm <- margs_to_add + } } - } else { - WrapperFun <- splatted_f(data, ...) - } - if (!is.null(dim(WrapperFun))) { - names(dim(WrapperFun))[(length(dim(WrapperFun)) - length(names) + 1) : length(dim(WrapperFun))] <- c(names) } - out <- WrapperFun + # Now need to check which margins are common for all the data arrays. + # Those will be used by llply. + # For the margins that are not common, we will need to iterate manually + # across them, and use data arrays repeatedly as needed. + common_margs <- c() + +# names <- names(dim(data[[1]]))[margins[[1]]] +# input <- list() +# +# splatted_f <- splat(AtomicFun) +# +# if (any(!sapply(margins, is.null))) { +# #Check margins match for input objects +# all_dims <- c(sapply(1 : length(data), +# function(x) sum(dim(data[[x]])[margins[[x]]]))) +# print(all_dims) +# pos_dim <- which.max(all_dims)[1] +# dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] +# print(pos_dim) +# print(dim_template) +# for (i in 1 : length(data)) { +# if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { +# for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { +# print("OK") +# print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) +# if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { +# print(class(data[[i]])) +# data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) +# data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) +# print(class(data[[i]])) +# } +# } +# } +# } +# print(dim(data)[[2]]) +# .isolate <- function(data, margin_length, drop = TRUE) { +# eval(dim(environment()$data)) +# structure(list(env = environment(), index = margin_length, subs = as.name("[")), +# class = c("indexed_array")) +# } +# for (i in 1 : length(data)) { +# margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) +# margin_length[-margins[[i]]] <- "" +# margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, +# stringsAsFactors = FALSE) +# input[[i]] <- .isolate(data[[i]], margin_length) +# } +# dims <- dim(data[[1]])[margins[[1]]] +# i_max <- length(input[[1]])[1] / dims[[1]] +# k <- length(input[[1]]) / i_max +# if (parallel == TRUE) { +# registerDoParallel(ncores) +# } +# WrapperFun <- llply(1 : i_max, function(i) +# sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x),...), simplify = FALSE), +# .parallel = parallel) +# if (parallel == TRUE) { +# registerDoSEQ() +# } +# if (is.null(dim(WrapperFun[[1]][[1]]))) { +# WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length((WrapperFun[[1]])[[1]])), +# dim(data[[1]])[margins[[1]]])) +# } else { +# WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])), +# dim(data[[1]])[margins[[1]]])) +# } +# } else { +# WrapperFun <- splatted_f(data, ...) +# } +# if (!is.null(dim(WrapperFun))) { +# names(dim(WrapperFun))[(length(dim(WrapperFun)) - length(names) + 1) : length(dim(WrapperFun))] <- c(names) +# } +# +# out <- WrapperFun } diff --git a/R/Utils.R b/R/zzz.R similarity index 100% rename from R/Utils.R rename to R/zzz.R -- GitLab From 0dce49e9c63e5d0e01073f91219e6b621f52fcf4 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Thu, 19 Oct 2017 01:51:06 +0200 Subject: [PATCH 13/25] Progress. Needs debugging. --- R/Apply.R | 213 ++++++++++++++++++++++++++++++++---------------------- R/zzz.R | 57 +++++++++++++++ 2 files changed, 182 insertions(+), 88 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 8c7dd7c..f7e29e9 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -196,34 +196,34 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para # if more mrgins than found, add numbers to the list, without names # with this we end up with a named list of margin sizes # for data arrays with unnamed margins, we can assume their margins names are those of the first entries in the resulting list - accumulated_found_margins <- afm <- list() + all_found_margins_lengths <- afml <- list() for (i in 1:length(data)) { if (!is.null(margins_names[[i]])) { - if (length(afm) > 0) { - matches <- which(margins_names[[i]] %in% names(afm)) + if (length(afml) > 0) { + matches <- which(margins_names[[i]] %in% names(afml)) if (length(matches) > 0) { margs_to_add <- as.list(dim(data[[i]])[margins[[i]]][- matches]) - if (any(dim(data[[i]])[margins[[i]][matches]] != unlist(afm[margins_names[[i]][matches]]))) { + if (any(dim(data[[i]])[margins[[i]][matches]] != unlist(afml[margins_names[[i]][matches]]))) { stop("Found one or more margin dimensions with the same name and ", "different length in some of the input objects in 'data'.") } } else { margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) } - unnamed_margins <- which(sapply(names(afm), nchar) == 0) + unnamed_margins <- which(sapply(names(afml), nchar) == 0) if (length(unnamed_margins) > 0) { stop_with_error <- FALSE if (length(unnamed_margins) <= length(margs_to_add)) { - if (any(unlist(afm[unnamed_margins]) != unlist(margs_to_add[1:length(unnamed_margins)]))) { + if (any(unlist(afml[unnamed_margins]) != unlist(margs_to_add[1:length(unnamed_margins)]))) { stop_with_error <- TRUE } - names(afm)[unnamed_margins] <- names(margs_to_add)[1:length(unnamed_margins)] + names(afml)[unnamed_margins] <- names(margs_to_add)[1:length(unnamed_margins)] margs_to_add <- margs_to_add[- (1:length(margs_to_add))] } else { - if (any(unlist(afm[unnamed_margins[1:length(margs_to_add)]]) != unlist(margs_to_add))) { + if (any(unlist(afml[unnamed_margins[1:length(margs_to_add)]]) != unlist(margs_to_add))) { stop_with_error <- TRUE } - names(afm)[unnamed_margins[1:length(margs_to_add)]] <- names(margs_to_add) + names(afml)[unnamed_margins[1:length(margs_to_add)]] <- names(margs_to_add) margs_to_add <- list() } if (stop_with_error) { @@ -237,25 +237,25 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para "in a consistent order.") } } - afm <- c(afm, margs_to_add) + afml <- c(afml, margs_to_add) } else { - afm <- as.list(dim(data[[i]])[margins[[i]]]) + afml <- as.list(dim(data[[i]])[margins[[i]]]) } } else { margs_to_add <- as.list(dim(data[[i]])[margins[[i]]]) names(margs_to_add) <- rep('', length(margs_to_add)) - if (length(afm) > 0) { + if (length(afml) > 0) { stop_with_error <- FALSE - if (length(afm) >= length(margs_to_add)) { - if (any(unlist(margs_to_add) != unlist(afm[1:length(margs_to_add)]))) { + if (length(afml) >= length(margs_to_add)) { + if (any(unlist(margs_to_add) != unlist(afml[1:length(margs_to_add)]))) { stop_with_error <- TRUE } } else { - if (any(unlist(margs_to_add)[1:length(afm)] != unlist(afm))) { + if (any(unlist(margs_to_add)[1:length(afml)] != unlist(afml))) { stop_with_error <- TRUE } - margs_to_add <- margs_to_add[- (1:length(afm))] - afm <- c(afm, margs_to_add) + margs_to_add <- margs_to_add[- (1:length(afml))] + afml <- c(afml, margs_to_add) } if (stop_with_error) { stop("Found unnamed margins (for some objects in parameter ", @@ -268,83 +268,120 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para "in a consistent order.") } } else { - afm <- margs_to_add + afml <- margs_to_add } } } + # afml is now a named list with the lenghts of all margins. Each margin + # appears once only. If some names are not provided, they are missing, + # e.g. ''. # Now need to check which margins are common for all the data arrays. # Those will be used by llply. # For the margins that are not common, we will need to iterate manually # across them, and use data arrays repeatedly as needed. - common_margs <- c() - -# names <- names(dim(data[[1]]))[margins[[1]]] -# input <- list() -# -# splatted_f <- splat(AtomicFun) -# -# if (any(!sapply(margins, is.null))) { -# #Check margins match for input objects -# all_dims <- c(sapply(1 : length(data), -# function(x) sum(dim(data[[x]])[margins[[x]]]))) -# print(all_dims) -# pos_dim <- which.max(all_dims)[1] -# dim_template <- dim(data[[pos_dim]])[margins[[pos_dim]]] -# print(pos_dim) -# print(dim_template) -# for (i in 1 : length(data)) { -# if (identical(dim(data[[i]])[margins[[i]]], dim_template) == FALSE) { -# for (j in 1 : (length(dim(data[[i]])[margins[[i]]]))) { -# print("OK") -# print(c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) -# if (c(dim(data[[i]])[margins[[i]]])[j] != dim_template[j]) { -# print(class(data[[i]])) -# data[[i]] <- InsertDim(data[[i]], posdim = margins[[i]][j], lendim = dim_template[j]) -# data[[i]] <- adrop(data[[i]], drop = (margins[[i]][j] + 1)) -# print(class(data[[i]])) -# } -# } -# } -# } -# print(dim(data)[[2]]) -# .isolate <- function(data, margin_length, drop = TRUE) { -# eval(dim(environment()$data)) -# structure(list(env = environment(), index = margin_length, subs = as.name("[")), -# class = c("indexed_array")) -# } -# for (i in 1 : length(data)) { -# margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) -# margin_length[-margins[[i]]] <- "" -# margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, -# stringsAsFactors = FALSE) -# input[[i]] <- .isolate(data[[i]], margin_length) -# } -# dims <- dim(data[[1]])[margins[[1]]] -# i_max <- length(input[[1]])[1] / dims[[1]] -# k <- length(input[[1]]) / i_max -# if (parallel == TRUE) { -# registerDoParallel(ncores) -# } -# WrapperFun <- llply(1 : i_max, function(i) -# sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x),...), simplify = FALSE), -# .parallel = parallel) -# if (parallel == TRUE) { -# registerDoSEQ() -# } -# if (is.null(dim(WrapperFun[[1]][[1]]))) { -# WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length((WrapperFun[[1]])[[1]])), -# dim(data[[1]])[margins[[1]]])) -# } else { -# WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])), -# dim(data[[1]])[margins[[1]]])) -# } -# } else { -# WrapperFun <- splatted_f(data, ...) -# } -# if (!is.null(dim(WrapperFun))) { -# names(dim(WrapperFun))[(length(dim(WrapperFun)) - length(names) + 1) : length(dim(WrapperFun))] <- c(names) -# } -# -# out <- WrapperFun + margins_afml <- margins + for (i in 1:length(dat)) { + if (!is.null(margins_names[[i]])) { + margins_afml[[i]] <- sapply(margins_names[[i]], + function(x) { + sapply(x, + function(y) { + which(names(afml) == y) + } + ) + } + ) + } + } + common_margs <- margins_afml[[1]] + if (length(margins_afml) > 1) { + for (i in 2:length(margins_afml)) { + non_matches <- which(!(margins_afml[[i]] %in% common_margs)) + if (length(non_matches) > 0) { + common_margs <- common_margs[- non_matches] + } + } + } + non_common_margs <- 1:length(afml) + if (length(common_margs) > 0) { + non_common_margs <- non_common_margs[- common_args] + } + # common_margs is now a numeric vector with the indices of the common + # margins (i.e. their position in afml) + # non_common_margs is now a numeric vector with the indices of the + # non-common margins (i.e. their position in afml) + + .isolate <- function(data, margin_length, drop = TRUE) { + eval(dim(environment()$data)) + structure(list(env = environment(), index = margin_length, subs = as.name("[")), + class = c("indexed_array")) + } + splatted_f <- splat(AtomicFun) + + # Iterate along all non-common margins + non_common_margins_array <- ncma <- array(1:prod(unlist(afml[non_common_margs])), + dim = unlist(afml[non_common_margs])) + array_of_results <- vector('list', length(ncma)) + dim(array_of_results) <- dim(ncma) + for (j in 1:length(ncma)) { + marg_indices <- which(non_common_margins_array == j, arr.ind = TRUE)[1, ] + names(marg_indices) <- names(dim(ncma)) + input <- list() + for (i in 1:length(data)) { + indices_to_take <- as.list(rep(TRUE, length(dim(data[[i]])))) + inds_to_modify <- which(names(dim(data[[i]])) %in% names(marg_indices)) + if (length(inds_to_modify) > 0) { + indices_to_take[inds_to_modify] <- as.list(marg_indices[names(dim(data[[i]]))[inds_to_modify]]) + input[[i]] <- do.call('[', c(list(x = data[[i]]), indices_to_take, list(drop = FALSE))) + } else { + input[[i]] <- data[[i]] + } + } + # Each iteration of j, the variable input is populated with sub-arrays for + # each object in data (if possible). For each set of 'input's, the + # splatted_f is applied in parallel if possible. + if (length(common_margs) > 0) { + max_size <- 0 + for (i in 1 : length(input)) { + margin_length <- lapply(dim(input[[i]]), function(x) 1 : x) + margin_length[-margins[[i]]] <- "" + margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE) + input[[i]] <- .isolate(input[[i]], margin_length) + if (prod(dim(input[[i]])) > max_size) { + max_size <- prod(dim(input[[i]])) + } + } + dims <- unlist(afml[common_margs]) + i_max <- max_size / dims[1] + k <- max_size / i_max + if (parallel == TRUE) { + registerDoParallel(ncores) + } + array_of_results[[j]] <- llply(1 : i_max, function(i) + sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x), ...), simplify = FALSE), + .parallel = parallel) + if (parallel == TRUE) { + registerDoSEQ() + } + #if (is.null(dim(WrapperFun[[1]][[1]]))) { + # WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length((WrapperFun[[1]])[[1]])), + # dim(data[[1]])[margins[[1]]])) + #} else { + # WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])), + # dim(data[[1]])[margins[[1]]])) + #} + } else { + array_of_results[[j]] <- splatted_f(input, ...) + } + + for (component in 1:length(array_of_results[[j]])) { + names(dim(array_of_results[[j]][[component]])) <- c(rep('', length(dim(array_of_results[[j]][[component]])) - legth(afml)), + names(afml)) + } + } + + # Merge results + .MergeArrayOfArrays(array_of_results) } diff --git a/R/zzz.R b/R/zzz.R index 4404d32..d942a35 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -7,3 +7,60 @@ dim(x) <- old_dims[new_order] x } + +# Takes as input a list of arrays. The list must have named dimensions. +.MergeArrayOfArrays <- function(array_of_arrays) { + MergeArrays <- startR:::.MergeArrays + array_dims <- (dim(array_of_arrays)) + dim_names <- names(array_dims) + + # Merge the chunks. + for (dim_index in 1:length(dim_names)) { + dim_sub_array_of_chunks <- dim_sub_array_of_chunk_indices <- NULL + if (dim_index < length(dim_names)) { + dim_sub_array_of_chunks <- array_dims[(dim_index + 1):length(dim_names)] + names(dim_sub_array_of_chunks) <- dim_names[(dim_index + 1):length(dim_names)] + dim_sub_array_of_chunk_indices <- dim_sub_array_of_chunks + sub_array_of_chunk_indices <- array(1:prod(dim_sub_array_of_chunk_indices), + dim_sub_array_of_chunk_indices) + } else { + sub_array_of_chunk_indices <- NULL + } + sub_array_of_chunks <- vector('list', prod(dim_sub_array_of_chunks)) + dim(sub_array_of_chunks) <- dim_sub_array_of_chunks + for (i in 1:prod(dim_sub_array_of_chunks)) { + if (!is.null(sub_array_of_chunk_indices)) { + chunk_sub_indices <- which(sub_array_of_chunk_indices == i, arr.ind = TRUE)[1, ] + } else { + chunk_sub_indices <- NULL + } + for (j in 1:(array_dims[dim_index])) { + new_chunk <- do.call('[[', c(list(x = array_of_arrays), + as.list(c(j, chunk_sub_indices)))) + #do.call('[[<-', c(list(x = array_of_chunks), + # as.list(c(j, chunk_sub_indices)), + # list(value = NULL))) + if (is.null(new_chunk)) { + stop("Chunks missing.") + } + if (is.null(sub_array_of_chunks[[i]])) { + sub_array_of_chunks[[i]] <- new_chunk + } else { + #if (length(new_chunk) != length(sub_array_of_chunks[[i]])) { + # stop("Missing components for some chunks.") + #} + #for (component in 1:length(new_chunk)) { + sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]], + new_chunk, + dim_names[dim_index]) + #} + } + } + } + array_of_arrays <- sub_array_of_chunks + rm(sub_array_of_chunks) + gc() + } + + array_of_arrays[[1]] +} -- GitLab From ee856d8d7696fd6d1c49f7bf83b6fb4c239d8714 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Thu, 19 Oct 2017 02:50:41 +0200 Subject: [PATCH 14/25] Debug progress. --- R/Apply.R | 53 +++++++++++++++++++++------------ R/zzz.R | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 121 insertions(+), 19 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index f7e29e9..15a8412 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -281,7 +281,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para # For the margins that are not common, we will need to iterate manually # across them, and use data arrays repeatedly as needed. margins_afml <- margins - for (i in 1:length(dat)) { + for (i in 1:length(data)) { if (!is.null(margins_names[[i]])) { margins_afml[[i]] <- sapply(margins_names[[i]], function(x) { @@ -305,7 +305,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para } non_common_margs <- 1:length(afml) if (length(common_margs) > 0) { - non_common_margs <- non_common_margs[- common_args] + non_common_margs <- non_common_margs[- common_margs] } # common_margs is now a numeric vector with the indices of the common # margins (i.e. their position in afml) @@ -320,12 +320,16 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para splatted_f <- splat(AtomicFun) # Iterate along all non-common margins - non_common_margins_array <- ncma <- array(1:prod(unlist(afml[non_common_margs])), - dim = unlist(afml[non_common_margs])) + if (length(non_common_margs) > 0) { + non_common_margins_array <- ncma <- array(1:prod(unlist(afml[non_common_margs])), + dim = unlist(afml[non_common_margs])) + } else { + ncma <- array(1) + } array_of_results <- vector('list', length(ncma)) dim(array_of_results) <- dim(ncma) for (j in 1:length(ncma)) { - marg_indices <- which(non_common_margins_array == j, arr.ind = TRUE)[1, ] + marg_indices <- which(ncma == j, arr.ind = TRUE)[1, ] names(marg_indices) <- names(dim(ncma)) input <- list() for (i in 1:length(data)) { @@ -365,23 +369,36 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para if (parallel == TRUE) { registerDoSEQ() } - #if (is.null(dim(WrapperFun[[1]][[1]]))) { - # WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(length((WrapperFun[[1]])[[1]])), - # dim(data[[1]])[margins[[1]]])) - #} else { - # WrapperFun <- array(as.numeric(unlist(WrapperFun)), dim=c(c(dim(WrapperFun[[1]][[1]])), - # dim(data[[1]])[margins[[1]]])) - #} + for (component in 1:length(array_of_results[[j]])) { + print("AA") + print(length(afml)) + print(str(array_of_results[[j]])) + #if (length(common_margs) > 0) { + if (is.null(dim(array_of_results[[j]][[component]][[1]]))) { + array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), + dim = c(dim(array_of_results[[j]][[component]][[1]]), + setNames(rep(1, length(dim(ncma))), names(dim(ncma))), + unlist(afml[common_margs]))) + } else { + array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), + dim = c(length(array_of_results[[j]][[component]][[1]]), + setNames(rep(1, length(dim(ncma))), names(dim(ncma))), + unlist(afml[common_margs]))) + } + + #} + #names(dim(array_of_results[[j]][[component]])) <- c(rep('', length(dim(array_of_results[[j]][[component]])) - length(afml)), + # names(afml)) + } } else { array_of_results[[j]] <- splatted_f(input, ...) } - - for (component in 1:length(array_of_results[[j]])) { - names(dim(array_of_results[[j]][[component]])) <- c(rep('', length(dim(array_of_results[[j]][[component]])) - legth(afml)), - names(afml)) - } } # Merge results - .MergeArrayOfArrays(array_of_results) + if (length(array_of_results) > 1) { + .MergeArrayOfArrays(array_of_results) + } else { + array_of_results[[1]] + } } diff --git a/R/zzz.R b/R/zzz.R index d942a35..32fe628 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -8,9 +8,94 @@ x } +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- c() + new_dims2 <- c() + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-c(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. +# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + # Takes as input a list of arrays. The list must have named dimensions. .MergeArrayOfArrays <- function(array_of_arrays) { - MergeArrays <- startR:::.MergeArrays + MergeArrays <- .MergeArrays array_dims <- (dim(array_of_arrays)) dim_names <- names(array_dims) -- GitLab From a79f50c6ae1ffc327dc7eff3ada236c0be9cc205 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Thu, 19 Oct 2017 03:07:53 +0200 Subject: [PATCH 15/25] More debug. --- R/Apply.R | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 15a8412..b10caab 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -370,22 +370,19 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para registerDoSEQ() } for (component in 1:length(array_of_results[[j]])) { - print("AA") - print(length(afml)) - print(str(array_of_results[[j]])) #if (length(common_margs) > 0) { + component_dims <- c() if (is.null(dim(array_of_results[[j]][[component]][[1]]))) { - array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), - dim = c(dim(array_of_results[[j]][[component]][[1]]), - setNames(rep(1, length(dim(ncma))), names(dim(ncma))), - unlist(afml[common_margs]))) + component_dims <- length(array_of_results[[j]][[component]][[1]]) } else { - array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), - dim = c(length(array_of_results[[j]][[component]][[1]]), - setNames(rep(1, length(dim(ncma))), names(dim(ncma))), - unlist(afml[common_margs]))) + component_dims <- dim(array_of_results[[j]][[component]][[1]]) } - + if (length(non_common_margs) > 0) { + component_dims <- c(component_dims, setNames(rep(1, length(dim(ncma))), names(dim(ncma)))) + } + component_dims <- c(component_dims, unlist(afml[common_margs])) + array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), + dim = component_dims) #} #names(dim(array_of_results[[j]][[component]])) <- c(rep('', length(dim(array_of_results[[j]][[component]])) - length(afml)), # names(afml)) -- GitLab From b5d797f7b078fc700130cce0c57126175ae9ea03 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 20 Oct 2017 04:30:57 +0200 Subject: [PATCH 16/25] Debug progress. --- R/Apply.R | 181 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 130 insertions(+), 51 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index b10caab..5e10eaf 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -21,7 +21,8 @@ #' (sum(y > z) / (length(y)))) * 100} #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins, AtomicFun = "test_fun") -Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, ncores = NULL) { +Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, + margins = NULL, parallel = FALSE, ncores = NULL) { # Check data if (!is.list(data)) { data <- list(data) @@ -36,6 +37,28 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para dim(data[[i]]) <- length(data[[i]]) } } + + # Check AtomicFun + if (is.character(AtomicFun)) { + try({AtomicFun <- get(AtomicFun)}, silent = TRUE) + if (!is.function(AtomicFun)) { + stop("Could not find the function '", AtomicFun, "'.") + } + } + if (!is.function(AtomicFun)) { + stop("Parameter 'AtomicFun' must be a function or a character string ", + "with the name of a function.") + } + output_dims <- NULL + if ('startR_step' %in% class(AtomicFun)) { + if (is.null(target_dims)) { + target_dims <- attr(AtomicFun, 'target_dims') + } + if (is.null(output_dims)) { + output_dims <- attr(AtomicFun, 'target_dims') + } + } + # Check target_dims and margins if (is.null(margins) && is.null(target_dims)) { stop("One of 'margins' or 'target_dims' must be specified.") @@ -158,16 +181,21 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para } } - # Check AtomicFun - if (is.character(AtomicFun)) { - try({AtomicFun <- get(AtomicFun)}, silent = TRUE) - if (!is.function(AtomicFun)) { - stop("Could not find the function '", AtomicFun, "'.") + # Check output_dims + if (!is.null(output_dims)) { + if (!is.list(output_dims)) { + output_dims <- list(output1 = output_dims) + } + if (any(sapply(output_dims, function(x) !is.character(x)))) { + stop("Parameter 'output_dims' must be one or a list of vectors of character strings.") + } + if (is.null(names(output_dims))) { + names(output_dims) <- rep('', length(output_dims)) + } + missing_output_names <- which(sapply(names(output_dims), nchar) == 0) + if (length(missing_output_names) > 0) { + names(output_dims)[missing_output_names] <- paste0('output', missing_output_names) } - } - if (!is.function(AtomicFun)) { - stop("Parameter 'AtomicFun' must be a function or a character string ", - "with the name of a function.") } # Check parallel @@ -312,11 +340,11 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para # non_common_margs is now a numeric vector with the indices of the # non-common margins (i.e. their position in afml) - .isolate <- function(data, margin_length, drop = TRUE) { - eval(dim(environment()$data)) - structure(list(env = environment(), index = margin_length, subs = as.name("[")), - class = c("indexed_array")) - } + #.isolate <- function(data, margin_length, drop = TRUE) { + # eval(dim(environment()$data)) + # structure(list(env = environment(), index = margin_length, subs = as.name("[")), + # class = c("indexed_array")) + #} splatted_f <- splat(AtomicFun) # Iterate along all non-common margins @@ -326,18 +354,25 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para } else { ncma <- array(1) } - array_of_results <- vector('list', length(ncma)) - dim(array_of_results) <- dim(ncma) + arrays_of_results <- NULL + found_first_result <- FALSE +print("AAA") +print(afml[common_margs]) +print("BBB") +print(afml[non_common_margs]) for (j in 1:length(ncma)) { marg_indices <- which(ncma == j, arr.ind = TRUE)[1, ] names(marg_indices) <- names(dim(ncma)) input <- list() + atomic_fun_out_dims <- list() for (i in 1:length(data)) { indices_to_take <- as.list(rep(TRUE, length(dim(data[[i]])))) inds_to_modify <- which(names(dim(data[[i]])) %in% names(marg_indices)) if (length(inds_to_modify) > 0) { indices_to_take[inds_to_modify] <- as.list(marg_indices[names(dim(data[[i]]))[inds_to_modify]]) input[[i]] <- do.call('[', c(list(x = data[[i]]), indices_to_take, list(drop = FALSE))) + names(dim(input[[i]])) <- names(dim(data[[i]])) + dim(input[[i]]) <- dim(input[[i]])[- inds_to_modify] } else { input[[i]] <- data[[i]] } @@ -346,56 +381,100 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., margins = NULL, para # each object in data (if possible). For each set of 'input's, the # splatted_f is applied in parallel if possible. if (length(common_margs) > 0) { - max_size <- 0 +print("EEEPPP") + #max_size <- 0 for (i in 1 : length(input)) { - margin_length <- lapply(dim(input[[i]]), function(x) 1 : x) - margin_length[-margins[[i]]] <- "" - margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - input[[i]] <- .isolate(input[[i]], margin_length) - if (prod(dim(input[[i]])) > max_size) { - max_size <- prod(dim(input[[i]])) - } + #margin_length <- lapply(dim(input[[i]]), function(x) 1 : x) + #margin_length[-margins[[i]]] <- "" + #margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, + # stringsAsFactors = FALSE) +print(str(input[[i]])) + #input[[i]] <- .isolate(input[[i]], margin_length) +print(str(input[[i]])) +stop() + #if (prod(dim(input[[i]])) > max_size) { + # max_size <- prod(dim(input[[i]])) + #} + dimnames(input[[i]]) <- Map(paste0, letters[seq_along(dim(input[[i]]))], + lapply(dim(input[[i]]), seq)) + input[[i]] <- alply(input[[i]], margins[[i]], .dims = TRUE) } dims <- unlist(afml[common_margs]) - i_max <- max_size / dims[1] + #i_max <- max_size / dims[1] + #k <- max_size / i_max + max_size <- prod(dims) + i_max <- max_size / max(dims)[1] k <- max_size / i_max if (parallel == TRUE) { registerDoParallel(ncores) } - array_of_results[[j]] <- llply(1 : i_max, function(i) +print(i_max) + result <- llply(1 : i_max, function(i) sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x), ...), simplify = FALSE), .parallel = parallel) if (parallel == TRUE) { registerDoSEQ() } - for (component in 1:length(array_of_results[[j]])) { - #if (length(common_margs) > 0) { - component_dims <- c() - if (is.null(dim(array_of_results[[j]][[component]][[1]]))) { - component_dims <- length(array_of_results[[j]][[component]][[1]]) - } else { - component_dims <- dim(array_of_results[[j]][[component]][[1]]) - } - if (length(non_common_margs) > 0) { - component_dims <- c(component_dims, setNames(rep(1, length(dim(ncma))), names(dim(ncma)))) - } - component_dims <- c(component_dims, unlist(afml[common_margs])) - array_of_results[[j]][[component]] <- array(unlist(array_of_results[[j]][[component]]), - dim = component_dims) - #} - #names(dim(array_of_results[[j]][[component]])) <- c(rep('', length(dim(array_of_results[[j]][[component]])) - length(afml)), - # names(afml)) - } + result <- list(array(unlist(result), dim = c(dim(result[[1]][[1]]), unlist(afml[common_margs])))) } else { - array_of_results[[j]] <- splatted_f(input, ...) + result <- list(splatted_f(input, ...)) + } + if (!found_first_result) { + array_of_results <- vector('list', length(ncma)) + dim(array_of_results) <- dim(ncma) + arrays_of_results <- replicate(length(result), array_of_results, simplify = FALSE) + if (!is.null(output_dims)) { + # Check number of outputs is correct. + if (length(output_dims) != length(arrays_of_results)) { + stop("The 'AtomicFun' returns ", length(arrays_of_results), " elements, but ", + length(output_dims), " elements were expected.") + } + names(arrays_of_results) <- names(output_dims) + } else { + names(arrays_of_results) <- paste0('output', 1:length(result)) + } + rm(array_of_results) + found_first_result <- TRUE + } + for (component in 1:length(result)) { + if (is.null(dim(result[[component]]))) { + component_dims <- length(result[[component]]) + } else { + component_dims <- dim(result[[component]]) + if (length(common_margs) > 0) { + component_dims <- component_dims[1:(length(component_dims) - length(common_margs))] + } + } + atomic_fun_out_dims[[component]] <- component_dims + if (length(non_common_margs) > 0) { + component_dims <- c(component_dims, setNames(rep(1, length(dim(ncma))), names(dim(ncma)))) + } + component_dims <- c(component_dims, unlist(afml[common_margs])) + dim(result[[component]]) <- component_dims + arrays_of_results[[component]][[j]] <- result[[component]] + } + if (!is.null(output_dims)) { + # Check number of output dimensions is correct. + for (component in 1:length(atomic_fun_out_dims)) { + if (length(atomic_fun_out_dims[[component]]) != output_dims[[component]]) { + stop("Expected ", component, "st returned element by 'AtomicFun'", + "to have ", length(output_dims[[component]]), " dimensions, ", + "but ", length(atomic_fun_out_dims[[component]]), " found.") + } + if (!is.null(names(atomic_fun_out_dims[[component]]))) { + # check component_dims match names of output_dims[[component]], and reorder if needed + } + } } } # Merge results - if (length(array_of_results) > 1) { - .MergeArrayOfArrays(array_of_results) - } else { - array_of_results[[1]] + for (component in 1:length(arrays_of_results)) { + if (length(arrays_of_results[[component]]) > 1) { + arrays_of_results[[component]] <- .MergeArrayOfArrays(arrays_of_results[[component]]) + } else { + arrays_of_results[[component]] <- arrays_of_results[[component]][[1]] + } } + arrays_of_results } -- GitLab From a52750818c82af9df1d127f9eecff033cd390b23 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 23 Oct 2017 03:51:48 +0200 Subject: [PATCH 17/25] Progress. --- R/Apply.R | 179 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 123 insertions(+), 56 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 5e10eaf..55eb638 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -69,7 +69,6 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, margins_names <- vector('list', length(data)) target_dims_names <- vector('list', length(data)) if (!is.null(margins)) { - # Check margins and build target_dims accordingly if (!is.list(margins)) { margins <- rep(list(margins), length(data)) @@ -325,7 +324,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, common_margs <- margins_afml[[1]] if (length(margins_afml) > 1) { for (i in 2:length(margins_afml)) { - non_matches <- which(!(margins_afml[[i]] %in% common_margs)) + non_matches <- which(!(common_margs %in% margins_afml[[i]])) if (length(non_matches) > 0) { common_margs <- common_margs[- non_matches] } @@ -340,11 +339,43 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, # non_common_margs is now a numeric vector with the indices of the # non-common margins (i.e. their position in afml) - #.isolate <- function(data, margin_length, drop = TRUE) { - # eval(dim(environment()$data)) - # structure(list(env = environment(), index = margin_length, subs = as.name("[")), - # class = c("indexed_array")) - #} + .isolate <- function(data, margin_length, drop = FALSE) { + eval(dim(environment()$data)) + structure(list(env = environment(), index = margin_length, + drop = drop, subs = as.name("[")), + class = c("indexed_array")) + } + .consolidate <- function(subsets, dimnames, out_dimnames) { + lapply(1:length(subsets), + function(x) { + dims <- dim(subsets[[x]]) + names(dims) <- dimnames[[x]] + dims <- dims[out_dimnames[[x]]] + array(subsets[[x]], dim = dims) + }) + } + + data_indexed <- vector('list', length(data)) + data_indexed_indices <- vector('list', length(data)) + for (i in 1 : length(data)) { + non_common_margs_i <- which(names(dim(data[[i]])) %in% names(afml[non_common_margs])) + if (length(non_common_margs_i) > 0) { + margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) + margin_length[- non_common_margs_i] <- "" + } else { + margin_length <- as.list(rep("", length(dim(data[[i]])))) + } + margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE) + data_indexed[[i]] <- .isolate(data[[i]], margin_length) + if (length(non_common_margs_i) > 0) { + data_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[non_common_margs_i]), + dim = dim(data[[i]])[non_common_margs_i]) + } else { + data_indexed_indices[[i]] <- array(1, dim = 1) + } + } + splatted_f <- splat(AtomicFun) # Iterate along all non-common margins @@ -356,73 +387,99 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } arrays_of_results <- NULL found_first_result <- FALSE -print("AAA") -print(afml[common_margs]) -print("BBB") -print(afml[non_common_margs]) +# need to parallelize this loop if no common margins or small common margins +# need to add progress bar +# need to use indexed arrays instead of arrays for (j in 1:length(ncma)) { - marg_indices <- which(ncma == j, arr.ind = TRUE)[1, ] +if (j %% 1000 == 0) { + print(j) +} + marg_indices <- arrayInd(j, dim(ncma)) + #marg_indices <- which(ncma == j, arr.ind = TRUE)[1, ] names(marg_indices) <- names(dim(ncma)) input <- list() atomic_fun_out_dims <- list() - for (i in 1:length(data)) { - indices_to_take <- as.list(rep(TRUE, length(dim(data[[i]])))) - inds_to_modify <- which(names(dim(data[[i]])) %in% names(marg_indices)) - if (length(inds_to_modify) > 0) { - indices_to_take[inds_to_modify] <- as.list(marg_indices[names(dim(data[[i]]))[inds_to_modify]]) - input[[i]] <- do.call('[', c(list(x = data[[i]]), indices_to_take, list(drop = FALSE))) - names(dim(input[[i]])) <- names(dim(data[[i]])) - dim(input[[i]]) <- dim(input[[i]])[- inds_to_modify] - } else { - input[[i]] <- data[[i]] - } - } # Each iteration of j, the variable input is populated with sub-arrays for # each object in data (if possible). For each set of 'input's, the # splatted_f is applied in parallel if possible. if (length(common_margs) > 0) { -print("EEEPPP") - #max_size <- 0 - for (i in 1 : length(input)) { - #margin_length <- lapply(dim(input[[i]]), function(x) 1 : x) - #margin_length[-margins[[i]]] <- "" - #margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, - # stringsAsFactors = FALSE) -print(str(input[[i]])) - #input[[i]] <- .isolate(input[[i]], margin_length) -print(str(input[[i]])) -stop() - #if (prod(dim(input[[i]])) > max_size) { - # max_size <- prod(dim(input[[i]])) - #} - dimnames(input[[i]]) <- Map(paste0, letters[seq_along(dim(input[[i]]))], - lapply(dim(input[[i]]), seq)) - input[[i]] <- alply(input[[i]], margins[[i]], .dims = TRUE) + input_indexed <- vector('list', length(data)) + input_indexed_indices <- vector('list', length(data)) + for (i in 1 : length(data_indexed)) { + ## + inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) + if (length(inds_to_take) > 0) { + input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), + marg_indices[inds_to_take], + list(drop = TRUE)))]] + } else { + input[[i]] <- data_indexed[[i]][[1]] + } + ## + common_margs_i <- which(names(dim(data[[i]])) %in% names(afml[common_margs])) + if (length(common_margs_i) > 0) { + margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) + margin_length[- common_margs_i] <- "" + } else { + margin_length <- as.list(rep("", length(dim(data[[i]])))) + } + margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE) + input_indexed[[i]] <- .isolate(input[[i]], margin_length) + ## + if (length(common_margs_i) > 0) { + input_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[common_margs_i]), + dim = dim(data[[i]])[common_margs_i]) + } else { + input_indexed_indices[[i]] <- array(1, dim = 1) + } } dims <- unlist(afml[common_margs]) - #i_max <- max_size / dims[1] - #k <- max_size / i_max + selected_dim <- which(dims != 1) + if (length(selected_dim) > 0) { + selected_dim <- selected_dim[1] + } else { + selected_dim <- 1 + } max_size <- prod(dims) - i_max <- max_size / max(dims)[1] + i_max <- max_size / dims[selected_dim] k <- max_size / i_max if (parallel == TRUE) { - registerDoParallel(ncores) + registerDoParallel(ncores) } -print(i_max) - result <- llply(1 : i_max, function(i) - sapply((k * i - (k - 1)) : (k * i), function(x) splatted_f(lapply(input, `[[`, x), ...), simplify = FALSE), - .parallel = parallel) + result <- llply(1 : i_max, + function(i) { + sapply((k * i - (k - 1)) : (k * i), + function(x) { + splatted_f(.consolidate(lapply(input_indexed, `[[`, x), + lapply(lapply(data, dim), names), + target_dims_names), + ...) + }, simplify = FALSE) + }, .parallel = parallel) if (parallel == TRUE) { registerDoSEQ() } - result <- list(array(unlist(result), dim = c(dim(result[[1]][[1]]), unlist(afml[common_margs])))) + result <- list(array(unlist(result), + dim = c(dim(result[[1]][[1]]), unlist(afml[common_margs])))) } else { - result <- list(splatted_f(input, ...)) + for (i in 1:length(data_indexed)) { + inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) + if (length(inds_to_take) > 0) { + input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), + marg_indices[inds_to_take], + list(drop = TRUE)))]] + } else { + input[[i]] <- data_indexed[[i]][[1]] + } + } + result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), target_dims_names), ...) + if (!is.list(result)) { + result <- list(result) + } } if (!found_first_result) { - array_of_results <- vector('list', length(ncma)) - dim(array_of_results) <- dim(ncma) - arrays_of_results <- replicate(length(result), array_of_results, simplify = FALSE) + arrays_of_results <- vector('list', length(result)) if (!is.null(output_dims)) { # Check number of outputs is correct. if (length(output_dims) != length(arrays_of_results)) { @@ -430,11 +487,11 @@ print(i_max) length(output_dims), " elements were expected.") } names(arrays_of_results) <- names(output_dims) + } else if (!is.null(names(result))) { + names(arrays_of_results) <- names(result) } else { names(arrays_of_results) <- paste0('output', 1:length(result)) } - rm(array_of_results) - found_first_result <- TRUE } for (component in 1:length(result)) { if (is.null(dim(result[[component]]))) { @@ -451,8 +508,18 @@ print(i_max) } component_dims <- c(component_dims, unlist(afml[common_margs])) dim(result[[component]]) <- component_dims + if (!found_first_result) { + component_array <- array(dim = dim(result[[component]])) + array_of_results <- replicate(length(ncma), component_array, simplify = FALSE) + dim(array_of_results) <- dim(ncma) + arrays_of_results[[component]] <- array_of_results + rm(array_of_results) + } arrays_of_results[[component]][[j]] <- result[[component]] } + if (!found_first_result) { + found_first_result <- TRUE + } if (!is.null(output_dims)) { # Check number of output dimensions is correct. for (component in 1:length(atomic_fun_out_dims)) { -- GitLab From a8a65f18b94eab1dc8c97e644405d29cc56e9837 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 23 Oct 2017 15:04:32 +0200 Subject: [PATCH 18/25] Adapted to support multiple outputs and to work with non-common margins, without duplicating inputs. Output dimension names are preserved. --- R/Apply.R | 155 +++++++++++++++--------------------------------------- 1 file changed, 42 insertions(+), 113 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 55eb638..bc110f5 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -358,19 +358,19 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, data_indexed <- vector('list', length(data)) data_indexed_indices <- vector('list', length(data)) for (i in 1 : length(data)) { - non_common_margs_i <- which(names(dim(data[[i]])) %in% names(afml[non_common_margs])) - if (length(non_common_margs_i) > 0) { + margs_i <- which(names(dim(data[[i]])) %in% names(afml[c(non_common_margs, common_margs)])) + if (length(margs_i) > 0) { margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) - margin_length[- non_common_margs_i] <- "" + margin_length[- margs_i] <- "" } else { margin_length <- as.list(rep("", length(dim(data[[i]])))) } margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) data_indexed[[i]] <- .isolate(data[[i]], margin_length) - if (length(non_common_margs_i) > 0) { - data_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[non_common_margs_i]), - dim = dim(data[[i]])[non_common_margs_i]) + if (length(margs_i) > 0) { + data_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[margs_i]), + dim = dim(data[[i]])[margs_i]) } else { data_indexed_indices[[i]] <- array(1, dim = 1) } @@ -379,109 +379,42 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, splatted_f <- splat(AtomicFun) # Iterate along all non-common margins - if (length(non_common_margs) > 0) { - non_common_margins_array <- ncma <- array(1:prod(unlist(afml[non_common_margs])), - dim = unlist(afml[non_common_margs])) + if (length(c(non_common_margs, common_margs)) > 0) { + marg_inds_ordered <- sort(c(non_common_margs, common_margs)) + margins_array <- ma <- array(1:prod(unlist(afml[marg_inds_ordered])), + dim = unlist(afml[marg_inds_ordered])) } else { - ncma <- array(1) + ma <- array(1) } arrays_of_results <- NULL found_first_result <- FALSE -# need to parallelize this loop if no common margins or small common margins # need to add progress bar -# need to use indexed arrays instead of arrays - for (j in 1:length(ncma)) { -if (j %% 1000 == 0) { - print(j) -} - marg_indices <- arrayInd(j, dim(ncma)) - #marg_indices <- which(ncma == j, arr.ind = TRUE)[1, ] - names(marg_indices) <- names(dim(ncma)) + iteration <- function(j) { + marg_indices <- arrayInd(j, dim(ma)) + names(marg_indices) <- names(dim(ma)) input <- list() atomic_fun_out_dims <- list() # Each iteration of j, the variable input is populated with sub-arrays for # each object in data (if possible). For each set of 'input's, the # splatted_f is applied in parallel if possible. - if (length(common_margs) > 0) { - input_indexed <- vector('list', length(data)) - input_indexed_indices <- vector('list', length(data)) - for (i in 1 : length(data_indexed)) { - ## - inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) - if (length(inds_to_take) > 0) { - input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), - marg_indices[inds_to_take], - list(drop = TRUE)))]] - } else { - input[[i]] <- data_indexed[[i]][[1]] - } - ## - common_margs_i <- which(names(dim(data[[i]])) %in% names(afml[common_margs])) - if (length(common_margs_i) > 0) { - margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) - margin_length[- common_margs_i] <- "" - } else { - margin_length <- as.list(rep("", length(dim(data[[i]])))) - } - margin_length <- expand.grid(margin_length, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - input_indexed[[i]] <- .isolate(input[[i]], margin_length) - ## - if (length(common_margs_i) > 0) { - input_indexed_indices[[i]] <- array(1:prod(dim(data[[i]])[common_margs_i]), - dim = dim(data[[i]])[common_margs_i]) - } else { - input_indexed_indices[[i]] <- array(1, dim = 1) - } - } - dims <- unlist(afml[common_margs]) - selected_dim <- which(dims != 1) - if (length(selected_dim) > 0) { - selected_dim <- selected_dim[1] + for (i in 1:length(data_indexed)) { + inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) + if (length(inds_to_take) > 0) { + input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), + marg_indices[inds_to_take], + list(drop = TRUE)))]] } else { - selected_dim <- 1 - } - max_size <- prod(dims) - i_max <- max_size / dims[selected_dim] - k <- max_size / i_max - if (parallel == TRUE) { - registerDoParallel(ncores) - } - result <- llply(1 : i_max, - function(i) { - sapply((k * i - (k - 1)) : (k * i), - function(x) { - splatted_f(.consolidate(lapply(input_indexed, `[[`, x), - lapply(lapply(data, dim), names), - target_dims_names), - ...) - }, simplify = FALSE) - }, .parallel = parallel) - if (parallel == TRUE) { - registerDoSEQ() - } - result <- list(array(unlist(result), - dim = c(dim(result[[1]][[1]]), unlist(afml[common_margs])))) - } else { - for (i in 1:length(data_indexed)) { - inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) - if (length(inds_to_take) > 0) { - input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), - marg_indices[inds_to_take], - list(drop = TRUE)))]] - } else { - input[[i]] <- data_indexed[[i]][[1]] - } - } - result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), target_dims_names), ...) - if (!is.list(result)) { - result <- list(result) + input[[i]] <- data_indexed[[i]][[1]] } } + result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), + target_dims_names), ...) + if (!is.list(result)) { + result <- list(result) + } if (!found_first_result) { arrays_of_results <- vector('list', length(result)) if (!is.null(output_dims)) { - # Check number of outputs is correct. if (length(output_dims) != length(arrays_of_results)) { stop("The 'AtomicFun' returns ", length(arrays_of_results), " elements, but ", length(output_dims), " elements were expected.") @@ -498,24 +431,15 @@ if (j %% 1000 == 0) { component_dims <- length(result[[component]]) } else { component_dims <- dim(result[[component]]) - if (length(common_margs) > 0) { - component_dims <- component_dims[1:(length(component_dims) - length(common_margs))] - } } atomic_fun_out_dims[[component]] <- component_dims - if (length(non_common_margs) > 0) { - component_dims <- c(component_dims, setNames(rep(1, length(dim(ncma))), names(dim(ncma)))) - } - component_dims <- c(component_dims, unlist(afml[common_margs])) + component_dims <- c(component_dims, setNames(rep(1, length(dim(ma))), names(dim(ma)))) dim(result[[component]]) <- component_dims if (!found_first_result) { - component_array <- array(dim = dim(result[[component]])) - array_of_results <- replicate(length(ncma), component_array, simplify = FALSE) - dim(array_of_results) <- dim(ncma) - arrays_of_results[[component]] <- array_of_results - rm(array_of_results) + arrays_of_results[[component]] <- array(dim = c(dim(result[[component]]), dim(ma))) } - arrays_of_results[[component]][[j]] <- result[[component]] + arrays_of_results[[component]][(1:prod(component_dims)) + + (j - 1) * prod(component_dims)] <- result[[component]] } if (!found_first_result) { found_first_result <- TRUE @@ -533,15 +457,20 @@ if (j %% 1000 == 0) { } } } + TRUE } - # Merge results - for (component in 1:length(arrays_of_results)) { - if (length(arrays_of_results[[component]]) > 1) { - arrays_of_results[[component]] <- .MergeArrayOfArrays(arrays_of_results[[component]]) - } else { - arrays_of_results[[component]] <- arrays_of_results[[component]][[1]] - } + # Execute in parallel if needed + if (parallel) { + registerDoParallel(ncores) + j <- seq(length(ma)) + fe <- eval(as.call(list(quote(foreach::foreach), j = j))) + info <- foreach::`%dopar%`(fe, iteration(j)) + registerDoSEQ() + } else { + info <- sapply(seq(length(ma)), iteration) } + + # Return arrays_of_results } -- GitLab From 2ef7190a1eb5110c668fe6ae64b699e54687d9f8 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 23 Oct 2017 23:07:28 +0200 Subject: [PATCH 19/25] Mostly working. --- R/Apply.R | 234 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 165 insertions(+), 69 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index bc110f5..5fca345 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -31,11 +31,20 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, stop("Parameter 'data' must be one or a list of numeric objects.") } is_vector <- rep(FALSE, length(data)) + is_unnamed <- rep(FALSE, length(data)) for (i in 1 : length(data)) { if (is.null(dim(data[[i]]))) { is_vector[i] <- TRUE dim(data[[i]]) <- length(data[[i]]) } + if (!is.null(names(dim(data)))) { + if (any(sapply(names(dim(data)), nchar) == 0)) { + stop("Dimension names of arrays in 'data' must be at least ", + "one character long.") + } + } else { + is_unnamed[i] <- TRUE + } } # Check AtomicFun @@ -299,9 +308,14 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } } } + missing_margin_names <- which(names(afml) == '') + if (length(missing_margin_names) > 0) { + names(afml)[missing_margin_names] <- paste0('_unnamed_margin_', + 1:length(missing_margin_names), '_') + } # afml is now a named list with the lenghts of all margins. Each margin - # appears once only. If some names are not provided, they are missing, - # e.g. ''. + # appears once only. If some names are not provided, they are set automatically + # to 'unnamed_dim_1', 'unamed_dim_2', ... # Now need to check which margins are common for all the data arrays. # Those will be used by llply. @@ -310,6 +324,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, margins_afml <- margins for (i in 1:length(data)) { if (!is.null(margins_names[[i]])) { + margins_afml[[i]] <- sapply(margins_names[[i]], function(x) { sapply(x, @@ -319,14 +334,23 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, ) } ) + } else if (length(margins_afml[[i]]) > 0) { + margins_afml[[i]] <- margins_afml[[i]] - min(margins_afml[[i]]) + 1 + # The missing margin and dim names are filled in. + margins_names[[i]] <- names(afml)[margins_afml[[i]]] + names(dim(data[[i]]))[margins[[i]]] <- margins_names[[i]] } } common_margs <- margins_afml[[1]] if (length(margins_afml) > 1) { for (i in 2:length(margins_afml)) { - non_matches <- which(!(common_margs %in% margins_afml[[i]])) - if (length(non_matches) > 0) { - common_margs <- common_margs[- non_matches] + margs_a <- unlist(afml[common_margs]) + margs_b <- unlist(afml[margins_afml[[i]]]) + matches <- which(names(margs_a) %in% names(margs_b)) + if (length(matches) > 0) { + common_margs <- common_margs[matches] + } else { + common_margs <- NULL } } } @@ -345,12 +369,14 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, drop = drop, subs = as.name("[")), class = c("indexed_array")) } - .consolidate <- function(subsets, dimnames, out_dimnames) { + .consolidate <- function(subsets, dimnames, out_dims) { lapply(1:length(subsets), function(x) { dims <- dim(subsets[[x]]) - names(dims) <- dimnames[[x]] - dims <- dims[out_dimnames[[x]]] + if (!is_unnamed[x]) { + names(dims) <- dimnames[[x]] + } + dims <- dims[out_dims[[x]]] array(subsets[[x]], dim = dims) }) } @@ -388,87 +414,157 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } arrays_of_results <- NULL found_first_result <- FALSE + + total_size <- prod(dim(ma)) + if (!is.null(ncores)) { + chunk_size <- round(total_size / (ncores * 4)) + } else { + chunk_size <- 4 + } + if (chunk_size < 1) { + chunk_size <- 1 + } + nchunks <- floor(total_size / chunk_size) + chunk_sizes <- rep(chunk_size, nchunks) + if (total_size %% chunk_size != 0) { + chunk_sizes <- c(chunk_sizes, total_size %% chunk_size) + } + # need to add progress bar - iteration <- function(j) { - marg_indices <- arrayInd(j, dim(ma)) - names(marg_indices) <- names(dim(ma)) - input <- list() - atomic_fun_out_dims <- list() - # Each iteration of j, the variable input is populated with sub-arrays for - # each object in data (if possible). For each set of 'input's, the - # splatted_f is applied in parallel if possible. - for (i in 1:length(data_indexed)) { - inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) - if (length(inds_to_take) > 0) { - input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), - marg_indices[inds_to_take], - list(drop = TRUE)))]] - } else { - input[[i]] <- data_indexed[[i]][[1]] + iteration <- function(m) { + sub_arrays_of_results <- list() + found_first_sub_result <- FALSE + for (n in 1:chunk_sizes[m]) { + # j is the index of the data piece to load in data_indexed + j <- n + (m - 1) * chunk_size + marg_indices <- arrayInd(j, dim(ma)) + names(marg_indices) <- names(dim(ma)) + input <- list() + atomic_fun_out_dims <- list() + # Each iteration of n, the variable input is populated with sub-arrays for + # each object in data (if possible). For each set of 'input's, the + # splatted_f is applied in parallel if possible. + for (i in 1:length(data_indexed)) { + inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) + if (length(inds_to_take) > 0) { + input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), + marg_indices[inds_to_take], + list(drop = TRUE)))]] + } else { + input[[i]] <- data_indexed[[i]][[1]] + } + } + result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), + target_dims), ...) + if (!is.list(result)) { + result <- list(result) + } + if (!found_first_sub_result) { + sub_arrays_of_results <- vector('list', length(result)) + if (!is.null(output_dims)) { + if (length(output_dims) != length(sub_arrays_of_results)) { + stop("The 'AtomicFun' returns ", length(sub_arrays_of_results), + " elements, but ", length(output_dims), + " elements were expected.") + } + names(sub_arrays_of_results) <- names(output_dims) + } else if (!is.null(names(result))) { + names(sub_arrays_of_results) <- names(result) + } else { + names(sub_arrays_of_results) <- paste0('output', 1:length(result)) + } + } + for (component in 1:length(result)) { + if (is.null(dim(result[[component]]))) { + if (length(result[[component]] == 1)) { + component_dims <- NULL + } else { + component_dims <- length(result[[component]]) + } + } else { + component_dims <- dim(result[[component]]) + } + if (!found_first_sub_result) { + sub_arrays_of_results[[component]] <- array(dim = c(component_dims, chunk_sizes[m])) + } + atomic_fun_out_dims[[component]] <- component_dims + sub_arrays_of_results[[component]][(1:prod(component_dims)) + + (n - 1) * prod(component_dims)] <- result[[component]] + } + if (!found_first_sub_result) { + found_first_sub_result <- TRUE + } + if (!is.null(output_dims)) { + # Check number of output dimensions is correct. + for (component in 1:length(atomic_fun_out_dims)) { + if (length(atomic_fun_out_dims[[component]]) != output_dims[[component]]) { + stop("Expected ", component, "st returned element by 'AtomicFun'", + "to have ", length(output_dims[[component]]), " dimensions, ", + "but ", length(atomic_fun_out_dims[[component]]), " found.") + } + } } } - result <- splatted_f(.consolidate(input, lapply(lapply(data, dim), names), - target_dims_names), ...) - if (!is.list(result)) { - result <- list(result) - } + sub_arrays_of_results + } + + # Execute in parallel if needed + if (parallel) registerDoParallel(ncores) + result <- llply(1:length(chunk_sizes), iteration, .parallel = parallel) + if (parallel) registerDoSEQ() + + # Merge the results + chunk_length <- NULL + fun_out_dims <- NULL + for (m in 1:length(result)) { if (!found_first_result) { - arrays_of_results <- vector('list', length(result)) + arrays_of_results <- vector('list', length(result[[1]])) if (!is.null(output_dims)) { if (length(output_dims) != length(arrays_of_results)) { stop("The 'AtomicFun' returns ", length(arrays_of_results), " elements, but ", length(output_dims), " elements were expected.") } names(arrays_of_results) <- names(output_dims) - } else if (!is.null(names(result))) { - names(arrays_of_results) <- names(result) + } else if (!is.null(names(result[[1]]))) { + names(arrays_of_results) <- names(result[[1]]) } else { - names(arrays_of_results) <- paste0('output', 1:length(result)) + names(arrays_of_results) <- paste0('output', 1:length(result[[1]])) } } - for (component in 1:length(result)) { - if (is.null(dim(result[[component]]))) { - component_dims <- length(result[[component]]) - } else { - component_dims <- dim(result[[component]]) - } - atomic_fun_out_dims[[component]] <- component_dims - component_dims <- c(component_dims, setNames(rep(1, length(dim(ma))), names(dim(ma)))) - dim(result[[component]]) <- component_dims + for (component in 1:length(result[[m]])) { + component_dims <- dim(result[[m]][[component]]) if (!found_first_result) { - arrays_of_results[[component]] <- array(dim = c(dim(result[[component]]), dim(ma))) + if (length(component_dims) > 0) { + fun_out_dims[[component]] <- component_dims[- length(component_dims)] + } else { + fun_out_dims[[component]] <- NULL + } + arrays_of_results[[component]] <- array(dim = c(fun_out_dims[[component]], + dim(ma))) + dimnames_to_remove <- which(grepl('^_unnamed_margin_', + names(dim(arrays_of_results[[component]])))) + if (length(dimnames_to_remove) > 0) { + names(dim(arrays_of_results[[component]]))[dimnames_to_remove] <- rep('', length(dimnames_to_remove)) + } + if (all(names(dim(arrays_of_results[[component]])) == '')) { + names(dim(arrays_of_results[[component]])) <- NULL + } + chunk_length <- prod(component_dims) } arrays_of_results[[component]][(1:prod(component_dims)) + - (j - 1) * prod(component_dims)] <- result[[component]] + (m - 1) * chunk_length] <- result[[m]][[component]] } if (!found_first_result) { found_first_result <- TRUE } - if (!is.null(output_dims)) { - # Check number of output dimensions is correct. - for (component in 1:length(atomic_fun_out_dims)) { - if (length(atomic_fun_out_dims[[component]]) != output_dims[[component]]) { - stop("Expected ", component, "st returned element by 'AtomicFun'", - "to have ", length(output_dims[[component]]), " dimensions, ", - "but ", length(atomic_fun_out_dims[[component]]), " found.") - } - if (!is.null(names(atomic_fun_out_dims[[component]]))) { - # check component_dims match names of output_dims[[component]], and reorder if needed - } - } - } - TRUE - } - - # Execute in parallel if needed - if (parallel) { - registerDoParallel(ncores) - j <- seq(length(ma)) - fe <- eval(as.call(list(quote(foreach::foreach), j = j))) - info <- foreach::`%dopar%`(fe, iteration(j)) - registerDoSEQ() - } else { - info <- sapply(seq(length(ma)), iteration) + #if (!is.null(output_dims)) { + # # Check number of output dimensions is correct. + # for (component in 1:length(atomic_fun_out_dims)) { + # if (!is.null(names(fun_out_dims[[component]]))) { + # # check component_dims match names of output_dims[[component]], and reorder if needed + # } + # } + #} } # Return -- GitLab From eb7f099edeb20f255a86808f523aebd81d603e80 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 24 Oct 2017 00:09:03 +0200 Subject: [PATCH 20/25] Small fix. --- R/Apply.R | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 5fca345..ee3449f 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -20,7 +20,7 @@ #' test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / #' (sum(y > z) / (length(y)))) * 100} #' margins = list(c(1, 2), c(1, 2), c(1,2)) -#' test <- Apply(data, margins, AtomicFun = "test_fun") +#' test <- Apply(data, margins = margins, AtomicFun = "test_fun") Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, margins = NULL, parallel = FALSE, ncores = NULL) { # Check data @@ -176,16 +176,17 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, margins[[i]] <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] } } - # Reorder dimensions of input data for target dims to be left-most # and in the required order. for (i in 1 : length(data)) { - if (is.unsorted(target_dims[[i]]) || - (max(target_dims[[i]]) > length(target_dims[[i]]))) { - marg_dims <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] - data[[i]] <- .aperm2(data[[i]], c(target_dims[[i]], marg_dims)) - target_dims[[i]] <- 1 : length(target_dims[[i]]) - margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) + if (length(target_dims[[i]]) > 0) { + if (is.unsorted(target_dims[[i]]) || + (max(target_dims[[i]]) > length(target_dims[[i]]))) { + marg_dims <- (1 : length(dim(data[[i]])))[- target_dims[[i]]] + data[[i]] <- .aperm2(data[[i]], c(target_dims[[i]], marg_dims)) + target_dims[[i]] <- 1 : length(target_dims[[i]]) + margins[[i]] <- (length(target_dims[[i]]) + 1) : length(dim(data[[i]])) + } } } @@ -372,12 +373,16 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, .consolidate <- function(subsets, dimnames, out_dims) { lapply(1:length(subsets), function(x) { - dims <- dim(subsets[[x]]) - if (!is_unnamed[x]) { - names(dims) <- dimnames[[x]] + if (length(out_dims[[x]]) > 0) { + dims <- dim(subsets[[x]]) + if (!is_unnamed[x]) { + names(dims) <- dimnames[[x]] + } + dims <- dims[out_dims[[x]]] + array(subsets[[x]], dim = dims) + } else { + as.vector(subsets[[x]]) } - dims <- dims[out_dims[[x]]] - array(subsets[[x]], dim = dims) }) } -- GitLab From 78b4e51e7f12b710b0e67571e4691e81073b6cb3 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 24 Oct 2017 22:56:34 +0200 Subject: [PATCH 21/25] Fixes. --- R/Apply.R | 35 ++++++++++++++++++++--------------- R/zzz.R | 14 +++----------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index ee3449f..984d1a4 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -35,10 +35,11 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, for (i in 1 : length(data)) { if (is.null(dim(data[[i]]))) { is_vector[i] <- TRUE + is_unnamed[i] <- TRUE dim(data[[i]]) <- length(data[[i]]) } - if (!is.null(names(dim(data)))) { - if (any(sapply(names(dim(data)), nchar) == 0)) { + if (!is.null(names(dim(data[[i]])))) { + if (any(sapply(names(dim(data[[i]])), nchar) == 0)) { stop("Dimension names of arrays in 'data' must be at least ", "one character long.") } @@ -58,13 +59,12 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, stop("Parameter 'AtomicFun' must be a function or a character string ", "with the name of a function.") } - output_dims <- NULL if ('startR_step' %in% class(AtomicFun)) { if (is.null(target_dims)) { target_dims <- attr(AtomicFun, 'target_dims') } if (is.null(output_dims)) { - output_dims <- attr(AtomicFun, 'target_dims') + output_dims <- attr(AtomicFun, 'output_dims') } } @@ -195,8 +195,8 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, if (!is.list(output_dims)) { output_dims <- list(output1 = output_dims) } - if (any(sapply(output_dims, function(x) !is.character(x)))) { - stop("Parameter 'output_dims' must be one or a list of vectors of character strings.") + if (any(sapply(output_dims, function(x) !(is.character(x) || is.null(x))))) { + stop("Parameter 'output_dims' must be one or a list of vectors of character strings (or NULLs).") } if (is.null(names(output_dims))) { names(output_dims) <- rep('', length(output_dims)) @@ -445,7 +445,6 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, marg_indices <- arrayInd(j, dim(ma)) names(marg_indices) <- names(dim(ma)) input <- list() - atomic_fun_out_dims <- list() # Each iteration of n, the variable input is populated with sub-arrays for # each object in data (if possible). For each set of 'input's, the # splatted_f is applied in parallel if possible. @@ -479,6 +478,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, names(sub_arrays_of_results) <- paste0('output', 1:length(result)) } } + atomic_fun_out_dims <- vector('list', length(result)) for (component in 1:length(result)) { if (is.null(dim(result[[component]]))) { if (length(result[[component]] == 1)) { @@ -492,7 +492,9 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, if (!found_first_sub_result) { sub_arrays_of_results[[component]] <- array(dim = c(component_dims, chunk_sizes[m])) } - atomic_fun_out_dims[[component]] <- component_dims + if (!is.null(component_dims)) { + atomic_fun_out_dims[[component]] <- component_dims + } sub_arrays_of_results[[component]][(1:prod(component_dims)) + (n - 1) * prod(component_dims)] <- result[[component]] } @@ -500,10 +502,15 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, found_first_sub_result <- TRUE } if (!is.null(output_dims)) { + # Check number of outputs. + if (length(output_dims) != length(result)) { + stop("Expected AtomicFun to return ", length(output_dims), " components, ", + "but ", length(result), " found.") + } # Check number of output dimensions is correct. - for (component in 1:length(atomic_fun_out_dims)) { - if (length(atomic_fun_out_dims[[component]]) != output_dims[[component]]) { - stop("Expected ", component, "st returned element by 'AtomicFun'", + for (component in 1:length(result)) { + if (length(atomic_fun_out_dims[[component]]) != length(output_dims[[component]])) { + stop("Expected ", component, "st returned element by 'AtomicFun' ", "to have ", length(output_dims[[component]]), " dimensions, ", "but ", length(atomic_fun_out_dims[[component]]), " found.") } @@ -520,7 +527,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, # Merge the results chunk_length <- NULL - fun_out_dims <- NULL + fun_out_dims <- vector('list', length(result[[1]])) for (m in 1:length(result)) { if (!found_first_result) { arrays_of_results <- vector('list', length(result[[1]])) @@ -539,10 +546,8 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, for (component in 1:length(result[[m]])) { component_dims <- dim(result[[m]][[component]]) if (!found_first_result) { - if (length(component_dims) > 0) { + if (length(component_dims) > 1) { fun_out_dims[[component]] <- component_dims[- length(component_dims)] - } else { - fun_out_dims[[component]] <- NULL } arrays_of_results[[component]] <- array(dim = c(fun_out_dims[[component]], dim(ma))) diff --git a/R/zzz.R b/R/zzz.R index 32fe628..3e04077 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -122,23 +122,15 @@ for (j in 1:(array_dims[dim_index])) { new_chunk <- do.call('[[', c(list(x = array_of_arrays), as.list(c(j, chunk_sub_indices)))) - #do.call('[[<-', c(list(x = array_of_chunks), - # as.list(c(j, chunk_sub_indices)), - # list(value = NULL))) if (is.null(new_chunk)) { stop("Chunks missing.") } if (is.null(sub_array_of_chunks[[i]])) { sub_array_of_chunks[[i]] <- new_chunk } else { - #if (length(new_chunk) != length(sub_array_of_chunks[[i]])) { - # stop("Missing components for some chunks.") - #} - #for (component in 1:length(new_chunk)) { - sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]], - new_chunk, - dim_names[dim_index]) - #} + sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]], + new_chunk, + dim_names[dim_index]) } } } -- GitLab From 82ce19f4970caebfbbb59c5b24695993c9bba212 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 10 Nov 2017 19:57:20 +0100 Subject: [PATCH 22/25] Fix. --- R/Apply.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/Apply.R b/R/Apply.R index 984d1a4..dd7447d 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -451,9 +451,10 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, for (i in 1:length(data_indexed)) { inds_to_take <- which(names(marg_indices) %in% names(dim(data_indexed_indices[[i]]))) if (length(inds_to_take) > 0) { - input[[i]] <- data_indexed[[i]][[do.call('[', c(list(x = data_indexed_indices[[i]]), - marg_indices[inds_to_take], - list(drop = TRUE)))]] + marg_inds_to_take <- marg_indices[inds_to_take][names(dim(data_indexed_indices[[i]]))] + input[[i]] <- data_indexed[[i]][[do.call("[", + c(list(x = data_indexed_indices[[i]]), marg_inds_to_take, + list(drop = TRUE)))]] } else { input[[i]] <- data_indexed[[i]][[1]] } -- GitLab From bc290aefc2662d707dec1d97c418e70e13a04edb Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Wed, 29 Nov 2017 21:55:19 +0100 Subject: [PATCH 23/25] Small fix. --- R/Apply.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Apply.R b/R/Apply.R index dd7447d..91267a3 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -482,7 +482,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, atomic_fun_out_dims <- vector('list', length(result)) for (component in 1:length(result)) { if (is.null(dim(result[[component]]))) { - if (length(result[[component]] == 1)) { + if (length(result[[component]]) == 1) { component_dims <- NULL } else { component_dims <- length(result[[component]]) -- GitLab From b5d492cfdabba48e0a6ddf6df3cfb8b84f9a42ba Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Thu, 30 Nov 2017 02:02:05 +0100 Subject: [PATCH 24/25] Fixes. --- DESCRIPTION | 1 - NAMESPACE | 1 - R/Apply.R | 38 ++++++++++++++++++++------------------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 131c6d2..5e23363 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,6 @@ Imports: abind, plyr, doParallel, - future, foreach, s2dverification License: LGPL-3 diff --git a/NAMESPACE b/NAMESPACE index a6a7711..0ef43f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand importFrom(plyr, llply, splat) importFrom(abind, abind) -importFrom(future, availableCores) importFrom(doParallel, registerDoParallel) importFrom(foreach, registerDoSEQ) importFrom(s2dverification, InsertDim) diff --git a/R/Apply.R b/R/Apply.R index 91267a3..717ab8d 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -6,8 +6,7 @@ #' @param AtomicFun Function to be applied to the arrays. #' @param ... Additional arguments to be used in the AtomicFun. #' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. -#' @param parallel Logical, should the function be applied in parallel. -#' @param ncores The number of cores to use for parallel computation. +#' @param ncores The number multicore threads to use for parallel computation. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. #' @return Array or matrix or vector resulting from AtomicFun. #' @references Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. @@ -22,7 +21,7 @@ #' margins = list(c(1, 2), c(1, 2), c(1,2)) #' test <- Apply(data, margins = margins, AtomicFun = "test_fun") Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, - margins = NULL, parallel = FALSE, ncores = NULL) { + margins = NULL, ncores = NULL) { # Check data if (!is.list(data)) { data <- list(data) @@ -207,22 +206,14 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } } - # Check parallel - if (!is.logical(parallel)) { - stop("Parameter 'parallel' must be logical.") - } - # Check ncores - if (parallel) { - if (is.null(ncores)) { - ncores <- availableCores() - 1 - } - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be numeric.") - } - ncores <- round(ncores) - ncores <- min(availableCores() - 1, ncores) + if (is.null(ncores)) { + ncores <- 1 + } + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be numeric.") } + ncores <- round(ncores) # Consistency checks of margins of all input objects # for each data array, add its margins to the list if not present. @@ -371,7 +362,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, class = c("indexed_array")) } .consolidate <- function(subsets, dimnames, out_dims) { - lapply(1:length(subsets), + lapply(setNames(1:length(subsets), names(subsets)), function(x) { if (length(out_dims[[x]]) > 0) { dims <- dim(subsets[[x]]) @@ -390,6 +381,8 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, data_indexed_indices <- vector('list', length(data)) for (i in 1 : length(data)) { margs_i <- which(names(dim(data[[i]])) %in% names(afml[c(non_common_margs, common_margs)])) + false_margs_i <- which(margs_i %in% target_dims[[i]]) + margs_i <- setdiff(margs_i, false_margs_i) if (length(margs_i) > 0) { margin_length <- lapply(dim(data[[i]]), function(x) 1 : x) margin_length[- margs_i] <- "" @@ -522,6 +515,7 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, } # Execute in parallel if needed + parallel <- ncores > 1 if (parallel) registerDoParallel(ncores) result <- llply(1:length(chunk_sizes), iteration, .parallel = parallel) if (parallel) registerDoSEQ() @@ -577,6 +571,14 @@ Apply <- function(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, # } #} } + # Assign 'output_dims' as dimension names if possible + if (!is.null(output_dims)) { + for (component in 1:length(output_dims)) { + if (length(output_dims[[component]]) > 0) { + names(dim(arrays_of_results[[component]]))[1:length(output_dims[[component]])] <- output_dims[[component]] + } + } + } # Return arrays_of_results -- GitLab From 68892a786873c2f1804dafd3e1eeb714a080b06a Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 5 Feb 2018 13:32:47 +0100 Subject: [PATCH 25/25] Updating documentation. --- DESCRIPTION | 5 ++--- NAMESPACE | 6 +++--- R/Apply.R | 9 +++++---- man/Apply.Rd | 34 ++++++++++++++-------------------- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e23363..3e7559a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,11 +9,10 @@ Description: The base apply function and its variants, as well as the related fu Depends: R (>= 3.2.0) Imports: - abind, - plyr, + abind, doParallel, foreach, - s2dverification + plyr License: LGPL-3 URL: https://earth.bsc.es/gitlab/ces/multiApply BugReports: https://earth.bsc.es/gitlab/ces/multiApply/issues diff --git a/NAMESPACE b/NAMESPACE index 0ef43f7..213333a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand -importFrom(plyr, llply, splat) importFrom(abind, abind) -importFrom(doParallel, registerDoParallel) importFrom(foreach, registerDoSEQ) -importFrom(s2dverification, InsertDim) +importFrom(doParallel, registerDoParallel) +importFrom(plyr, splat) +importFrom(plyr, llply) export(Apply) diff --git a/R/Apply.R b/R/Apply.R index 717ab8d..b49b11d 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -1,14 +1,15 @@ #' Wrapper for Applying Atomic Functions to Arrays. #' -#' The Apply function is an extension of the mapply function, which instead of taking lists of unidimensional objects as input, takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option. +#' This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions. #' @param data A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun. -#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. +#' @param target_dims List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims. #' @param AtomicFun Function to be applied to the arrays. #' @param ... Additional arguments to be used in the AtomicFun. +#' @param output_dims Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output). #' @param margins List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. -#' @param ncores The number multicore threads to use for parallel computation. +#' @param ncores The number of multicore threads to use for parallel computation. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. -#' @return Array or matrix or vector resulting from AtomicFun. +#' @return List of arrays or matrices or vectors resulting from applying AtomicFun to data. #' @references Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. #' @export #' @examples diff --git a/man/Apply.Rd b/man/Apply.Rd index 63a64de..2f69b6c 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -4,48 +4,42 @@ \alias{Apply} \title{Wrapper for Applying Atomic Functions to Arrays.} \usage{ -Apply(data, target_dims = NULL, AtomicFun, ..., margins = NULL, parallel = FALSE, - ncores = NULL) +Apply(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, + margins = NULL, ncores = NULL) } \arguments{ \item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.} -\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins.} +\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims.} \item{AtomicFun}{Function to be applied to the arrays.} \item{...}{Additional arguments to be used in the AtomicFun.} -\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, target_dims takes priority over margins.} +\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output).} -\item{parallel}{Logical, should the function be applied in parallel.} +\item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims.} -\item{ncores}{The number of cores to use for parallel computation.} +\item{ncores}{The number of multicore threads to use for parallel computation.} } \value{ -Array or matrix or vector resulting from AtomicFun. +List of arrays or matrices or vectors resulting from applying AtomicFun to data. } \description{ -Takes lists of multidimensional objects as input, which may have different numbers of dimensions and dimension lengths. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the margins option. +This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions. } \details{ -A user can apply a function that receives 1 or more objects as input, each with a different number of dimensions, and returns as a result a single array with any number of dimensions. +When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. } \examples{ #Change in the rate of exceedance for two arrays, with different #dimensions, for some matrix of exceedances. -array_1 <- array(rnorm(2000), c(10,10,20)) # array with 20 timesteps -array_2 <- array(rnorm(1000), c(10, 10, 15)) # array with 15 timesteps -thresholds <- matrix(rnorm(100), 10, 10) # matrix of thresholds (no timesteps) - -# Function for calculating the change in the frequency of exceedances over the -#thresholds for array_1 relative to array_2 (percentage change). - -test_fun <- function(x, y, z) {(((sum(x > z) / (length(x))) / - (sum(y > z) / (length(y)))) * 100) - 100} -data = list(array_1, array_2, thresholds) +data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)), + array(rnorm(100), c(10, 10))) +test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / + (sum(y > z) / (length(y)))) * 100} margins = list(c(1, 2), c(1, 2), c(1,2)) -test <- Apply(data = data, margins = margins, AtomicFun = "test_fun") +test <- Apply(data, margins = margins, AtomicFun = "test_fun") } \references{ Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. -- GitLab