From 44f2ae552a39efcf0dd75bba7dbe3c72a476fcd7 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Sun, 11 Nov 2018 15:55:41 +0100 Subject: [PATCH 1/3] Fix for unnamed dimensions. Updates in documentation. Added GitLab CI. --- .gitlab-ci.yml | 16 +++++++++++ NAMESPACE | 12 ++++---- R/Apply.R | 49 ++++++++++++++++++++++++++------- README.md | 5 +++- man/Apply.Rd | 35 +++++++++++++---------- tests/testthat/test-use-cases.R | 27 +++++++++++++++++- 6 files changed, 112 insertions(+), 32 deletions(-) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..f72523a --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,16 @@ +stages: + - build + - report + +build: + stage: build + script: + - module load R + - R -e 'devtools::build()' + - R -e 'devtools::check()' + +report: + stage: report + script: + - module load R + - R -e 'covr::package_coverage()' diff --git a/NAMESPACE b/NAMESPACE index 283fbf0..aaad7dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,8 @@ # Generated by roxygen2: do not edit by hand -importFrom(abind, abind) -importFrom(foreach, registerDoSEQ) -importFrom(doParallel, registerDoParallel) -importFrom(plyr, splat) -importFrom(plyr, llply) -importFrom(stats, setNames) + export(Apply) +importFrom(abind,abind) +importFrom(doParallel,registerDoParallel) +importFrom(foreach,registerDoSEQ) +importFrom(plyr,llply) +importFrom(plyr,splat) diff --git a/R/Apply.R b/R/Apply.R index 83ab76b..b126918 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -7,6 +7,7 @@ #' @param ... Additional arguments to be used in the fun. #' @param output_dims Optional list of vectors containing the names of the dimensions to be output from the fun for each of the objects it returns (or a single vector if the function has only one output). #' @param margins List of vectors containing the margins for the input objects to be split by. 
Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims. +#' @param guess_dim_names Whether to automatically guess missing dimension names for dimensions of equal length across different inputs in 'data' with a warning (TRUE; default), or to crash whenever unnamed dimensions of equal length are identified across different inputs (FALSE). #' @param ncores The number of multicore threads to use for parallel computation. #' @param split_factor Factor telling to which degree the input data should be split into smaller pieces to be processed by the available cores. By default (split_factor = 1) the data is split into 4 pieces for each of the cores (as specified in ncores). A split_factor of 2 will result in 8 pieces for each of the cores, and so on. The special value 'greatest' will split the input data into as many pieces as possible. #' @details When using a single object as input, Apply is almost identical to the apply function. For multiple input objects, the output array will have dimensions equal to the dimensions specified in 'margins'. @@ -16,14 +17,21 @@ #' @examples #' #Change in the rate of exceedance for two arrays, with different #' #dimensions, for some matrix of exceedances. 
-#' data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)), -#' array(rnorm(100), c(10, 10))) -#' test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / -#' (sum(y > z) / (length(y)))) * 100} -#' margins = list(c(1, 2), c(1, 2), c(1,2)) -#' test <- Apply(data, margins = margins, fun = "test_fun") -Apply <- function(data, target_dims = NULL, fun, ..., output_dims = NULL, - margins = NULL, ncores = NULL, split_factor = 1) { +#' data <- list(array(rnorm(1000), c(5, 10, 20)), +#' array(rnorm(500), c(5, 10, 10)), +#' array(rnorm(50), c(5, 10))) +#' test_fun <- function(x, y, z) { +#' ((sum(x > z) / (length(x))) / +#' (sum(y > z) / (length(y)))) * 100 +#' } +#' test <- Apply(data, target = list(3, 3, NULL), test_fun) +#' @importFrom abind abind +#' @importFrom foreach registerDoSEQ +#' @importFrom doParallel registerDoParallel +#' @importFrom plyr splat llply +Apply <- function(data, target_dims = NULL, fun, ..., + output_dims = NULL, margins = NULL, guess_dim_names = TRUE, + ncores = NULL, split_factor = 1) { # Check data if (!is.list(data)) { data <- list(data) @@ -34,6 +42,7 @@ Apply <- function(data, target_dims = NULL, fun, ..., output_dims = NULL, is_vector <- rep(FALSE, length(data)) is_unnamed <- rep(FALSE, length(data)) unnamed_dims <- c() + guessed_any_dimnames <- FALSE for (i in 1 : length(data)) { if (length(data[[i]]) < 1) { stop("Arrays in 'data' must be of length > 0.") @@ -57,12 +66,19 @@ Apply <- function(data, target_dims = NULL, fun, ..., output_dims = NULL, } else { is_unnamed[i] <- TRUE new_unnamed_dims <- c() + unnamed_dims_copy <- unnamed_dims for (j in 1 : length(dim(data[[i]]))) { len_of_dim_j <- dim(data[[i]])[j] - found_match <- which(unnamed_dims == len_of_dim_j) - if (length(found_match) > 0) { + found_match <- which(unnamed_dims_copy == len_of_dim_j) + if (!guess_dim_names && (length(found_match) > 0)) { stop("Arrays in 'data' have multiple unnamed dimensions of the ", "same length. 
Please provide dimension names.") + } + if (length(found_match) > 0) { + found_match <- found_match[1] + names(dim(data[[i]]))[j] <- names(unnamed_dims_copy[found_match]) + unnamed_dims_copy <- unnamed_dims_copy[-found_match] + guessed_any_dimnames <- TRUE } else { new_dim <- len_of_dim_j names(new_dim) <- paste0('_unnamed_dim_', length(unnamed_dims) + @@ -74,6 +90,19 @@ Apply <- function(data, target_dims = NULL, fun, ..., output_dims = NULL, unnamed_dims <- c(unnamed_dims, new_unnamed_dims) } } + if (guessed_any_dimnames) { + dim_names_string <- "" + for (i in 1:length(data)) { + dim_names_string <- c(dim_names_string, "\n\tInput ", i, ":", + sapply(capture.output(print(dim(data[[i]]))), + function(x) paste0('\n\t\t', x))) + } + warning("Guessed names for some unnamed dimensions of equal length ", + "found across different inputs in 'data'. Please check ", + "carefully the assumed names below are correct, or provide ", + "dimension names for safety, or disable the parameter ", + "'guess_dimension_names'.", dim_names_string) + } # Check fun if (is.character(fun)) { diff --git a/README.md b/README.md index 975ccf3..3bdb2c3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ ## multiApply -[![](https://cranlogs.r-pkg.org/badges/multiApply)](https://cran.rstudio.com/web/packages/multiApply/index.html) +[![build status](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/build.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) +[![coverage report](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/coverage.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) +[![CRAN version](http://www.r-pkg.org/badges/version/multiApply)](https://cran.r-project.org/package=multiApply) +[![CRAN RStudio Downloads](https://cranlogs.r-pkg.org/badges/multiApply)](https://cran.rstudio.com/web/packages/multiApply/index.html) This package extends the apply and plyr families of functions to applications which involve the use of multiple arrays as 
input, and is useful to apply a function taking multiple numeric objects as input across multiple multi-dimensional arrays. diff --git a/man/Apply.Rd b/man/Apply.Rd index 2f69b6c..592c6af 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -4,26 +4,31 @@ \alias{Apply} \title{Wrapper for Applying Atomic Functions to Arrays.} \usage{ -Apply(data, target_dims = NULL, AtomicFun, ..., output_dims = NULL, - margins = NULL, ncores = NULL) +Apply(data, target_dims = NULL, fun, ..., output_dims = NULL, + margins = NULL, guess_dim_names = TRUE, ncores = NULL, + split_factor = 1) } \arguments{ -\item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by AtomicFun.} +\item{data}{A single object (vector, matrix or array) or a list of objects. They must be in the same order as expected by fun.} -\item{target_dims}{List of vectors containing the dimensions to be input into AtomicFun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. If both margins and target_dims are specified, margins takes priority over target_dims.} +\item{target_dims}{List of vectors containing the dimensions to be input into fun for each of the objects in the data. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. This parameter is mandatory if margins is not specified. 
If both margins and target_dims are specified, margins takes priority over target_dims.} -\item{AtomicFun}{Function to be applied to the arrays.} +\item{fun}{Function to be applied to the arrays.} -\item{...}{Additional arguments to be used in the AtomicFun.} +\item{...}{Additional arguments to be used in the fun.} -\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the AtomicFun for each of the objects it returns (or a single vector if the function has only one output).} +\item{output_dims}{Optional list of vectors containing the names of the dimensions to be output from the fun for each of the objects it returns (or a single vector if the function has only one output).} \item{margins}{List of vectors containing the margins for the input objects to be split by. Or, if there is a single vector of margins specified and a list of objects in data, then the single set of margins is applied over all objects. These vectors can contain either integers specifying the dimension position, or characters corresponding to the dimension names. If both margins and target_dims are specified, margins takes priority over target_dims.} +\item{guess_dim_names}{Whether to automatically guess missing dimension names for dimensions of equal length across different inputs in 'data' with a warning (TRUE; default), or to crash whenever unnamed dimensions of equal length are identified across different inputs (FALSE).} + \item{ncores}{The number of multicore threads to use for parallel computation.} + +\item{split_factor}{Factor telling to which degree the input data should be split into smaller pieces to be processed by the available cores. By default (split_factor = 1) the data is split into 4 pieces for each of the cores (as specified in ncores). A split_factor of 2 will result in 8 pieces for each of the cores, and so on. 
The special value 'greatest' will split the input data into as many pieces as possible.} } \value{ -List of arrays or matrices or vectors resulting from applying AtomicFun to data. +List of arrays or matrices or vectors resulting from applying fun to data. } \description{ This wrapper applies a given function, which takes N [multi-dimensional] arrays as inputs (which may have different numbers of dimensions and dimension lengths), and applies it to a list of N [multi-dimensional] arrays with at least as many dimensions as expected by the given function. The user can specify which dimensions of each array (or matrix) the function is to be applied over with the \code{margins} or \code{target_dims} option. A user can apply a function that receives (in addition to other helper parameters) 1 or more arrays as input, each with a different number of dimensions, and returns any number of multidimensional arrays. The target dimensions can be specified by their names. It is recommended to use this wrapper with multidimensional arrays with named dimensions. @@ -34,12 +39,14 @@ When using a single object as input, Apply is almost identical to the apply func \examples{ #Change in the rate of exceedance for two arrays, with different #dimensions, for some matrix of exceedances. -data = list(array(rnorm(2000), c(10,10,20)), array(rnorm(1000), c(10,10,10)), - array(rnorm(100), c(10, 10))) -test_fun <- function(x, y, z) {((sum(x > z) / (length(x))) / - (sum(y > z) / (length(y)))) * 100} -margins = list(c(1, 2), c(1, 2), c(1,2)) -test <- Apply(data, margins = margins, AtomicFun = "test_fun") +data <- list(array(rnorm(1000), c(5, 10, 20)), + array(rnorm(500), c(5, 10, 10)), + array(rnorm(50), c(5, 10))) +test_fun <- function(x, y, z) { + ((sum(x > z) / (length(x))) / + (sum(y > z) / (length(y)))) * 100 +} +test <- Apply(data, target = list(3, 3, NULL), test_fun) } \references{ Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. 
diff --git a/tests/testthat/test-use-cases.R b/tests/testthat/test-use-cases.R index a9a6498..e686250 100644 --- a/tests/testthat/test-use-cases.R +++ b/tests/testthat/test-use-cases.R @@ -790,9 +790,34 @@ test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0-2, 0-1; out1: 1 dim; out2: 1 va expect_error( Apply(list(array(1:10, dim = c(10, 3)), array(1:3 * 10, dim = c(3))), - NULL, f), + NULL, f, guess_dim_names = FALSE), "multiple unnamed dimensions of the same length" ) + expect_warning( + Apply(list(array(1:10, dim = c(10, 3)), + array(1:3 * 10, dim = c(3))), + NULL, f), + "Guessed names for some unnamed dimensions" + ) + expect_equal( + Apply(list(array(1:10, dim = c(10, 3)), + array(1:3 * 10, dim = c(3))), + NULL, f), + list(output1 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + y:(y + 3) + }), 1) + }), + dim = c(4, 10, 3)), + output2 = array(sapply(c(10, 20, 30), function (x) x + rep(1:10, 1)), + dim = c(10, 3)), + output3 = array(sapply(c(10, 20, 30), function(x) { + x + rep(sapply(1:10, function(y) { + rep(y:(y + 4), 6 * 7) + }), 1) + }), + dim = c(5, 6, 7, 10, 3))) + ) # unnamed input dim # unnamed output # unnamed output dim -- GitLab From 5c7e103d9cfe09dda22aef2f68b152bc0e715894 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Sun, 11 Nov 2018 16:02:47 +0100 Subject: [PATCH 2/3] Added gitlab-ci.yml to Rbuildignore. --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 834c4fa..97c7dbe 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,5 +1,6 @@ .git .gitignore +.gitlab-ci.yml .tar.gz .pdf ./.nc -- GitLab From e1ecbbda64364d3c175f2242bafec40dd7fab8b1 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Sun, 11 Nov 2018 16:29:44 +0100 Subject: [PATCH 3/3] Simplified GitLab CI pipeline. 
--- .gitlab-ci.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f72523a..398a915 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,5 @@ stages: - build - - report build: stage: build @@ -8,9 +7,4 @@ build: - module load R - R -e 'devtools::build()' - R -e 'devtools::check()' - -report: - stage: report - script: - - module load R - R -e 'covr::package_coverage()' -- GitLab