diff --git a/.Rbuildignore b/.Rbuildignore index 97c7dbee587781349aa9529b8a3b300efeb00aa6..bdb1d434212d483ed088618eecc1873ec2e88206 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,5 @@ .tar.gz .pdf ./.nc +# unit tests should be ignored when building the package for CRAN +^tests$ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a7d3f8954fc859541b2ce5e564804bf6b7624848..050032ee92013d96f52eb851c09600443a00233a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,7 +4,7 @@ stages: build: stage: build script: - - module load R + - module load R/4.1.2-foss-2015a-bare - R CMD build --resave-data . - R CMD check --as-cran multiApply_*.tar.gz - R -e 'covr::package_coverage()' diff --git a/DESCRIPTION b/DESCRIPTION index 12a312837f13f01ab8b83e13a92e8516015d6686..09bc59cae0cefcd51c7d4fd5a3296fb38fc61d24 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: multiApply Title: Apply Functions to Multiple Multidimensional Arrays or Vectors -Version: 2.1.3 +Version: 2.1.4 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = "aut"), person("Alasdair", "Hunter", , "alasdair.hunter@bsc.es", role = "aut"), - person("Nuria", "Perez", , "nuria.perez@bsc.es", role = "cre")) + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("ctb", "cre")), + person("Nuria", "Perez", , "nuria.perez@bsc.es", role = "ctb")) Description: The base apply function and its variants, as well as the related functions in the 'plyr' package, typically apply user-defined functions to a single argument (or a list of vectorized arguments in the case of mapply). The @@ -29,9 +30,8 @@ Imports: plyr Suggests: testthat -License: Apache License 2.0 +License: GPL-3 URL: https://earth.bsc.es/gitlab/ces/multiApply BugReports: https://earth.bsc.es/gitlab/ces/multiApply/-/issues Encoding: UTF-8 -LazyData: true -RoxygenNote: 5.0.0 +RoxygenNote: 7.2.0 diff --git a/NAMESPACE b/NAMESPACE index f0319b712f8d71e70ca33dab0c6318523aee731c..502e253863c98d66bf57f3d437670d5bd3247017 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,3 +7,4 @@ importFrom(plyr,llply) importFrom(plyr,splat) importFrom(stats,setNames) importFrom(utils,capture.output) +importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000000000000000000000000000000000000..e364792fe9dc77fb631e13ea6a94c921c509b6fd --- /dev/null +++ b/NEWS.md @@ -0,0 +1,2 @@ +# multiApply 2.1.4 (Release date: 2023-03-27) +- Check if the output chunks all have the same dimension length. If not, return errors. diff --git a/R/Apply.R b/R/Apply.R index bbd71ae426f9973c3ea77ea4f2a0c214a5f4d4ce..b04ee15add8851f3c38a282c0b26a1a1c1228b61 100644 --- a/R/Apply.R +++ b/R/Apply.R @@ -31,7 +31,7 @@ #' @importFrom foreach registerDoSEQ #' @importFrom doParallel registerDoParallel #' @importFrom plyr splat llply -#' @importFrom utils capture.output +#' @importFrom utils capture.output head #' @importFrom stats setNames Apply <- function(data, target_dims = NULL, fun, ..., output_dims = NULL, margins = NULL, @@ -666,6 +666,25 @@ Apply <- function(data, target_dims = NULL, fun, ..., if (!is.null(component_dims)) { atomic_fun_out_dims[[component]] <- component_dims } + + # Check if component_dims matches the first_sub_result + if (found_first_sub_result) { + error_inconsistent_output <- FALSE + if (is.null(atomic_fun_out_dims[[component]])) { # result[[component]] is a number + if (length(dim(sub_arrays_of_results[[component]])) != 1) { + error_inconsistent_output <- TRUE + } + } else if (!identical(atomic_fun_out_dims[[component]], + head(dim(sub_arrays_of_results[[component]]), + length(dim(sub_arrays_of_results[[component]])) - 1))) { + #NOTE: The last dim in sub_arrays_of_results[[component]] is chunk_sizes[m] + error_inconsistent_output <- TRUE + } + if (error_inconsistent_output) { + stop("Output '", names(sub_arrays_of_results)[[component]], "' doesn't have consistent output length among chunks.") + } + } + if (length(result[[component]]) > 0) { sub_arrays_of_results[[component]][(1:prod(component_dims)) + (n - 1) * prod(component_dims)] <- result[[component]] @@ -697,6 +716,21 @@ Apply <- function(data, target_dims = NULL, fun, ..., parallel <- ncores > 1 if (parallel) registerDoParallel(ncores) result <- llply(1:length(chunk_sizes), iteration, .parallel = parallel) + + # Check if all the results have the same output length + for (i_output in 1:sapply(result, length)[1]) { + result_i_output <- lapply(result, '[[', i_output) + dims_each_result <- lapply(result_i_output, dim) + tmp <- lapply(dims_each_result, length) + dims_each_result_without_chunk <- lapply(1:length(dims_each_result), function(ii) { + head(dims_each_result[[ii]], tmp[[ii]] - 1) + }) + + if (length(unique(dims_each_result_without_chunk)) != 1) { + stop("Output '", names(result[[1]])[i_output], "' doesn't have consistent output length among chunks.") + } + } + if (parallel) registerDoSEQ() # Merge the results arrays_of_results <- NULL diff --git a/README.md b/README.md index 6258c1935af75f5d79423cef756e4190c3561fdf..9dbd05c9022b18defc2bfd040b3eca8f2b559663 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,8 @@ -## multiApply [![build status](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/build.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) [![CRAN version](http://www.r-pkg.org/badges/version/multiApply)](https://cran.r-project.org/package=multiApply) [![coverage report](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/coverage.svg)](https://earth.bsc.es/gitlab/ces/multiApply/commits/master) [![License: LGPL v3](https://img.shields.io/badge/License-LGPL%20v3-blue.svg)](https://www.gnu.org/licenses/lgpl-3.0) [![CRAN RStudio Downloads](https://cranlogs.r-pkg.org/badges/multiApply)](https://cran.r-project.org/package=multiApply) +## multiApply +[![CRAN version](http://www.r-pkg.org/badges/version/multiApply)](https://CRAN.R-project.org/package=multiApply) +![coverage report](https://earth.bsc.es/gitlab/ces/multiApply/badges/master/coverage.svg) +[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) +[![CRAN RStudio Downloads](https://cranlogs.r-pkg.org/badges/multiApply)](https://CRAN.R-project.org/package=multiApply) This package includes the function `Apply` as its only function. It extends the `apply` function to applications in which a function needs to be applied simultaneously over multiple input arrays. Although this can be done manually with for loops and calls to the base `apply` function, it can often be a challenging task which can easily result in error-prone or memory-inefficient code. @@ -53,12 +57,6 @@ install.packages('multiApply') library(multiApply) ``` -Also, you can install the latest stable version from the GitHub repository as follows: - -```r -devtools::install_git('https://earth.bsc.es/gitlab/ces/multiApply') -``` - ### How to use This package consistis in a single function, `Apply`, which is used in a similar fashion as the base `apply`. Full documentation can be found in `?Apply`. diff --git a/man/Apply.Rd b/man/Apply.Rd index a368553101b7307567bf436584696e72824ef6d1..3fd2daefbe1574fd1580db96060cad30f1d201ac 100644 --- a/man/Apply.Rd +++ b/man/Apply.Rd @@ -4,9 +4,19 @@ \alias{Apply} \title{Apply Functions to Multiple Multidimensional Arrays or Vectors} \usage{ -Apply(data, target_dims = NULL, fun, ..., output_dims = NULL, - margins = NULL, use_attributes = NULL, extra_info = NULL, - guess_dim_names = TRUE, ncores = NULL, split_factor = 1) +Apply( + data, + target_dims = NULL, + fun, + ..., + output_dims = NULL, + margins = NULL, + use_attributes = NULL, + extra_info = NULL, + guess_dim_names = TRUE, + ncores = NULL, + split_factor = 1 +) } \arguments{ \item{data}{One or a list of vectors, matrices or arrays. They must be in the same order as expected by the function provided in the parameter 'fun'. The dimensions do not necessarily have to be ordered. If the 'target_dims' require a different order than the provided, \code{Apply} will automatically reorder the dimensions as needed.} @@ -55,4 +65,3 @@ test <- Apply(data, target = list(3, 3, NULL), test_fun) \references{ Wickham, H (2011), The Split-Apply-Combine Strategy for Data Analysis, Journal of Statistical Software. } - diff --git a/multiApply-manual.pdf b/multiApply-manual.pdf deleted file mode 100644 index 5a2f348b0e965429433fa55f22b4056849e6e587..0000000000000000000000000000000000000000 Binary files a/multiApply-manual.pdf and /dev/null differ diff --git a/tests/testthat/test-inconsistent_output_length.R b/tests/testthat/test-inconsistent_output_length.R new file mode 100644 index 0000000000000000000000000000000000000000..8f1470cdde98822b6ddc1c98a7642ba65b990674 --- /dev/null +++ b/tests/testthat/test-inconsistent_output_length.R @@ -0,0 +1,159 @@ +context("Inconsistent output length") + +test_that("dat1", { + +dat1 <- array(1:60, dim = c(time = 5, sdate = 2, member = 6)) + +func1 <- function(x) { + res <- if (ii %% 2 == 0) {x} else {x[1:2, ]} + ii <<- ii + 1 + return(list(res, x)) +} + +ii <- 1 +expect_error( +Apply(dat1, fun = func1, target_dims = c("time", "sdate")), +"Output 'output1' doesn't have consistent output length among chunks." +) +#--------------------------- + +func2 <- function(x) { + res <- if (ii %% 2 == 0) {x} else {x[1, ]} + ii <<- ii + 1 + return(list(out1 = res, out2 = x)) +} +ii <- 2 +expect_error( +Apply(dat1, fun = func2, target_dims = c("time", "sdate")), +"Output 'out1' doesn't have consistent output length among chunks." +) +#--------------------------- + +func3 <- function(x) { + res <- x[1, ] + return(list(res, x)) +} +res <- Apply(dat1, fun = func3, target_dims = c("time", "sdate")) +expect_equal( +dim(res[[1]]), +c(2, member = 6) +) +expect_equal( +dim(res[[2]]), +c(time = 5, sdate = 2, member = 6) +) +#-------------------------------- + +}) + + +test_that("dat2", { + +dat2 <- array(1:60, dim = c(time = 5, member = 7)) + +func1 <- function(x) { + res <- if (ii %% 2 == 0) {x} else {x[1:2]} + ii <<- ii + 1 + return(list(res)) +} + +ii <- 1 +expect_error( +Apply(dat2, fun = func1, target_dims = c("time")), +"Output 'output1' doesn't have consistent output length among chunks." +) +#--------------------------- + + +func2 <- function(x) { + res <- if (ii %% 2 == 0) {x} else {x[1]} + ii <<- ii + 1 + return(list(out1 = res, out2 = x)) +} +ii <- 1 +expect_error( +Apply(dat2, fun = func2, target_dims = c("time")), +"Output 'out1' doesn't have consistent output length among chunks." +) +#--------------------------- + +func3 <- function(x) { + res <- x[1] + return(list(res, x)) +} +res <- Apply(dat2, fun = func3, target_dims = c("time")) +expect_equal( +dim(res[[1]]), +c(member = 7) +) +expect_equal( +dim(res[[2]]), +c(time = 5, member = 7) +) +expect_equal( +as.vector(res[[1]]), +seq(1, 31, by = 5) +) +expect_equal( +res[[2]], +dat2 +) +#-------------------------------- + +}) + + +test_that("dat3", { + +dat3 <- array(1:15, dim = c(time = 5, member = 3)) + +func2 <- function(x) { + res <- if (ii %% 2 == 0) {x} else {as.vector(x[1])} + ii <<- ii + 1 + return(list(out1 = res)) +} +ii <- 1 +expect_error( +Apply(dat3, fun = func2, target_dims = c("time")), +"Output 'out1' doesn't have consistent output length among chunks." +) +ii <- 2 +expect_error( +Apply(dat3, fun = func2, target_dims = c("time")), +"Output 'out1' doesn't have consistent output length among chunks." +) +#--------------------------- + +func3 <- function(x) { + res <- as.vector(x[1:2]) + return(list(res)) +} +res <- Apply(dat3, fun = func3, target_dims = "time") + +expect_equal( +dim(res[[1]]), +c(2, member = 3) +) +expect_equal( +as.vector(res[[1]]), +c(1, 2, 6, 7, 11, 12) +) +#------------------------- + +func4 <- function(x) { + res <- x[1:2] + return(list(res)) +} +res <- Apply(dat3, fun = func4, target_dims = "time") + +expect_equal( +dim(res[[1]]), +c(time = 2, member = 3) +) +expect_equal( +as.vector(res[[1]]), +c(1, 2, 6, 7, 11, 12) +) + +}) + diff --git a/tests/testthat/test-use-cases.R b/tests/testthat/test-use-cases.R index 169366cc57127e4efc4b660da272ec697c5b7ef7..349fbe6880e79be3e04ae64a3bcdd03294ba8959 100644 --- a/tests/testthat/test-use-cases.R +++ b/tests/testthat/test-use-cases.R @@ -800,9 +800,11 @@ test_that("in1: 2 dim; in2: 1 dim; targ. dims: 0-2, 0-1; out1: 1 dim; out2: 1 va "Guessed names for some unnamed dimensions" ) expect_equal( + suppressWarnings( Apply(list(array(1:10, dim = c(10, 3)), array(1:3 * 10, dim = c(3))), - NULL, f), + NULL, f) + ), list(output1 = array(sapply(c(10, 20, 30), function(x) { x + rep(sapply(1:10, function(y) { y:(y + 3) @@ -1274,6 +1276,7 @@ test_that("Margin indices and extra info are provided correctly.", { stopifnot(identical(attr(b, 'test_attr_b'), list(x = 1, z = 2))) } +suppressWarnings({ r <- multiApply::Apply(list(a, b), list(c('b', 'c', 'd'), c('b', 'c')), @@ -1281,7 +1284,7 @@ test_that("Margin indices and extra info are provided correctly.", { use_attributes = list(a = 'test_attr_a', b = 'test_attr_b'), f) - +}) r <- multiApply::Apply(list(a = a, b = b), list(c('b', 'c', 'd'), c('b', 'c')),