From 81df28e7969e7975ae235990814da4ecfdeeed9f Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 22 Jun 2021 09:38:03 +0200 Subject: [PATCH 1/4] Revise the checks for depending dimension list names; Add explanation of how to use values() to define dependency relation. --- R/Start.R | 14 +++- tests/testthat/test-Start-depends_values.R | 83 ++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-Start-depends_values.R diff --git a/R/Start.R b/R/Start.R index ff4b978..443e07b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -208,6 +208,10 @@ #'section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has #'items 'd', 'e', 'f'. Otherwise Start() would expect to find the same #'item names in all the sections. +#'If values() is used to define dimensions, it is possible to provide different +#'values of the depending dimension for each depended dimension values. For +#'example, if \code{section = c('electronics', 'clothing')}, we can use +#'\code{item = list(electronics = c('a', 'b', 'c'), clothing = c('d', 'e', 'f'))}. #'\cr\cr #'The \bold{name of another dimension} to be specified in '_across', #'only available for inner dimensions, must be a character string with the name @@ -1484,6 +1488,13 @@ Start <- function(..., # dim = indices/selectors, for (file_dim in file_dims) { if (file_dim %in% names(depending_file_dims)) { ## TODO: Detect multi-dependancies and forbid. + #NOTE: The if statement below is tricky. It tries to distinguish if the depending dim + # has the depended dim as the names of the list. However, if the depending dim + # doesn't have list names and its length is 2 (i.e., list( , )), Start() thinks + # it means the range, just like `lat = values(list(10, 20))`. And because of this, + # we won't enter the following if statement, and the error will occur later in + # SelectorChecker(). Need to find a way to distinguish if list( , ) means range or + # just the values. 
if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) { if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { stop(paste0("If providing selectors for the depending ", @@ -1491,7 +1502,8 @@ Start <- function(..., # dim = indices/selectors, "vector of selectors must be provided for ", "each selector of the dimension it depends on, '", depending_file_dims[[file_dim]], "'.")) - } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + } else if (is.null(names(dat_selectors[[file_dim]])) | + !all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { stop(paste0("If providing selectors for the depending ", "dimension '", file_dim, "', the name of the ", "provided vectors of selectors must match ", diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R new file mode 100644 index 0000000..07a1612 --- /dev/null +++ b/tests/testthat/test-Start-depends_values.R @@ -0,0 +1,83 @@ +# This unit test tests the case that using values() to define the depended +# and depending dimensions. The depending dimension should be a list with +# names that are the values of depended dimensions. + +context("Start() using values() to define dependency relations") + + +path <- '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' + +sdates <- c('2016', '2017', '2018') +chunks <- array(dim = c(chunk = 3, sdate = 3)) +chunks[ , 1] <- c("201701-201712", "201801-201812", "201901-201912") +chunks[ , 2] <- c("201801-201812", "201901-201912", "202001-202012") +chunks[ , 3] <- c("201901-201912", "202001-202012", "202101-202112") + +test_that("1. 
", { +dat1 <- Start(dat = path, + var = 'tos', + sdate = sdates[1:2], + chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2]), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:500), + j = indices(650:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE) + + +dat2 <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2], '2018' = chunks[ ,3]), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:500), + j = indices(650:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE) + + expect_equal( + dat1[1,1,1:2,,,], + dat2[1,1,1:2,,,] + ) + expect_equal( + mean(dat2, na.rm = T), + 29.28614, + tolerance = 0.0001 + ) + expect_equal( + mean(dat1, na.rm = T), + 29.21995, + tolerance = 0.0001 + ) + expect_equal( + dat2[1, 1, 2, 2, 1:3, 10], + c(28.99903, 28.98451, 28.96989), + tolerance = 0.0001 + ) + +}) + +test_that("2. ", { +expect_error( + Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = list(chunks[, 1], chunks[, 2], chunks[ ,3]), + chunk_depends = 'sdate', + time = 'all', + i = indices(400:500), + j = indices(600:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE), + "If providing selectors for the depending dimension 'chunk', the name of the provided vectors of selectors must match exactly the selectors of the dimension it depends on, 'sdate'." +) + +}) -- GitLab From 632f6a8b40d275d30ddafd474b815b9f4e5e88b0 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 22 Jun 2021 14:23:46 +0200 Subject: [PATCH 2/4] Keep the original code available and return a warning if the depending dim doesn't have list names. 
--- R/Start.R | 7 +++-- tests/testthat/test-Start-depends_values.R | 36 ++++++++++------------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/Start.R b/R/Start.R index 443e07b..a55ff6b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1502,13 +1502,16 @@ Start <- function(..., # dim = indices/selectors, "vector of selectors must be provided for ", "each selector of the dimension it depends on, '", depending_file_dims[[file_dim]], "'.")) - } else if (is.null(names(dat_selectors[[file_dim]])) | - !all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { stop(paste0("If providing selectors for the depending ", "dimension '", file_dim, "', the name of the ", "provided vectors of selectors must match ", "exactly the selectors of the dimension it ", "depends on, '", depending_file_dims[[file_dim]], "'.")) + } else if (is.null(names(dat_selectors[[file_dim]]))) { + .warning(paste0("The selectors for the depending dimension '", file_dim, "' do not ", + "have list names. 
Assume that the order of the selectors matches the ", + "depended dimensions '", depending_file_dims[[file_dim]], "''s order.")) } } } diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 07a1612..14f9533 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -41,6 +41,19 @@ dat2 <- Start(dat = path, return_vars = list(time = 'sdate'), retrieve = TRUE) + dat3 <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = list(chunks[, 1], chunks[, 2], chunks[ ,3]), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:500), + j = indices(650:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE) + expect_equal( dat1[1,1,1:2,,,], dat2[1,1,1:2,,,] @@ -60,24 +73,9 @@ dat2 <- Start(dat = path, c(28.99903, 28.98451, 28.96989), tolerance = 0.0001 ) - + expect_equal( + as.vector(dat2), + as.vector(dat3) + ) }) -test_that("2. ", { -expect_error( - Start(dat = path, - var = 'tos', - sdate = sdates, - chunk = list(chunks[, 1], chunks[, 2], chunks[ ,3]), - chunk_depends = 'sdate', - time = 'all', - i = indices(400:500), - j = indices(600:700), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = 'sdate'), - retrieve = TRUE), - "If providing selectors for the depending dimension 'chunk', the name of the provided vectors of selectors must match exactly the selectors of the dimension it depends on, 'sdate'." -) - -}) -- GitLab From 0cba01dabe5289b3e9091e8b52b46c0b492499cc Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 22 Jun 2021 16:25:37 +0200 Subject: [PATCH 3/4] Update .Rd --- man/Start.Rd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/Start.Rd b/man/Start.Rd index efd258f..3bdae42 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -174,6 +174,10 @@ Start() aware that the item names vary in function of the section, i.e. 
section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has items 'd', 'e', 'f'. Otherwise Start() would expect to find the same item names in all the sections. +If values() is used to define dimensions, it is possible to provide different +values of the depending dimension for each depended dimension values. For +example, if \code{section = c('electronics', 'clothing')}, we can use +\code{item = list(electronics = c('a', 'b', 'c'), clothing = c('d', 'e', 'f'))}. \cr\cr The \bold{name of another dimension} to be specified in '_across', only available for inner dimensions, must be a character string with the name -- GitLab From ca0c5e3f7d19eb4401b48b064e4e3a851cff48f5 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 13 Jul 2021 17:20:56 +0200 Subject: [PATCH 4/4] New use case for file dependency --- inst/doc/usecase.md | 4 + inst/doc/usecase/ex1_13_implicit_dependency.R | 2 +- inst/doc/usecase/ex1_14_file_dependency.R | 79 +++++++++++++++++++ tests/testthat/test-Start-depends_values.R | 27 ++++--- 4 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 inst/doc/usecase/ex1_14_file_dependency.R diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index b1cca75..013b47a 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -60,6 +60,10 @@ You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata dependency on a file dimension. By this means, we do not need to specify the *_across parameter and Start() can recognize this dependecy relationship. + 14. [Specify the dependency between file dimensions](inst/doc/usecase/ex1_14_file_dependency.R) + This script shows how to define the dependency between file dimensions. Note that ex1_13 is for +the dependency between one inner dimension and one file dimension (i.e., the usage of *_across), while +this use case is for two file dimensions (i.e., the usage of *_depends). 2. **Execute computation (use `Compute()`)** 1. 
[Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 8d8f337..1ea9381 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -1,5 +1,5 @@ # Author: An-Chi Ho -# Date: 13rd July 2021 +# Date: 13th July 2021 #--------------------------------------------------------------------- # This script shows how to use a value array as the inner dimension selector to express # dependency on a file dimension. By this means, we don't need to specify the *_across diff --git a/inst/doc/usecase/ex1_14_file_dependency.R b/inst/doc/usecase/ex1_14_file_dependency.R new file mode 100644 index 0000000..c23266c --- /dev/null +++ b/inst/doc/usecase/ex1_14_file_dependency.R @@ -0,0 +1,79 @@ +# Author: An-Chi Ho +# Date: 13th July 2021 +#-------------------------------------------------------------------------------- +# This script shows how to define the dependency between file dimensions. +# Note that ex1_13 is for the dependency between one inner dimension and one file +# dimension (i.e., the usage of *_across), while this use case is for two file +# dimensions (i.e., the usage of *_depends). + +# The first case simply uses indices() or 'all' to define the depending file dimension. +# In the second case, we use values() to define both the depended and depending +# dimensions. The depending dimension should be a list with names that are the values +# of the depended dimension. 
+#-------------------------------------------------------------------------------- + +library(startR) + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', + 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/', + 'r1i1p1f2/Omon/tos/gn/v20200417/', + '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') + + +# Case 1: Define the depending dimension ('chunk') by indices or 'all' + +data1 <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = indices(2:4), # 'all' if you want to read all the files + chunk_depends = 'sdate', + time = 'all', + i = indices(450:500), + j = indices(650:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE) + + +dim(data1) +# dat var sdate time i j +# 1 1 3 36 51 51 +data1[1, 1, 1:3, 1:5, 1, 1] +# [,1] [,2] [,3] [,4] [,5] +#[1,] 29.26021 29.73614 29.67156 29.61240 29.59503 +#[2,] 29.37948 29.38624 29.73120 29.97264 29.89160 +#[3,] 30.43721 30.58396 30.06479 30.51131 29.81269 + +#===================================================================== + +# Case 2: Define the depended ('sdate') and depending ('chunk') dimensions by values +sdates <- c('2016', '2017', '2018') +chunks <- array(dim = c(chunk = 3, sdate = 3)) +chunks[, 1] <- c("201701-201712", "201801-201812", "201901-201912") +chunks[, 2] <- c("201801-201812", "201901-201912", "202001-202012") +chunks[, 3] <- c("201901-201912", "202001-202012", "202101-202112") + + +data2 <- Start(dat = path, + var = 'tos', + sdate = sdates, + # the names should be the values of the depended dimension + chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2], '2018' = chunks[ ,3]), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:500), + j = indices(650:700), + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = 'sdate'), + retrieve = TRUE) + +dim(data2) +# dat var sdate time i j +# 1 1 3 36 51 51 + +all.equal(as.vector(data1), 
as.vector(data2)) +#[1] TRUE + + diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 14f9533..49114e7 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -14,63 +14,66 @@ chunks[ , 2] <- c("201801-201812", "201901-201912", "202001-202012") chunks[ , 3] <- c("201901-201912", "202001-202012", "202101-202112") test_that("1. ", { +suppressWarnings( dat1 <- Start(dat = path, var = 'tos', sdate = sdates[1:2], chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2]), chunk_depends = 'sdate', time = 'all', - i = indices(450:500), - j = indices(650:700), + i = indices(450:460), + j = indices(685:700), time_across = 'chunk', merge_across_dims = TRUE, return_vars = list(time = 'sdate'), retrieve = TRUE) +) - +suppressWarnings( dat2 <- Start(dat = path, var = 'tos', sdate = sdates, chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2], '2018' = chunks[ ,3]), chunk_depends = 'sdate', time = 'all', - i = indices(450:500), - j = indices(650:700), + i = indices(450:460), + j = indices(685:700), time_across = 'chunk', merge_across_dims = TRUE, return_vars = list(time = 'sdate'), retrieve = TRUE) - +) +suppressWarnings( dat3 <- Start(dat = path, var = 'tos', sdate = sdates, chunk = list(chunks[, 1], chunks[, 2], chunks[ ,3]), chunk_depends = 'sdate', time = 'all', - i = indices(450:500), - j = indices(650:700), + i = indices(450:460), + j = indices(685:700), time_across = 'chunk', merge_across_dims = TRUE, return_vars = list(time = 'sdate'), retrieve = TRUE) - +) expect_equal( dat1[1,1,1:2,,,], dat2[1,1,1:2,,,] ) expect_equal( mean(dat2, na.rm = T), - 29.28614, + 29.11137, tolerance = 0.0001 ) expect_equal( mean(dat1, na.rm = T), - 29.21995, + 29.07394, tolerance = 0.0001 ) expect_equal( dat2[1, 1, 2, 2, 1:3, 10], - c(28.99903, 28.98451, 28.96989), + c(28.38624, 28.19837, 28.08603), tolerance = 0.0001 ) expect_equal( -- GitLab