diff --git a/R/Start.R b/R/Start.R index 689374a89c1c351eb99410691583ff5fbf10c552..7f14a300128f5f4b165a7a5017a629913a04692d 100644 --- a/R/Start.R +++ b/R/Start.R @@ -208,6 +208,10 @@ #'section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has #'items 'd', 'e', 'f'. Otherwise Start() would expect to find the same #'item names in all the sections. +#'If values() is used to define dimensions, it is possible to provide different +#'values of the depending dimension for each depended dimension values. For +#'example, if \code{section = c('electronics', 'clothing')}, we can use +#'\code{item = list(electronics = c('a', 'b', 'c'), clothing = c('d', 'e', 'f'))}. #'\cr\cr #'The \bold{name of another dimension} to be specified in '_across', #'only available for inner dimensions, must be a character string with the name @@ -1488,6 +1492,13 @@ Start <- function(..., # dim = indices/selectors, for (file_dim in file_dims) { if (file_dim %in% names(depending_file_dims)) { ## TODO: Detect multi-dependancies and forbid. + #NOTE: The if statement below is tricky. It tries to distinguish if the depending dim + # has the depended dim as the names of the list. However, if the depending dim + # doesn't have list names and its length is 2 (i.e., list( , )), Start() thinks + # it means the range, just like `lat = values(list(10, 20))`. And because of this, + # we won't enter the following if statement, and the error will occur later in + # SelectorChecker(). Need to find a way to distinguish if list( , ) means range or + # just the values. 
if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) { if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { stop(paste0("If providing selectors for the depending ", @@ -1501,6 +1512,10 @@ Start <- function(..., # dim = indices/selectors, "provided vectors of selectors must match ", "exactly the selectors of the dimension it ", "depends on, '", depending_file_dims[[file_dim]], "'.")) + } else if (is.null(names(dat_selectors[[file_dim]]))) { + .warning(paste0("The selectors for the depending dimension '", file_dim, "' do not ", + "have list names. Assume that the order of the selectors matches the ", + "depended dimensions '", depending_file_dims[[file_dim]], "''s order.")) } } } diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index b1cca75252a077f7895b4e4da0cab111f147bd99..013b47a3c159be2931b3b2c24b47857f2e254525 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -60,6 +60,10 @@ You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata dependency on a file dimension. By this means, we do not need to specify the *_across parameter and Start() can recognize this dependecy relationship. + 14. [Specify the dependency between file dimensions](inst/doc/usecase/ex1_14_file_dependency.R) + This script shows how to define the dependency between file dimensions. Note that ex1_13 is for +the dependency between one inner dimension and one file dimension (i.e., the usage of *_across), while +this use case is for two file dimensions (i.e., the usage of *_depends). 2. **Execute computation (use `Compute()`)** 1. 
[Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 8d8f337308d06cca6c212e183ae31707bf96b948..1ea9381e53a225612e295b526656910702456503 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -1,5 +1,5 @@ # Author: An-Chi Ho -# Date: 13rd July 2021 +# Date: 13th July 2021 #--------------------------------------------------------------------- # This script shows how to use a value array as the inner dimension selector to express # dependency on a file dimension. By this means, we don't need to specify the *_across diff --git a/inst/doc/usecase/ex1_14_file_dependency.R b/inst/doc/usecase/ex1_14_file_dependency.R new file mode 100644 index 0000000000000000000000000000000000000000..c23266ce8c9c7ef017d1bf4a95f42db55cc738d9 --- /dev/null +++ b/inst/doc/usecase/ex1_14_file_dependency.R @@ -0,0 +1,79 @@ +# Author: An-Chi Ho +# Date: 13th July 2021 +#-------------------------------------------------------------------------------- +# This script shows how to define the dependency between file dimensions. +# Note that ex1_13 is for the dependency between one inner dimension and one file +# dimension (i.e., the usage of *_across), while this use case is for two file +# dimensions (i.e., the usage of *_depends). + +# The first case simply uses indices() or 'all' to define the depending file dimension. +# In the second case, we use values() to define both the depended and depending +# dimensions. The depending dimension should be a list with names that are the values +# of the depended dimension. 
+#--------------------------------------------------------------------------------
+
+library(startR)
+
+path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/',
+               'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/',
+               'r1i1p1f2/Omon/tos/gn/v20200417/',
+               '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc')
+
+sdates <- c('2016', '2017', '2018')  # start dates (depended dimension), used by both cases
+
+# Case 1: Define the depending dimension ('chunk') by indices or 'all'
+data1 <- Start(dat = path,
+               var = 'tos',
+               sdate = sdates,
+               chunk = indices(2:4), # 'all' if you want to read all the files
+               chunk_depends = 'sdate',
+               time = 'all',
+               i = indices(450:500),
+               j = indices(650:700),
+               time_across = 'chunk',
+               merge_across_dims = TRUE,
+               return_vars = list(time = 'sdate'),
+               retrieve = TRUE)
+
+
+dim(data1)
+#  dat   var sdate  time     i     j
+#    1     1     3    36    51    51
+data1[1, 1, 1:3, 1:5, 1, 1]
+#         [,1]     [,2]     [,3]     [,4]     [,5]
+#[1,] 29.26021 29.73614 29.67156 29.61240 29.59503
+#[2,] 29.37948 29.38624 29.73120 29.97264 29.89160
+#[3,] 30.43721 30.58396 30.06479 30.51131 29.81269
+
+#=====================================================================
+
+# Case 2: Define the depended ('sdate') and depending ('chunk') dimensions by values
+# 'sdates' (defined before Case 1) provides the values of the depended dimension.
+chunks <- array(dim = c(chunk = 3, sdate = 3))
+chunks[, 1] <- c("201701-201712", "201801-201812", "201901-201912")
+chunks[, 2] <- c("201801-201812", "201901-201912", "202001-202012")
+chunks[, 3] <- c("201901-201912", "202001-202012", "202101-202112")
+
+
+data2 <- Start(dat = path,
+               var = 'tos',
+               sdate = sdates,
+               # the names should be the values of the depended dimension
+               chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2], '2018' = chunks[, 3]),
+               chunk_depends = 'sdate',
+               time = 'all',
+               i = indices(450:500),
+               j = indices(650:700),
+               time_across = 'chunk',
+               merge_across_dims = TRUE,
+               return_vars = list(time = 'sdate'),
+               retrieve = TRUE)
+
+dim(data2)
+#  dat   var sdate  time     i     j
+#    1     1     3    36    51    51
+
+all.equal(as.vector(data1), as.vector(data2))
+#[1] TRUE
+
+
diff --git a/man/Start.Rd b/man/Start.Rd
index efd258f3079af64f00b94262fcbd10da4f4cb0b9..3bdae42cdd792b32c68cd45656ab47e1f20c2120 100644
--- a/man/Start.Rd
+++ b/man/Start.Rd
@@ -174,6 +174,10 @@ Start() aware that the item names vary in function of the section, i.e.
 section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has
 items 'd', 'e', 'f'. Otherwise Start() would expect to find the same
 item names in all the sections.
+If values() is used to define dimensions, it is possible to provide different
+values of the depending dimension for each depended dimension values. For
+example, if \code{section = c('electronics', 'clothing')}, we can use
+\code{item = list(electronics = c('a', 'b', 'c'), clothing = c('d', 'e', 'f'))}.
 \cr\cr
 The \bold{name of another dimension} to be specified in '_across',
 only available for inner dimensions, must be a character string with the name
diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R
new file mode 100644
index 0000000000000000000000000000000000000000..49114e7bd15cee0d0a77d480a5cfc90ed8328c0b
--- /dev/null
+++ b/tests/testthat/test-Start-depends_values.R
@@ -0,0 +1,84 @@
+# This unit test tests the case that using values() to define the depended
+# and depending dimensions. The depending dimension should be a list with
+# names that are the values of depended dimensions.
+
+context("Start() using values() to define dependency relations")
+
+# The data are read from the /esarchive archive referenced by 'path'.
+path <- '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc'
+
+sdates <- c('2016', '2017', '2018')
+chunks <- array(dim = c(chunk = 3, sdate = 3))
+chunks[, 1] <- c("201701-201712", "201801-201812", "201901-201912")
+chunks[, 2] <- c("201801-201812", "201901-201912", "202001-202012")
+chunks[, 3] <- c("201901-201912", "202001-202012", "202101-202112")
+
+test_that("1. depending dimension defined by named and unnamed lists of values", {
+  suppressWarnings(  # dat1: first two start dates, chunks given as a named list
+    dat1 <- Start(dat = path,
+                  var = 'tos',
+                  sdate = sdates[1:2],
+                  chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2]),
+                  chunk_depends = 'sdate',
+                  time = 'all',
+                  i = indices(450:460),
+                  j = indices(685:700),
+                  time_across = 'chunk',
+                  merge_across_dims = TRUE,
+                  return_vars = list(time = 'sdate'),
+                  retrieve = TRUE)
+  )
+
+  suppressWarnings(  # dat2: all three start dates, chunks given as a named list
+    dat2 <- Start(dat = path,
+                  var = 'tos',
+                  sdate = sdates,
+                  chunk = list('2016' = chunks[, 1], '2017' = chunks[, 2], '2018' = chunks[, 3]),
+                  chunk_depends = 'sdate',
+                  time = 'all',
+                  i = indices(450:460),
+                  j = indices(685:700),
+                  time_across = 'chunk',
+                  merge_across_dims = TRUE,
+                  return_vars = list(time = 'sdate'),
+                  retrieve = TRUE)
+  )
+  suppressWarnings(  # dat3: same selectors as dat2, but the list carries no names
+    dat3 <- Start(dat = path,
+                  var = 'tos',
+                  sdate = sdates,
+                  chunk = list(chunks[, 1], chunks[, 2], chunks[, 3]),
+                  chunk_depends = 'sdate',
+                  time = 'all',
+                  i = indices(450:460),
+                  j = indices(685:700),
+                  time_across = 'chunk',
+                  merge_across_dims = TRUE,
+                  return_vars = list(time = 'sdate'),
+                  retrieve = TRUE)
+  )
+  expect_equal(
+    dat1[1, 1, 1:2, , , ],
+    dat2[1, 1, 1:2, , , ]
+  )
+  expect_equal(
+    mean(dat2, na.rm = TRUE),
+    29.11137,
+    tolerance = 0.0001
+  )
+  expect_equal(
+    mean(dat1, na.rm = TRUE),
+    29.07394,
+    tolerance = 0.0001
+  )
+  expect_equal(
+    dat2[1, 1, 2, 2, 1:3, 10],
+    c(28.38624, 28.19837, 28.08603),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    as.vector(dat2),
+    as.vector(dat3)
+  )
+})
+