diff --git a/R/Start.R b/R/Start.R index 5571dae0775469506a0c7d72944e87b50f66ac0e..8c5a2e88fc3d4ed3bde4053d485a7ade0038c70e 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1363,6 +1363,9 @@ Start <- function(..., # dim = indices/selectors, } first_class <- class(dat_selectors[[file_dim]][[1]]) first_length <- length(dat_selectors[[file_dim]][[1]]) + + # Length will be > 1 if it is list since beginning, e.g., depending dim is a list with + # names as depended dim. for (j in 1:length(dat_selectors[[file_dim]])) { sv <- selector_vector <- dat_selectors[[file_dim]][[j]] if (!identical(first_class, class(sv)) || @@ -1371,13 +1374,31 @@ Start <- function(..., # dim = indices/selectors, "be vectors of the same length and of the same class.") } if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { + #NOTE: ???? It doesn't make any changes. dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv, return_indices = FALSE) - # Take chunk if needed - dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][chunk_indices(length(dat_selectors[[file_dim]][[j]]), - chunks[[file_dim]]['chunk'], - chunks[[file_dim]]['n_chunks'], - file_dim)] + # Take chunk if needed (only defined dim; undefined dims will be chunked later in + # find_ufd_value(). + if (chunks[[file_dim]]['n_chunks'] > 1) { + desired_chunk_indices <- chunk_indices( + length(dat_selectors[[file_dim]][[j]]), + chunks[[file_dim]]['chunk'], + chunks[[file_dim]]['n_chunks'], + file_dim) + dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][desired_chunk_indices] + # chunk the depending dim as well + if (file_dim %in% depending_file_dims) { + depending_dim_name <- names(which(file_dim == depending_file_dims)) + # Chunk it only if it is defined dim (i.e., list of character with names of depended dim) + if (!(length(dat_selectors[[depending_dim_name]]) == 1 && + dat_selectors[[depending_dim_name]] %in% c('all', 'first', 'last'))) { + if (sapply(dat_selectors[[depending_dim_name]], is.character)) { + dat_selectors[[depending_dim_name]] <- + dat_selectors[[depending_dim_name]][desired_chunk_indices] + } + } + } + } } else if (!(is.numeric(sv) || (is.character(sv) && (length(sv) == 1) && (sv %in% c('all', 'first', 'last'))) || (is.list(sv) && (length(sv) == 2) && (all(sapply(sv, is.character)) || @@ -1400,6 +1421,17 @@ Start <- function(..., # dim = indices/selectors, # if the depending and depended file dims are both explicited defined. for (file_dim in file_dims) { if (file_dim %in% names(depending_file_dims)) { + + # Return error if depended dim is a list of values while depending dim is not + # defined (i.e., indices or 'all') + if (file_dim %in% defined_file_dims & + !(depending_file_dims[[file_dim]] %in% defined_file_dims)) { + stop(paste0("The depended dimension, ", file_dim, ", is explictly defined ", + "by a list of values, while the depending dimension, ", + depending_file_dims[[file_dim]], ", is not explictly defined. ", + "Specify ", depending_file_dims[[file_dim]], " by characters.")) + } + ## TODO: Detect multi-dependancies and forbid. #NOTE: The if statement below is tricky. It tries to distinguish if the depending dim # has the depended dim as the names of the list. However, if the depending dim diff --git a/inst/doc/usecase/ex1_14_file_dependency.R b/inst/doc/usecase/ex1_14_file_dependency.R index c23266ce8c9c7ef017d1bf4a95f42db55cc738d9..95cc3daa50bdd9ee03f09fb8c0e8e880f6669664 100644 --- a/inst/doc/usecase/ex1_14_file_dependency.R +++ b/inst/doc/usecase/ex1_14_file_dependency.R @@ -19,6 +19,7 @@ path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', 'r1i1p1f2/Omon/tos/gn/v20200417/', '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') +sdates <- c('2016', '2017', '2018') # Case 1: Define the depending dimension ('chunk') by indices or 'all' diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R new file mode 100644 index 0000000000000000000000000000000000000000..a08b1e53926dba2e470c756283ebd43d288b2bc3 --- /dev/null +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -0,0 +1,219 @@ +# This unit test tests the chunking over depended and depending dimension. +# ex1_14 +# 1. depending dim is values() +# 2. depending dim is indices() +# a. depended dim is indices() +# b. depended dim is list of values +# Note that 2.b. doesn't work. + +context("Chunk over dimensions that have dependency relationship") + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', + 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/', + 'r1i1p1f2/Omon/tos/gn/v20200417/', + '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') +sdates <- c('2016', '2017', '2018') + +# retrieve = T for verification +suppressWarnings( +data_T <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = indices(2:4), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:452), + j = indices(650:651), + time_across = 'chunk', + return_vars = list(time = 'sdate'), + retrieve = T, silent = T) +) + +test_that("1.a. depending dim is values(); depended dim is indices()", { + +suppressWarnings( +data <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = indices(2:4), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:452), + j = indices(650:651), + time_across = 'chunk', + return_vars = list(time = 'sdate'), + retrieve = F) +) +fun <- function(x) { +return(x) +} +step <- Step(fun = fun, + target_dims = 'dat', output_dims = 'dat') +wf <- AddStep(inputs = data, step_fun = step) +suppressWarnings( +res1 <- Compute(workflow = wf, chunks = list(chunk = 2))$output1 +) +suppressWarnings( +res2 <- Compute(workflow = wf, chunks = list(sdate = 2))$output1 +) +suppressWarnings( +res3 <- Compute(workflow = wf, chunks = list(chunk = 2, sdate = 2))$output1 +) + +expect_equal( +as.vector(data_T), +as.vector(res1) +) +expect_equal( +res1, +res2 +) +expect_equal( +res1, +res3 +) + +expect_equal( +as.vector(drop(res1)[, , 1, 1, 1]), +c(29.26021, 29.37948, 30.43721, 30.66117, 30.09621, 30.14460, 30.19445, 30.93453, 30.50104), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(res1)[, , 2, 1, 1]), +c(29.73614, 29.38624, 30.58396, 30.66175, 30.09205, 30.11643, 29.82516, 30.57528, 30.12949), +tolerance = 0.0001 +) + +}) + +################################################################# +################################################################# +################################################################# + +test_that("1.b. depending dim is values(); depended dim is list of values", { + +chunks <- list('2016' = c("201701-201712","201801-201812","201901-201912"), + '2017' = c("201801-201812","201901-201912","202001-202012"), + '2018' = c("201901-201912","202001-202012","202101-202112")) +suppressWarnings( +data <- Start(dat = path, + var = 'tos', + sdate = sdates, + chunk = chunks, + chunk_depends = 'sdate', + time = 'all', + i = indices(450:452), + j = indices(650:651), + time_across = 'chunk', + return_vars = list(time = 'sdate'), + retrieve = F) +) +fun <- function(x) { +return(x) +} +step <- Step(fun = fun, + target_dims = 'dat', output_dims = 'dat') +wf <- AddStep(inputs = data, step_fun = step) + +suppressWarnings( +res1 <- Compute(workflow = wf, chunks = list(chunk = 2))$output1 +) +suppressWarnings( +res2 <- Compute(workflow = wf, chunks = list(sdate = 2))$output1 +) +suppressWarnings( +res3 <- Compute(workflow = wf, chunks = list(chunk = 2, sdate = 2))$output1 +) + +expect_equal( +as.vector(data_T), +as.vector(res1) +) +expect_equal( +res1, +res2 +) +expect_equal( +res1, +res3 +) + +}) + +################################################################# +################################################################# +################################################################# + +test_that("2.a. depending dim is indices(); depended dim is indices()", { + +suppressWarnings( +data <- Start(dat = path, + var = 'tos', + sdate = indices(57:59), # 2016, 2017, 2018 + chunk = indices(2:4), + chunk_depends = 'sdate', + time = 'all', + i = indices(450:452), + j = indices(650:651), + time_across = 'chunk', + return_vars = list(time = 'sdate'), + retrieve = F) +) +fun <- function(x) { +return(x) +} +step <- Step(fun = fun, + target_dims = 'dat', output_dims = 'dat') +wf <- AddStep(inputs = data, step_fun = step) + +suppressWarnings( +res1 <- Compute(workflow = wf, chunks = list(chunk = 2))$output1 +) +suppressWarnings( +res2 <- Compute(workflow = wf, chunks = list(sdate = 2))$output1 +) +suppressWarnings( +res3 <- Compute(workflow = wf, chunks = list(chunk = 2, sdate = 2))$output1 +) + +expect_equal( +as.vector(data_T), +as.vector(res1) +) +expect_equal( +res1, +res2 +) +expect_equal( +res1, +res3 +) + +}) + +################################################################# +################################################################# +################################################################# + +test_that("2.b. depending dim is indices(); depended dim is list of values", { + +chunks <- list('2016' = c("201701-201712","201801-201812","201901-201912"), + '2017' = c("201801-201812","201901-201912","202001-202012"), + '2018' = c("201901-201912","202001-202012","202101-202112")) +expect_error( +suppressWarnings( +Start(dat = path, + var = 'tos', + sdate = indices(57:59), # 2016, 2017, 2018 + chunk = chunks, + chunk_depends = 'sdate', + time = 'all', + i = indices(450:452), + j = indices(650:651), + time_across = 'chunk', + return_vars = list(time = 'sdate'), + retrieve = F)), +"The depended dimension, chunk, is explictly defined by a list of values, while the depending dimension, sdate, is not explictly defined. Specify sdate by characters." +) + +})