diff --git a/R/Start.R b/R/Start.R index 25b0e2dd1859bcdbbf9e09cb50a0371dd041dbd1..606cb7a9d7d0f87cc9b2ebf0d479575feb6fd0fc 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1913,13 +1913,19 @@ Start <- function(..., # dim = indices/selectors, ## TODO: To be run in parallel (local multi-core) # Now time to work out the inner file dimensions. # First pick the requested variables. - dims_to_iterate <- NULL - for (return_var in names(return_vars)) { - dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]])) - } - if (found_pattern_dim %in% dims_to_iterate) { - dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)] - } + +#//// This part is moved below the new code//// +# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to below can save some work +# and get the revised common_return_vars if it is changed. +# dims_to_iterate <- NULL +# for (return_var in names(return_vars)) { +# dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]])) +# } +# if (found_pattern_dim %in% dims_to_iterate) { +# dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)] +# } +#////////////////////////////////////////////// + common_return_vars <- NULL common_first_found_file <- NULL common_return_vars_pos <- NULL @@ -1940,6 +1946,58 @@ Start <- function(..., # dim = indices/selectors, x } }) + +#//////////////////////////////////////////// + # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or + # (2) time_across = 'sdate'. + # NOTE: Here is not in for loop of dat[[i]] + for (i in 1:length(dat)) { + for (inner_dim in expected_inner_dims[[i]]) { + # The selectors for the inner dimension are taken. + selector_array <- dat[[i]][['selectors']][[inner_dim]] + file_dim_as_selector_array_dim <- 1 + + if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { + file_dim_as_selector_array_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% names(dim(selector_array)))] + } + + if (inner_dim %in% inner_dims_across_files | is.character(file_dim_as_selector_array_dim)) { #(2) or (1) + # inner_dim is not in return_vars or is NULL + if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | + (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]])) ) { + if (is.character(file_dim_as_selector_array_dim)) { #(1) + if (file_dim_as_selector_array_dim %in% found_pattern_dim) { + return_vars[[inner_dim]] <- file_dim_as_selector_array_dim + } else { + common_return_vars[[inner_dim]] <- file_dim_as_selector_array_dim + } + } else if (inner_dim %in% inner_dims_across_files) { #(2) + file_dim_name <- names(which(inner_dim == inner_dims_across_files)) + if (file_dim_name %in% found_pattern_dim) { + return_vars[[inner_dim]] <- file_dim_name + } else { + common_return_vars[[inner_dim]] <- file_dim_name + } + } + .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", tmp, + "', but '", inner_dim, "' is not in return_vars list or is NULL. ", + "To provide the correct metadata, the value of ", inner_dim, + " in 'return_vars' is specified as '", tmp, "'.")) + } + } + } + } +#//////////////////////////////////////////// + +# This part was above where return_vars is seperated into return_vars and common_return_vars +# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to here can save some work +# and get the revised common_return_vars if it is changed in the part right above. + dims_to_iterate <- NULL + for (common_return_var in names(common_return_vars)) { + dims_to_iterate <- unique(c(dims_to_iterate, common_return_vars[[common_return_var]])) + } +#//////////////////////////////////////////// + if (length(common_return_vars) > 0) { picked_common_vars <- vector('list', length = length(common_return_vars)) names(picked_common_vars) <- names(common_return_vars) @@ -2510,9 +2568,13 @@ Start <- function(..., # dim = indices/selectors, } taken_chunks <- rep(FALSE, chunk_amount) selector_file_dims <- 1 + + #NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname. + # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2) if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] } + selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))] var_with_selectors <- NULL var_with_selectors_name <- var_params[[inner_dim]] @@ -3589,7 +3651,10 @@ Start <- function(..., # dim = indices/selectors, picked_vars[[i]] <- vars_to_crop } if (i == length(dat)) { - picked_common_vars <- common_vars_to_crop + #NOTE: To avoid redundant run + if (inner_dim %in% names(common_vars_to_crop)) { + picked_common_vars <- common_vars_to_crop + } } } } diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R new file mode 100644 index 0000000000000000000000000000000000000000..a5fa558cb8a76ad5ff6542c5678c2c7187e17226 --- /dev/null +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -0,0 +1,199 @@ +context("Start() metadata filedim dependency") +# When inner dimension selector is an array with filedim dimension name (e.g., time = [sdate = 2, time = 4], +# or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must +# specify this relationship, i.e., return_vars = list(time = 'sdate'). + + +# Preparation: Get the time values +repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + +suppressWarnings( +data <- Start(dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + ensemble = indices(1), + time = indices(1:4), + latitude = indices(1), longitude = indices(1), + return_vars = list(time = 'sdate'), + silent = TRUE, + retrieve = FALSE) +) +time_val <- attr(data, 'Variables')$common$time +time_val_vector <- time_val[c(1, 3, 5, 7, 2, 4, 6, 8)] + +#--------------------------------------------------------------- + +test_that("1. Selector is values()", { + +#================== values() with dimensions =================== + suppressWarnings( + test4 <- Start( + dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + time = time_val, + # time_across = 'sdate', + ensemble = c(1, 3), + latitude = indices(1:3), + longitude = indices(1:2), + return_vars = list(time = 'sdate'), + retrieve = TRUE) + ) + time4 <- attr(test4, 'Variables')$common$time + +expect_equal( + dim(time4), + c(sdate = 2, time = 4) +) +expect_equal( + time4[2, 2], + as.POSIXct("2018-03-01", tz = 'UTC') +) + +#---------------------------------------- + + suppressWarnings( + test6 <- Start( + dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + time = time_val, + # time_across = 'sdate', + ensemble = c(1, 3), + latitude = indices(1:3), + longitude = indices(1:2), + return_vars = list(time = NULL), + retrieve = TRUE) + ) + time6 <- attr(test4, 'Variables')$common$time + +expect_equal( + dim(time6), + c(sdate = 2, time = 4) +) +expect_equal( + time6, + time4 +) +expect_equal( + as.vector(test6), + as.vector(test4) +) + +expect_equal( + test6, + test4, + check.attributes = FALSE +) + +#---------------------------------------- + + suppressWarnings( + test6a <- Start( + dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + time = time_val, + time_across = 'sdate', + ensemble = c(1, 3), + latitude = indices(1:3), + longitude = indices(1:2), + return_vars = list(time = NULL), + retrieve = TRUE) + ) + time6a <- attr(test4, 'Variables')$common$time + +expect_equal( + dim(time6a), + c(sdate = 2, time = 4) +) +expect_equal( + time6, + time6a +) +expect_equal( + as.vector(test6), + as.vector(test4) +) + +expect_equal( + test6, + test6a, + check.attributes = FALSE +) + +#---------------------------------------- + +#================== values() is vector =================== + + suppressWarnings( + test14a <- Start( + dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + time = time_val_vector, + time_across = 'sdate', + ensemble = c(1, 3), + latitude = indices(1:3), + longitude = indices(1:2), + return_vars = list(time = NULL), + retrieve = TRUE) + ) + time14a <- attr(test14a, 'Variables')$common$time + +expect_equal( + dim(time14a), + c(sdate = 2, time = 4) +) +expect_equal( + time14a, + time6a +) +expect_equal( + as.vector(test14a), + as.vector(test4) +) + +expect_equal( + test14a, + test6a, + check.attributes = FALSE +) + +#------------------------------------------------- + + suppressWarnings( + test15a <- Start( + dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + time = time_val_vector, + time_across = 'sdate', + ensemble = c(1, 3), + latitude = indices(1:3), + longitude = indices(1:2), + return_vars = list(time = 'sdate'), + retrieve = TRUE) + ) + time15a <- attr(test15a, 'Variables')$common$time + +expect_equal( + dim(time15a), + c(sdate = 2, time = 4) +) +expect_equal( + time15a, + time6a +) +expect_equal( + as.vector(test15a), + as.vector(test4) +) + +expect_equal( + test15a, + test6a, + check.attributes = FALSE +) + +})