From 202d87207183a9eae94ebaacee887386e18d2e3d Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Apr 2022 18:33:12 +0200 Subject: [PATCH 1/5] Add check to detect wrong *_across value --- R/zzz.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 5a8ea5e..ad334e6 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -118,6 +118,9 @@ rebuild_dim_params <- function(dim_params, merge_across_dims, # Reallocating pairs of across file and inner dimensions if they have # to be merged. They are put one next to the other to ease merge later. if (merge_across_dims) { + if (any(!names(inner_dims_across_files) %in% names(dim_params)) | + any(!unlist(inner_dims_across_files) %in% names(dim_params))) + stop("All *_across parameters must have value as a file dimension name.") for (inner_dim_across in names(inner_dims_across_files)) { inner_dim_pos <- which(names(dim_params) == inner_dim_across) file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) -- GitLab From bb50483c12aa0503b92ebe9d4ecf8c2a68100d9a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Apr 2022 18:38:04 +0200 Subject: [PATCH 2/5] Correct version --- tests/testthat/test-Start-calendar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 10cf7bf..53122c8 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, -- GitLab From 1b3715ad8bc3724ba1bc0f80084ff3541e4a8a17 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 11:50:49 +0200 Subject: [PATCH 3/5] Add comment and fix typo --- R/Start.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 7ae3096..80baa51 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2222,7 +2222,7 @@ Start <- function(..., # dim = indices/selectors, selector_file_dims <- 1 #NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname. - # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2) + # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2) if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] } @@ -3475,7 +3475,8 @@ Start <- function(..., # dim = indices/selectors, Subset(transformed_var_with_selectors, inner_dim, crop_indices) } } else { - if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim + if (!is.null(file_dim)) { #merge_across_dims, file_dim is the depended file dim + #NOTE: When is not this case??? if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) -- GitLab From c217ec54562dfd4e8fece281fb3190b2fca3b806 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 20:31:17 +0200 Subject: [PATCH 4/5] Development for metadata dimension of implicit dependency (define selector by array) --- R/Start.R | 47 ++++++++++++++----- ...st-Start-implicit_dependency_by_selector.R | 8 ++-- .../testthat/test-Start-metadata_reshaping.R | 3 -- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/R/Start.R b/R/Start.R index fa90548..3d69279 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2184,6 +2184,22 @@ Start <- function(..., # dim = indices/selectors, crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) names(chunk_amount) <- crossed_file_dim + } else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) & + inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) & + any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) { + # inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4]) + crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% + names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))] + if (length(crossed_file_dim) == 1) { + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim + } else { + # e.g., region = [memb = 2, sdate = 3, region = 1] + chunk_amount <- prod( + sapply(lapply( + dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length)) + names(chunk_amount) <- paste(crossed_file_dim, collapse = '.') + } } else { chunk_amount <- 1 } @@ -2628,9 +2644,12 @@ Start <- function(..., # dim = indices/selectors, fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(crossed_file_dim)) { - fri_dims[crossed_file_dim] <- chunk_amount - } + #NOTE: Not sure how it works here, but "chunk_amount" is the same as + # "selector_file_dims" above in the cases we've seen so far, + # and it causes problem when crossed_file_dim is more than one. +# if (!is.null(crossed_file_dim)) { +# fri_dims[crossed_file_dim] <- chunk_amount +# } } fri <- vector('list', length = prod(fri_dims)) dim(fri) <- fri_dims @@ -3196,11 +3215,15 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(crossed_file_dim)) { - taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE - } else { - taken_chunks <- TRUE - } + + #NOTE: This part existed always but never was used. taken_chunks + # is related to merge_across_dims, but I don't know how it is + # used (maybe for higher efficiency?) +# if (!is.null(crossed_file_dim)) { +# taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE +# } else { + taken_chunks <- TRUE +# } } } else { # The inner dim goes across a file dim (e.g., time_across = 'sdate') @@ -3487,17 +3510,17 @@ Start <- function(..., # dim = indices/selectors, } } else { if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim - #NOTE: When is not this case??? - if (crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + #NOTE: When is not this case??? Maybe this condition is not needed + if (any(crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]])))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) if (!identical(dim_extra_ind, integer(0))) { tmp_list <- asplit(tmp, dim_extra_ind) - dim_file_ind <- which(names(dim(tmp_list[[1]])) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp_list[[1]])) %in% crossed_file_dim) tmp_list <- lapply(tmp_list, asplit, dim_file_ind) } else { # only crossed_file_dim and inner_dim - dim_file_ind <- which(names(dim(tmp)) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp)) %in% crossed_file_dim) tmp_list <- asplit(tmp, dim_file_ind) # Add another layer to be consistent with the first case above tmp_list <- list(tmp_list) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 1f320bf..10e5545 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -148,10 +148,10 @@ c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), tolerance = 0.0001 ) -#expect_equal( -#dim(attr(data, 'Variables')$common$region), -#c(memb = 2, sdate = 3, region = 2) -#) +expect_equal( +dim(attr(data, 'Variables')$common$region), +c(memb = 2, sdate = 3, region = 1) +) }) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 5b50692..2cacc50 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -634,9 +634,6 @@ data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$fi time = 'file_date'), retrieve = TRUE) ) -dim(obs) -# dat var time syear latitude longitude -# 1 1 5 2 1 1 dates <- attr(data, 'Variables')$common$time -- GitLab From f9d0573685adbe533aa4a325e19cbd1dd648b5c4 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Apr 2022 12:53:57 +0200 Subject: [PATCH 5/5] Revise conditional statement and add one test --- R/Start.R | 14 ++-- .../testthat/test-Start-metadata_reshaping.R | 66 +++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/R/Start.R b/R/Start.R index 3d69279..db573a8 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2450,11 +2450,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(found_file_dims[[i]] != crossed_file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(found_file_dims[[i]] != crossed_file_dim)) + lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2505,7 +2505,7 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(crossed_file_dim)) && !(crossed_file_dim %in% names(var_file_dims))) { + if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) { stop("The variable '", var_with_selectors_name, "' must also be ", "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") @@ -2605,9 +2605,9 @@ Start <- function(..., # dim = indices/selectors, if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { - if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == crossed_file_dim) && - (inner_dim %in% names(selector_inner_dims)))) { + if (!(length(unmatching_file_dims) == 1 & + names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim & + inner_dim %in% names(selector_inner_dims))) { raise_error <- TRUE } } @@ -2699,7 +2699,7 @@ Start <- function(..., # dim = indices/selectors, # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(crossed_file_dim) && (crossed_file_dim %in% names(selector_file_dims))) || is.null(crossed_file_dim)) { + if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 2cacc50..d97cd18 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -684,3 +684,69 @@ dates ) }) + +test_that("10. implicit dependency, leap year", { + +a <- as.POSIXct('1995-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1995-03-01 18:00:00', tz = 'UTC') +y1 <- seq(a, b, by = 'days') +a <- as.POSIXct('1996-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1996-03-01 18:00:00', tz = 'UTC') +y2 <- seq(a, b, by = 'days') +y2 <- y2[-3] # remove 28 Feb +time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) +time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data, 'Variables')$common$time + +expect_equal( +dim(dates), +c(file_date = 2, time = 3) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(aperm(dates, 2:1)), +as.vector(time_array) +) + +suppressWarnings( +dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + + +}) + -- GitLab