diff --git a/R/Start.R b/R/Start.R index 93fb3c1738145f23a4c1f33d8371803191272a56..db573a86685a3e3cd363c87cc9803ce807c04921 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2184,6 +2184,22 @@ Start <- function(..., # dim = indices/selectors, crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) names(chunk_amount) <- crossed_file_dim + } else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) & + inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) & + any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) { + # inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4]) + crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% + names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))] + if (length(crossed_file_dim) == 1) { + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim + } else { + # e.g., region = [memb = 2, sdate = 3, region = 1] + chunk_amount <- prod( + sapply(lapply( + dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length)) + names(chunk_amount) <- paste(crossed_file_dim, collapse = '.') + } } else { chunk_amount <- 1 } @@ -2233,7 +2249,7 @@ Start <- function(..., # dim = indices/selectors, selector_file_dims <- 1 #NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname. - # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2) + # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2) if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] } @@ -2434,11 +2450,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(found_file_dims[[i]] != crossed_file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(found_file_dims[[i]] != crossed_file_dim)) + lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2489,7 +2505,7 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(crossed_file_dim)) && !(crossed_file_dim %in% names(var_file_dims))) { + if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) { stop("The variable '", var_with_selectors_name, "' must also be ", "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") @@ -2589,9 +2605,9 @@ Start <- function(..., # dim = indices/selectors, if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { - if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == crossed_file_dim) && - (inner_dim %in% names(selector_inner_dims)))) { + if (!(length(unmatching_file_dims) == 1 & + names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim & + inner_dim %in% names(selector_inner_dims))) { raise_error <- TRUE } } @@ -2628,9 +2644,12 @@ Start <- function(..., # dim = indices/selectors, fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(crossed_file_dim)) { - fri_dims[crossed_file_dim] <- chunk_amount - } + #NOTE: Not sure how it works here, but "chunk_amount" is the same as + # "selector_file_dims" above in the cases we've seen so far, + # and it causes problem when crossed_file_dim is more than one. +# if (!is.null(crossed_file_dim)) { +# fri_dims[crossed_file_dim] <- chunk_amount +# } } fri <- vector('list', length = prod(fri_dims)) dim(fri) <- fri_dims @@ -2680,7 +2699,7 @@ Start <- function(..., # dim = indices/selectors, # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(crossed_file_dim) && (crossed_file_dim %in% names(selector_file_dims))) || is.null(crossed_file_dim)) { + if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { @@ -3196,11 +3215,15 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(crossed_file_dim)) { - taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE - } else { - taken_chunks <- TRUE - } + + #NOTE: This part existed always but never was used. taken_chunks + # is related to merge_across_dims, but I don't know how it is + # used (maybe for higher efficiency?) +# if (!is.null(crossed_file_dim)) { +# taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE +# } else { + taken_chunks <- TRUE +# } } } else { # The inner dim goes across a file dim (e.g., time_across = 'sdate') @@ -3487,16 +3510,17 @@ Start <- function(..., # dim = indices/selectors, } } else { if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim - if (crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + #NOTE: When is not this case??? Maybe this condition is not needed + if (any(crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]])))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) if (!identical(dim_extra_ind, integer(0))) { tmp_list <- asplit(tmp, dim_extra_ind) - dim_file_ind <- which(names(dim(tmp_list[[1]])) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp_list[[1]])) %in% crossed_file_dim) tmp_list <- lapply(tmp_list, asplit, dim_file_ind) } else { # only crossed_file_dim and inner_dim - dim_file_ind <- which(names(dim(tmp)) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp)) %in% crossed_file_dim) tmp_list <- asplit(tmp, dim_file_ind) # Add another layer to be consistent with the first case above tmp_list <- list(tmp_list) diff --git a/R/zzz.R b/R/zzz.R index c76bee00780fafb392ad24cbd55b174e2a874448..b2a8add754cdac41e188bf3a30030a64faf8fb21 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -118,6 +118,9 @@ rebuild_dim_params <- function(dim_params, merge_across_dims, # Reallocating pairs of across file and inner dimensions if they have # to be merged. They are put one next to the other to ease merge later. if (merge_across_dims) { + if (any(!names(inner_dims_across_files) %in% names(dim_params)) | + any(!unlist(inner_dims_across_files) %in% names(dim_params))) + stop("All *_across parameters must have value as a file dimension name.") for (inner_dim_across in names(inner_dims_across_files)) { inner_dim_pos <- which(names(dim_params) == inner_dim_across) file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 1f320bfc4e8a610eac96a6409aad44aa9f57d260..10e5545fcb80f3af7eefed32a8f00dadd65feacc 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -148,10 +148,10 @@ c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), tolerance = 0.0001 ) -#expect_equal( -#dim(attr(data, 'Variables')$common$region), -#c(memb = 2, sdate = 3, region = 2) -#) +expect_equal( +dim(attr(data, 'Variables')$common$region), +c(memb = 2, sdate = 3, region = 1) +) }) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 5b50692cb2b17a34b8f1aa4782c1f83f7a456bb2..d97cd18f4f7654e41f741d76fb1411cd58d5bbaa 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -634,9 +634,6 @@ data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$fi time = 'file_date'), retrieve = TRUE) ) -dim(obs) -# dat var time syear latitude longitude -# 1 1 5 2 1 1 dates <- attr(data, 'Variables')$common$time @@ -687,3 +684,69 @@ dates ) }) + +test_that("10. implicit dependency, leap year", { + +a <- as.POSIXct('1995-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1995-03-01 18:00:00', tz = 'UTC') +y1 <- seq(a, b, by = 'days') +a <- as.POSIXct('1996-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1996-03-01 18:00:00', tz = 'UTC') +y2 <- seq(a, b, by = 'days') +y2 <- y2[-3] # remove 28 Feb +time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) +time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data, 'Variables')$common$time + +expect_equal( +dim(dates), +c(file_date = 2, time = 3) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(aperm(dates, 2:1)), +as.vector(time_array) +) + +suppressWarnings( +dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + + +}) +