From 593898cdc63d85fbac8a0cfbd0e958f6db069c66 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 16 Feb 2022 10:43:57 +0100 Subject: [PATCH 01/45] Cancel the ignorance of the tests --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index b320a05..90018c7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,7 +6,7 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -^tests$ +#^tests$ ^inst/doc$ #^inst/doc/*$ #^inst/doc/figures/$ -- GitLab From ad6ca99fe5271709c80f13cee4d9774f901674fd Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 16 Feb 2022 10:44:29 +0100 Subject: [PATCH 02/45] Add name = 'time' as unit adjustment condition --- R/NcDataReader.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index ebc58fc..fb61ef8 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -183,7 +183,7 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, } }) - if (length(names(attr(result, 'variables'))) == 1) { + if (length(names(attr(result, 'variables'))) == 1 & all(names(attr(result, 'variables')) == 'time')) { var_name <- names(attr(result, 'variables')) units <- attr(result, 'variables')[[var_name]][['units']] -- GitLab From 46d6c3b40542ae381e42c22d43427efb9858f064 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Feb 2022 17:44:39 +0100 Subject: [PATCH 03/45] Reshape time attribute if time is across a file dim. --- R/Start.R | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----- R/zzz.R | 24 +++++++++++++++++++--- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/R/Start.R b/R/Start.R index d11c669..8c15236 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3462,8 +3462,47 @@ Start <- function(..., # dim = indices/selectors, common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_var_with_selectors, inner_dim, crop_indices) } - } else { #old code - common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } else { + if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim + tmp <- common_vars_to_crop[[common_var_to_crop]] + dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) + if (!identical(dim_extra_ind, integer(0))) { + tmp_list <- asplit(tmp, dim_extra_ind) + dim_file_ind <- which(names(dim(tmp_list[[1]])) == file_dim) + tmp_list <- lapply(tmp_list, asplit, dim_file_ind) + } else { # only file_dim and inner_dim + dim_file_ind <- which(names(dim(tmp)) == file_dim) + tmp_list <- asplit(tmp, dim_file_ind) + } + max_fri_length <- max(sapply(fri, length)) + for (i_extra_dim in 1:length(tmp_list)) { + for (i_fri in 1:length(fri)) { + tmp_list[[i_extra_dim]][[i_fri]] <- + tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] + + if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + tmp_list[[i_extra_dim]][[i_fri]] <- + c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + } + } + } + # Change list back to array + tmp_new_dim <- c(max_fri_length, dim(tmp)[file_dim], dim(tmp)[dim_extra_ind]) + names(tmp_new_dim) <- c(inner_dim, file_dim, names(dim(tmp))[dim_extra_ind]) + common_vars_to_crop[[common_var_to_crop]] <- + array(unlist(tmp_list), dim = tmp_new_dim) + # Reorder back + common_vars_to_crop[[common_var_to_crop]] <- + aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } else { # old code + + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } + } } @@ -3790,9 +3829,14 @@ Start <- function(..., # dim = indices/selectors, if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) } else { - data_array_tmp <- remove_additional_na_from_merge( - inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) + tmp <- remove_additional_na_from_merge( + inner_dims_across_files, final_dims, across_inner_dim, + length_inner_across_dim, data_array, + merge_dim_metadata = picked_common_vars[[across_inner_dim]]) + data_array_tmp <- tmp$data_array_tmp + # Save dim for later use + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + picked_common_vars[[across_inner_dim]] <- tmp$merge_dim_metadata } if (length(data_array_tmp) != prod(final_dims_fake)) { @@ -3809,6 +3853,12 @@ Start <- function(..., # dim = indices/selectors, } data_array <- array(data_array_tmp, dim = final_dims_fake) + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + picked_common_vars[[across_inner_dim]] <- + as.POSIXct(picked_common_vars[[across_inner_dim]], + origin = "1970-01-01", tz = 'UTC') } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) diff --git a/R/zzz.R b/R/zzz.R index 0130724..7cf781e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1029,7 +1029,7 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) { + length_inner_across_dim, data_array, merge_dim_metadata) { across_file_dim <- names(inner_dims_across_files) #TODO: more than one? # Get the length of these two dimensions in final_dims length_inner_across_store_dims <- final_dims[across_inner_dim] @@ -1051,7 +1051,13 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, # dim(data_array) = [time, file_date] # dim(logi_array) = [time, file_date] # Change the blank spaces from NA to -9999 - data_array[which(!logi_array)] <- -9999 + if (any(class(data_array) %in% c("POSIXct", "POSIXt"))) { + # change to numeric first + data_array <- array(as.vector(data_array), dim = dim(data_array)) + data_array[which(!logi_array)] <- max(data_array, na.rm = T) + 1 + } else { + data_array[which(!logi_array)] <- -9999 + } return(data_array) } data_array_final_dims <- multiApply::Apply(data_array_final_dims, @@ -1059,12 +1065,24 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, output_dims = c(across_inner_dim, across_file_dim), fun = func_remove_blank, logi_array = logi_array)$output1 + + merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, + target_dims = c(across_inner_dim, across_file_dim), + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + ## reorder back to the correct dim tmp <- match(names(final_dims), names(dim(data_array_final_dims))) data_array_final_dims <- .aperm2(data_array_final_dims, tmp) data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector - return(data_array_tmp) + # Reorder metadata dim as final dim + tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) + merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) + merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != max(merge_dim_metadata, na.rm = TRUE)] + + return(list(data_array_tmp = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) } -- GitLab From a7b8d3d6f8c5a436e5dbaa9e71d41dda8712dda5 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Feb 2022 18:04:18 +0100 Subject: [PATCH 04/45] Add condition to time conversion --- R/Start.R | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/R/Start.R b/R/Start.R index 8c15236..0fba6d8 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3494,10 +3494,12 @@ Start <- function(..., # dim = indices/selectors, # Reorder back common_vars_to_crop[[common_var_to_crop]] <- aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) - # Convert number back to time - common_vars_to_crop[[common_var_to_crop]] <- - as.POSIXct(common_vars_to_crop[[common_var_to_crop]], - origin = "1970-01-01", tz = 'UTC') + if (common_var_to_crop == 'time') { + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } } else { # old code common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) @@ -3855,10 +3857,12 @@ Start <- function(..., # dim = indices/selectors, data_array <- array(data_array_tmp, dim = final_dims_fake) final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) - # Convert numeric back to dates - picked_common_vars[[across_inner_dim]] <- - as.POSIXct(picked_common_vars[[across_inner_dim]], - origin = "1970-01-01", tz = 'UTC') + if (across_inner_dim == 'time') { + # Convert numeric back to dates + picked_common_vars[[across_inner_dim]] <- + as.POSIXct(picked_common_vars[[across_inner_dim]], + origin = "1970-01-01", tz = 'UTC') + } } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) -- GitLab From 3db54a0906df47eb0d742644d51dbc9a77080a7c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Feb 2022 19:32:59 +0100 Subject: [PATCH 05/45] code improvement --- R/Start.R | 4 +++- R/zzz.R | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 0fba6d8..9589e25 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3473,6 +3473,8 @@ Start <- function(..., # dim = indices/selectors, } else { # only file_dim and inner_dim dim_file_ind <- which(names(dim(tmp)) == file_dim) tmp_list <- asplit(tmp, dim_file_ind) + # Add another layer to be consistent with the first case above + tmp_list <- list(tmp_list) } max_fri_length <- max(sapply(fri, length)) for (i_extra_dim in 1:length(tmp_list)) { @@ -3832,7 +3834,7 @@ Start <- function(..., # dim = indices/selectors, data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) } else { tmp <- remove_additional_na_from_merge( - inner_dims_across_files, final_dims, across_inner_dim, + inner_dims_across_files, final_dims, length_inner_across_dim, data_array, merge_dim_metadata = picked_common_vars[[across_inner_dim]]) data_array_tmp <- tmp$data_array_tmp diff --git a/R/zzz.R b/R/zzz.R index 7cf781e..b21e51c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1028,9 +1028,10 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). -remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim, +remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, length_inner_across_dim, data_array, merge_dim_metadata) { across_file_dim <- names(inner_dims_across_files) #TODO: more than one? + across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of these two dimensions in final_dims length_inner_across_store_dims <- final_dims[across_inner_dim] length_file_across_store_dims <- final_dims[across_file_dim] -- GitLab From 7322a6893b0246ad6b7636145376cd263355a669 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Feb 2022 20:10:48 +0100 Subject: [PATCH 06/45] Add crossed file dim in return_vars instead of replacing the original value --- R/Start.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/Start.R b/R/Start.R index 9589e25..2eb9cda 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1746,10 +1746,12 @@ Start <- function(..., # dim = indices/selectors, is.character(file_dim_as_selector_array_dim)) { #(2) or (1) # inner_dim is not in return_vars or is NULL if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | - (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { - common_return_vars[[inner_dim]] <- correct_return_vars( - inner_dim, inner_dims_across_files, - found_pattern_dim, file_dim_as_selector_array_dim) + (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]])) | + (inner_dim %in% names(common_return_vars) & !names(inner_dims_across_files) %in% common_return_vars[[inner_dim]])) { + common_return_vars[[inner_dim]] <- + c(common_return_vars[[inner_dim]], + correct_return_vars(inner_dim, inner_dims_across_files, + found_pattern_dim, file_dim_as_selector_array_dim)) } } } -- GitLab From 32550ef154307e999e6fe6a878cc5d6eced038fd Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 22 Feb 2022 12:56:52 +0100 Subject: [PATCH 07/45] Put the lost metadata back after reshaping --- R/Start.R | 23 ++++++++++++++++--- .../test-Start-metadata_filedim_dependency.R | 4 ++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/R/Start.R b/R/Start.R index 2eb9cda..cce3834 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1745,9 +1745,19 @@ Start <- function(..., # dim = indices/selectors, if (inner_dim %in% inner_dims_across_files | is.character(file_dim_as_selector_array_dim)) { #(2) or (1) # inner_dim is not in return_vars or is NULL - if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | - (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]])) | - (inner_dim %in% names(common_return_vars) & !names(inner_dims_across_files) %in% common_return_vars[[inner_dim]])) { + need_correct <- FALSE + if (((!inner_dim %in% names(common_return_vars)) & + (!inner_dim %in% names(return_vars))) | + (inner_dim %in% names(common_return_vars) & + is.null(common_return_vars[[inner_dim]]))) { + need_correct <- TRUE + } else if (inner_dim %in% names(common_return_vars) & + !is.null(names(inner_dims_across_files))) { + if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) { + need_correct <- TRUE + } + } + if (need_correct) { common_return_vars[[inner_dim]] <- c(common_return_vars[[inner_dim]], correct_return_vars(inner_dim, inner_dims_across_files, @@ -3467,6 +3477,7 @@ Start <- function(..., # dim = indices/selectors, } else { if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim tmp <- common_vars_to_crop[[common_var_to_crop]] + tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) if (!identical(dim_extra_ind, integer(0))) { tmp_list <- asplit(tmp, dim_extra_ind) @@ -3498,6 +3509,12 @@ Start <- function(..., # dim = indices/selectors, # Reorder back common_vars_to_crop[[common_var_to_crop]] <- aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Put attributes back + tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) + attributes(common_vars_to_crop[[common_var_to_crop]]) <- + c(attributes(common_vars_to_crop[[common_var_to_crop]]), + tmp_attributes[tmp]) + if (common_var_to_crop == 'time') { # Convert number back to time common_vars_to_crop[[common_var_to_crop]] <- diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index a5fa558..cfd7dfb 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -65,7 +65,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6 <- attr(test4, 'Variables')$common$time + time6 <- attr(test6, 'Variables')$common$time expect_equal( dim(time6), @@ -101,7 +101,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6a <- attr(test4, 'Variables')$common$time + time6a <- attr(test6a, 'Variables')$common$time expect_equal( dim(time6a), -- GitLab From 6ee2db8783a2916bb56169f47847b20b0c71407c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 22 Feb 2022 18:21:19 +0100 Subject: [PATCH 08/45] Keep metadata integrate during the reshaping process --- R/Start.R | 3 + R/zzz.R | 6 ++ .../testthat/test-Start-metadata_reshaping.R | 93 +++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 tests/testthat/test-Start-metadata_reshaping.R diff --git a/R/Start.R b/R/Start.R index cce3834..784d25e 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3877,7 +3877,10 @@ Start <- function(..., # dim = indices/selectors, data_array <- array(data_array_tmp, dim = final_dims_fake) final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + if (across_inner_dim == 'time') { # Convert numeric back to dates picked_common_vars[[across_inner_dim]] <- diff --git a/R/zzz.R b/R/zzz.R index b21e51c..44f3cdc 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1067,6 +1067,7 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, fun = func_remove_blank, logi_array = logi_array)$output1 + tmp_attr <- attributes(merge_dim_metadata)$variables merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, target_dims = c(across_inner_dim, across_file_dim), output_dims = c(across_inner_dim, across_file_dim), @@ -1082,7 +1083,9 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != max(merge_dim_metadata, na.rm = TRUE)] + attr(merge_dim_metadata, 'variables') <- tmp_attr + #NOTE: both outputs are vectors. If 'merge_dim_metadata' is actually time, it is just numeric here. return(list(data_array_tmp = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) } @@ -1335,8 +1338,11 @@ generate_picked_var_of_read <- function(var_to_read, var_to_check, array_of_file } else { padding <- array(dim = padding_dims) } + tmp_attr <- attributes(either_picked_vars)$variables either_picked_vars <- .abind2(either_picked_vars, padding, names(full_array_var_dims)[longer_dims_in_full_array]) + attr(either_picked_vars, 'variables') <- tmp_attr + } else { stop("Error while reading the variable '", var_to_read, "' from ", "the file. Found size (", paste(var_dims, collapse = ' x '), diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R new file mode 100644 index 0000000..d4d4c28 --- /dev/null +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -0,0 +1,93 @@ +context("Start() metadata reshaping") +# When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. + +test_that("1. time across fyear, fyear depends on sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 365) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +names(attributes(dates)), +c('variables', 'dim', 'class', 'tzone') +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, c(1:2, 365)]), +as.vector(as.POSIXct(c("1961-01-01 12:00:00", "1961-01-02 12:00:00", "1961-12-31 12:00:00"), + tz = 'UTC')) +) +expect_equal( +as.vector(dates[2, c(1:2, 365)]), +as.vector(as.POSIXct(c("1962-01-01 12:00:00", "1962-01-02 12:00:00", "1962-12-31 12:00:00"), + tz = 'UTC')) +) + +}) + + +test_that("2. time across fyear, only one sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 365) +) +expect_equal( +length(data), +length(dates) +) +expect_equal( +as.vector(dates[c(1:2, 365)]), +as.vector(as.POSIXct(c("1961-01-01 12:00:00", "1961-01-02 12:00:00", "1961-12-31 12:00:00"), + tz = 'UTC')) +) + +}) -- GitLab From a18a0fee7387390268bb6a090db19d307d0c90bf Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 23 Feb 2022 12:06:05 +0100 Subject: [PATCH 09/45] Revise the NA substituted value --- R/zzz.R | 4 +- .../testthat/test-Start-metadata_reshaping.R | 64 ++++++++++++++++--- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 44f3cdc..5b2b786 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1055,7 +1055,7 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, if (any(class(data_array) %in% c("POSIXct", "POSIXt"))) { # change to numeric first data_array <- array(as.vector(data_array), dim = dim(data_array)) - data_array[which(!logi_array)] <- max(data_array, na.rm = T) + 1 + data_array[which(!logi_array)] <- -12^10 } else { data_array[which(!logi_array)] <- -9999 } @@ -1082,7 +1082,7 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, # Reorder metadata dim as final dim tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) - merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != max(merge_dim_metadata, na.rm = TRUE)] + merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != -12^10] attr(merge_dim_metadata, 'variables') <- tmp_attr #NOTE: both outputs are vectors. If 'merge_dim_metadata' is actually time, it is just numeric here. diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index d4d4c28..680f8d6 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -40,14 +40,12 @@ class(dates), c("POSIXct", "POSIXt") ) expect_equal( -as.vector(dates[1, c(1:2, 365)]), -as.vector(as.POSIXct(c("1961-01-01 12:00:00", "1961-01-02 12:00:00", "1961-12-31 12:00:00"), - tz = 'UTC')) +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) ) expect_equal( -as.vector(dates[2, c(1:2, 365)]), -as.vector(as.POSIXct(c("1962-01-01 12:00:00", "1962-01-02 12:00:00", "1962-12-31 12:00:00"), - tz = 'UTC')) +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')) ) }) @@ -85,9 +83,57 @@ length(data), length(dates) ) expect_equal( -as.vector(dates[c(1:2, 365)]), -as.vector(as.POSIXct(c("1961-01-01 12:00:00", "1961-01-02 12:00:00", "1961-12-31 12:00:00"), - tz = 'UTC')) +as.vector(dates), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) + + + +}) + + +test_that("3. time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 700:860, ## initialised in Nov + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 161) +) + +expect_equal( +names(attributes(dates)), +c('variables', 'dim', 'class', 'tzone') +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1962-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1963-03-10 12:00:00', tz = 'UTC'), by = 'day')) +) +expect_equal( +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03-09 12:00:00', tz = 'UTC'), by = 'day')) ) }) -- GitLab From 2ec599cb7d4986610f78766ffce88c8a0f313335 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 24 Feb 2022 18:21:05 +0100 Subject: [PATCH 10/45] Reshape metadata for split dim case --- R/Start.R | 37 ++++++++++++++++++++++++------------- R/zzz.R | 19 ++++++++++++++++--- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/R/Start.R b/R/Start.R index 784d25e..f473dfd 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3870,17 +3870,37 @@ Start <- function(..., # dim = indices/selectors, #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { - data_array_tmp <- rebuild_array_merge_split( - data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim) + tmp <- rebuild_array_merge_split( + data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, + across_inner_dim, length_inner_across_dim, metadata_tmp = picked_common_vars[[across_inner_dim]]) + data_array_tmp <- tmp$data_array_tmp + picked_common_vars[[across_inner_dim]] <- tmp$metadata_tmp } data_array <- array(data_array_tmp, dim = final_dims_fake) - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + + # Find final_dim_fake for metadata and put it in an array + if (!split_multiselected_dims) { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. + if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) + data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + if (across_inner_dim == 'time') { # Convert numeric back to dates picked_common_vars[[across_inner_dim]] <- @@ -3892,15 +3912,6 @@ Start <- function(..., # dim = indices/selectors, data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) } - # NOTE: If split_multiselected_dims + merge_across_dims, the dimension order may change above. - # To get the user-required dim order, we need to reorder the array again. - if (split_multiselected_dims & merge_across_dims) { - if (inner_dim_pos_in_split_dims != 1) { - correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) - data_array <- .aperm2(data_array, correct_order) - } - } - gc() # Load metadata and remove the metadata folder diff --git a/R/zzz.R b/R/zzz.R index 5b2b786..af601f7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1094,7 +1094,7 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, - final_dims_fake, across_inner_dim, length_inner_across_dim) { + final_dims_fake, across_inner_dim, length_inner_across_dim, metadata_tmp) { # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 @@ -1126,14 +1126,20 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) } data_array_no_split <- array(data_array_tmp, dim = new_dims) + metadata_no_split <- array(metadata_tmp, dim = new_dims) + # seperate 'time' dim into each work_piece length data_array_seperate <- list() + metadata_seperate <- list() tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) + metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata_no_split, + across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) } # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) @@ -1142,26 +1148,33 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d how_many_indices <- unlist(final_order_list) array_piece <- list() + metadata_piece <- list() ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) for (i in 1:length(final_order_list)) { array_piece[[i]] <- ClimProjDiags::Subset( data_array_seperate[[which_chunk[i]]], across_inner_dim, ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + metadata_piece[[i]] <- ClimProjDiags::Subset( + metadata_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] } # re-build the array: paste data_array_tmp <- array_piece[[1]] + metadata_tmp <- metadata_piece[[1]] along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) if (length(array_piece) > 1) { for (i in 2:length(array_piece)) { data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], along = along_pos) + metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]], + along = along_pos) } } } - - return(data_array_tmp) + + return(list(data_array_tmp = data_array_tmp, metadata_tmp = metadata_tmp)) } -- GitLab From 979a14efcdb051cc593c55b6ddac00111cae1e8f Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 25 Feb 2022 19:13:02 +0100 Subject: [PATCH 11/45] Finish metadata reshaping for retrieve = TRUE, including different combinations of merge_across_dims, merge_across_dims_narm, and split_multiselected_dims --- R/NcDataReader.R | 191 ++++++------ R/Start.R | 92 ++++-- .../testthat/test-Start-metadata_reshaping.R | 274 +++++++++++++++++- 3 files changed, 425 insertions(+), 132 deletions(-) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index fb61ef8..7613172 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -183,120 +183,121 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, } }) - if (length(names(attr(result, 'variables'))) == 1 & all(names(attr(result, 'variables')) == 'time')) { - var_name <- names(attr(result, 'variables')) - units <- attr(result, 'variables')[[var_name]][['units']] + if (length(names(attr(result, 'variables'))) == 1) { + if ('time' %in% synonims[[names(attr(result, 'variables'))]]) { + var_name <- names(attr(result, 'variables')) + units <- attr(result, 'variables')[[var_name]][['units']] - if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { - if (units == 'seconds') { -# units <- 'secs' - } else if (units == 'minutes') { -# units <- 'mins' - result <- result * 60 # min to sec - } - result[] <- paste(result[], units) - - } else if (grepl(' since ', units)) { - # Find the calendar - calendar <- attr(result, 'variables')[[var_name]]$calendar - if (calendar == 'standard') calendar <- 'gregorian' + if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { + if (units == 'seconds') { +# units <- 'secs' + } else if (units == 'minutes') { +# units <- 'mins' + result <- result * 60 # min to sec + } + result[] <- paste(result[], units) - parts <- strsplit(units, ' since ')[[1]] - units <- parts[1] + } else if (grepl(' since ', units)) { + # Find the calendar + calendar <- attr(result, 'variables')[[var_name]]$calendar + if (calendar == 'standard') calendar <- 'gregorian' - if (units %in% c('second', 'seconds')) { -# units <- 'secs' - } else if (units %in% c('minute', 'minutes')) { -# units <- 'mins' - result <- result * 60 # min to sec - } else if (units %in% c('hour', 'hours')) { - result <- result * 60 * 60 # hour to sec - } else if (units %in% c('day', 'days')) { -# units <- 'days' - result <- result * 24 * 60 * 60 # day to sec - } else if (units %in% c('month', 'months')) { - # define day in each month - leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - # Origin year and month - ori_year <- as.numeric(substr(parts[2], 1, 4)) - ori_month <- as.numeric(substr(parts[2], 6, 7)) - if (is.na(ori_month)) { - ori_month <- as.numeric(substr(parts[2], 6, 6)) - } - if (!is.numeric(ori_year) | !is.numeric(ori_month)) { - stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", - "Check the file or contact the maintainer.")) - } + parts <- strsplit(units, ' since ')[[1]] + units <- parts[1] - if (calendar == 'gregorian') { - # Find how many years + months - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - # Find the leap years we care - if (ori_month <= 2) { - leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) - } else { - leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + if (units %in% c('second', 'seconds')) { +# units <- 'secs' + } else if (units %in% c('minute', 'minutes')) { +# units <- 'mins' + result <- result * 60 # min to sec + } else if (units %in% c('hour', 'hours')) { + result <- result * 60 * 60 # hour to sec + } else if (units %in% c('day', 'days')) { +# units <- 'days' + result <- result * 24 * 60 * 60 # day to sec + } else if (units %in% c('month', 'months')) { + # define day in each month + leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + # Origin year and month + ori_year <- as.numeric(substr(parts[2], 1, 4)) + ori_month <- as.numeric(substr(parts[2], 6, 7)) + if (is.na(ori_month)) { + ori_month <- as.numeric(substr(parts[2], 6, 6)) + } + if (!is.numeric(ori_year) | !is.numeric(ori_month)) { + stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", + "Check the file or contact the maintainer.")) } - total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet + if (calendar == 'gregorian') { + # Find how many years + months + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + # Find the leap years we care + if (ori_month <= 2) { + leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) + } else { + leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + } + total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet - if (month_left != 0) { - if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr - # Is the last year a leap year? - last_leap <- s2dv::LeapYear(ori_year + yr_num) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) - } else { - total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) - } - } else { # the last month ends in the next yr - if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (month_left != 0) { + if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr + # Is the last year a leap year? + last_leap <- s2dv::LeapYear(ori_year + yr_num) if (last_leap) { - total_days <- total_days + sum(leap_month_day[2:12]) + total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) } else { - total_days <- total_days + sum(no_leap_month_day[2:12]) + total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) } - } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) - needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[needed_month]) - } else { - total_days <- total_days + sum(no_leap_month_day[needed_month]) + } else { # the last month ends in the next yr + if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (last_leap) { + total_days <- total_days + sum(leap_month_day[2:12]) + } else { + total_days <- total_days + sum(no_leap_month_day[2:12]) + } + } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) + needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) + if (last_leap) { + total_days <- total_days + sum(leap_month_day[needed_month]) + } else { + total_days <- total_days + sum(no_leap_month_day[needed_month]) + } } } } - } - result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('365_day',' 365', 'noleap')) { - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) - result <- total_days * 24 * 60 * 60 # day to sec + result <- total_days * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('365_day',' 365', 'noleap')) { + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) + result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('360_day', '360')) { - result <- result * 30 * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('360_day', '360')) { + result <- result * 30 * 24 * 60 * 60 # day to sec - } else { #old code. The calendar is not in any of the above. - result <- result * 30.5 - result <- result * 24 * 60 * 60 # day to sec + } else { #old code. The calendar is not in any of the above. + result <- result * 30.5 + result <- result * 24 * 60 * 60 # day to sec + } } - } - new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] - new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) - #new_array <- seq(as.POSIXct(parts[2]), - # length = max(result, na.rm = TRUE) + 1, - # by = units)[result[] + 1] - dim(new_array) <- dim(result) - attr(new_array, 'variables') <- attr(result, 'variables') - result <- new_array + #new_array <- seq(as.POSIXct(parts[2]), + # length = max(result, na.rm = TRUE) + 1, + # by = units)[result[] + 1] + dim(new_array) <- dim(result) + attr(new_array, 'variables') <- attr(result, 'variables') + result <- new_array + } } - } + } } if (close) { diff --git a/R/Start.R b/R/Start.R index f473dfd..e8ba7d3 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3515,7 +3515,7 @@ Start <- function(..., # dim = indices/selectors, c(attributes(common_vars_to_crop[[common_var_to_crop]]), tmp_attributes[tmp]) - if (common_var_to_crop == 'time') { + if ('time' %in% synonims[[common_var_to_crop]]) { # Convert number back to time common_vars_to_crop[[common_var_to_crop]] <- as.POSIXct(common_vars_to_crop[[common_var_to_crop]], @@ -3848,47 +3848,56 @@ Start <- function(..., # dim = indices/selectors, # unequal inner_dim ('time') length across file_dim ('file_date'). # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. + if (merge_across_dims) { + # Save dim and attributes for later use + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') + # Find final_dim_fake for metadata and put it in an array + if (!split_multiselected_dims) { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') + final_dims_fake_merge_dim <- all_split_dims[[1]] + } + if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] } else { + tmp <- remove_additional_na_from_merge( inner_dims_across_files, final_dims, length_inner_across_dim, data_array, merge_dim_metadata = picked_common_vars[[across_inner_dim]]) data_array_tmp <- tmp$data_array_tmp - # Save dim for later use - dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) - picked_common_vars[[across_inner_dim]] <- tmp$merge_dim_metadata + metadata_tmp <- tmp$merge_dim_metadata } if (length(data_array_tmp) != prod(final_dims_fake)) { stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } - + #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim, metadata_tmp = picked_common_vars[[across_inner_dim]]) + across_inner_dim, length_inner_across_dim, metadata_tmp = metadata_tmp) data_array_tmp <- tmp$data_array_tmp - picked_common_vars[[across_inner_dim]] <- tmp$metadata_tmp - } - - data_array <- array(data_array_tmp, dim = final_dims_fake) - - # Find final_dim_fake for metadata and put it in an array - if (!split_multiselected_dims) { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] - } else { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + metadata_tmp <- tmp$metadata_tmp } - tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') - picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + data_array <- array(data_array_tmp, dim = final_dims_fake) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. # To get the user-required dim order, we need to reorder the array again. @@ -3897,19 +3906,48 @@ Start <- function(..., # dim = indices/selectors, correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) data_array <- .aperm2(data_array, correct_order) correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) - picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], correct_order_metadata[!is.na(correct_order_metadata)]) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) } } - - if (across_inner_dim == 'time') { - # Convert numeric back to dates - picked_common_vars[[across_inner_dim]] <- - as.POSIXct(picked_common_vars[[across_inner_dim]], - origin = "1970-01-01", tz = 'UTC') + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } - + + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } + + if (split_multiselected_dims) { + metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[names(all_split_dims)]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[names(all_split_dims)]] <- metadata_tmp + attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + } } gc() diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 680f8d6..03674f1 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -32,8 +32,12 @@ dim(drop(data)), dim(dates) ) expect_equal( -names(attributes(dates)), -c('variables', 'dim', 'class', 'tzone') +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE ) expect_equal( class(dates), @@ -91,7 +95,6 @@ as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12 }) - test_that("3. time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { suppressWarnings( @@ -100,16 +103,17 @@ data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/E lat = indices(1), lon = indices(1), sdate = paste0(1960:1961), - time = 700:860, ## initialised in Nov - time_across = 'fyear', + day = 700:860, ## initialised in Nov + day_across = 'fyear', merge_across_dims = TRUE, fyear = 'all', fyear_depends = 'sdate', member = 'r1i4p1f1', synonims = list(lat = c('lat','latitude'), - lon = c('lon','longitude')), + lon = c('lon','longitude'), + day = c('day', 'time')), return_vars = list(lat = NULL, lon = NULL, - time = c('sdate', 'fyear')), + day = c('sdate', 'fyear')), retrieve = TRUE) ) dates <- attr(data,'Variables')$common[['time']] @@ -118,10 +122,13 @@ expect_equal( dim(dates), c(sdate = 2, time = 161) ) - expect_equal( -names(attributes(dates)), -c('variables', 'dim', 'class', 'tzone') +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE ) expect_equal( class(dates), @@ -137,3 +144,250 @@ as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03 ) }) + + +test_that("4. merge and split time dim", { +datess <- seq(as.POSIXct('1994-05-01', tz = 'UTC'), as.POSIXct('1994-12-31', tz = 'UTC'), by = 'days') +datess <- c(datess[c(1:31, 32:62, 62:92, 93:123, 124:154, 154:184, 185:215, 215:245)]) +datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), + origin = '1970-01-01', tz = 'UTC') +dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), + substr, 1, 7)))) + +suppressWarnings( + data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) + +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(time = 31, sdate = 8) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates), +as.vector(datess) +) + +}) + +test_that("5. test 1 but merge_across_dims_narm = F", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 608) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +c(as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) +expect_equal( +as.vector(dates[2, ]), +c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) + +}) + +test_that("6. split dim only", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), + origin = '1970-01-01', tz = 'UTC') + +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = TRUE) + +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 7, week = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'day')) +) + + +}) + +test_that("7. split dim + merge + merge_narm = F", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), + origin = '1970-01-01', tz = 'UTC') + +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) + +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 31, month = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'day')) +) + +}) + +test_that("8. test 1 but retrieve = F", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +dates <- attr(data,'Variables')$common[['time']] + +}) + + -- GitLab From 04573b9a03d1bbf9683101e094ff48c6f58070b2 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 25 Feb 2022 20:27:41 +0100 Subject: [PATCH 12/45] Revise split dim case's condition; fix typo --- R/Start.R | 16 +++++++++------- tests/testthat/test-Start-metadata_reshaping.R | 6 +++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/R/Start.R b/R/Start.R index e8ba7d3..145851c 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3938,16 +3938,18 @@ Start <- function(..., # dim = indices/selectors, picked_common_vars[[across_inner_dim]] <- metadata_tmp attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr } - if (split_multiselected_dims) { - metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) - # Convert numeric back to dates - if ('time' %in% synonims[[names(all_split_dims)]]) { - metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + if (names(all_split_dims) %in% names(picked_common_vars)) { + metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[names(all_split_dims)]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[names(all_split_dims)]] <- metadata_tmp + attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr } - picked_common_vars[[names(all_split_dims)]] <- metadata_tmp - attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr } + } gc() diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 03674f1..d6868d6 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -116,7 +116,7 @@ data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/E day = c('sdate', 'fyear')), retrieve = TRUE) ) -dates <- attr(data,'Variables')$common[['time']] +dates <- attr(data,'Variables')$common[['day']] expect_equal( dim(dates), @@ -263,7 +263,7 @@ test_that("6. split dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), origin = '1970-01-01', tz = 'UTC') - +suppressWarnings( data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', '$var$/$var$_199407.nc'), var = 'tas', @@ -280,7 +280,7 @@ data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', longitude = NULL, time = NULL), #'file_date'), retrieve = TRUE) - +) dates <- attr(data,'Variables')$common[['time']] -- GitLab From 4ca9d58675c40e6dae47c9cbe40f926710c7972d Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 28 Feb 2022 09:24:13 +0100 Subject: [PATCH 13/45] Typo fixed --- tests/testthat/test-Start-metadata_reshaping.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index d6868d6..a4f7d69 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -120,7 +120,7 @@ dates <- attr(data,'Variables')$common[['day']] expect_equal( dim(dates), -c(sdate = 2, time = 161) +c(sdate = 2, day = 161) ) expect_equal( length(attributes(dates)), @@ -318,6 +318,7 @@ datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), origin = '1970-01-01', tz = 'UTC') +suppressWarnings( data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', '$var$/$var$_$file_date$.nc'), var = 'tas', @@ -335,7 +336,7 @@ data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', longitude = NULL, time = 'file_date'), retrieve = TRUE) - +) dates <- attr(data,'Variables')$common[['time']] -- GitLab From c65b9245c1ee3959f5b91f6b2f05d0a9e67bf0b5 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 28 Feb 2022 09:28:25 +0100 Subject: [PATCH 14/45] Revise condition statement to include implicit inner dim case --- R/NcDataReader.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index 7613172..95fd1f1 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -184,7 +184,8 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, }) if (length(names(attr(result, 'variables'))) == 1) { - if ('time' %in% synonims[[names(attr(result, 'variables'))]]) { + # The 1st condition is for implicit time dim (if time length = 1, it is allowed to not be defined in Start call. Therefore, it is not in the list of synonims) + if (names(attr(result, 'variables')) == 'time' | 'time' %in% synonims[[names(attr(result, 'variables'))]]) { var_name <- names(attr(result, 'variables')) units <- attr(result, 'variables')[[var_name]][['units']] -- GitLab From eba39b1a4a8b07828795332eaf98db91b28ac4af Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Mar 2022 15:30:43 +0100 Subject: [PATCH 15/45] Add metadata reshaping to retrieve = FALSE --- R/Start.R | 141 ++++++++++--- R/zzz.R | 179 ++++++++++------ .../testthat/test-Start-metadata_reshaping.R | 198 ++++++++++++++++-- 3 files changed, 409 insertions(+), 109 deletions(-) diff --git a/R/Start.R b/R/Start.R index 145851c..59230f6 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3730,6 +3730,21 @@ Start <- function(..., # dim = indices/selectors, all_split_dims[[1]] <- tmp[[2]] } } + + if (merge_across_dims) { + # Save dim and attributes for later use + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') + # Find final_dim_fake for metadata and put it in an array + if (!split_multiselected_dims) { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') + final_dims_fake_merge_dim <- all_split_dims[[1]] + } # The following several lines will only run if retrieve = TRUE if (retrieve) { @@ -3844,25 +3859,10 @@ Start <- function(..., # dim = indices/selectors, } #print("P") - # NOTE: If merge_across_dims = TRUE, there might be additional NAs due to - # unequal inner_dim ('time') length across file_dim ('file_date'). - # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. + # If merge_across_dims = TRUE, there might be additional NAs due to unequal + # inner_dim ('time') length across file_dim ('file_date'). + # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. - if (merge_across_dims) { - # Save dim and attributes for later use - dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) - tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') - # Find final_dim_fake for metadata and put it in an array - if (!split_multiselected_dims) { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] - } else { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] - } - } else if (split_multiselected_dims) { - tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') - final_dims_fake_merge_dim <- all_split_dims[[1]] - } - if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) @@ -3875,9 +3875,9 @@ Start <- function(..., # dim = indices/selectors, tmp <- remove_additional_na_from_merge( inner_dims_across_files, final_dims, - length_inner_across_dim, data_array, + length_inner_across_dim, data_array = bigmemory::as.matrix(data_array), merge_dim_metadata = picked_common_vars[[across_inner_dim]]) - data_array_tmp <- tmp$data_array_tmp + data_array_tmp <- tmp$data_array metadata_tmp <- tmp$merge_dim_metadata } @@ -3885,15 +3885,18 @@ Start <- function(..., # dim = indices/selectors, stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } + if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) + } #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim, metadata_tmp = metadata_tmp) - data_array_tmp <- tmp$data_array_tmp - metadata_tmp <- tmp$metadata_tmp + data_array = data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, + across_inner_dim, length_inner_across_dim, metadata = metadata_tmp) + data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$metadata } data_array <- array(data_array_tmp, dim = final_dims_fake) @@ -3999,7 +4002,95 @@ Start <- function(..., # dim = indices/selectors, } } # End if (retrieve) - + else { # if retrieve = FALSE, metadata still needs to reshape + + if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { + if (!merge_across_dims_narm) { +# data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] + } else { + + tmp <- remove_additional_na_from_merge( + inner_dims_across_files, final_dims, + length_inner_across_dim, data_array = NULL, + merge_dim_metadata = picked_common_vars[[across_inner_dim]]) +# data_array_tmp <- tmp$data_array_tmp + metadata_tmp <- tmp$merge_dim_metadata + } + + if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) + } + + #NOTE: When one file contains values for dicrete dimensions, rearrange the + # chunks (i.e., work_piece) is necessary. + if (split_multiselected_dims) { + tmp <- rebuild_array_merge_split( + data_array = NULL, indices_chunk, all_split_dims, final_dims_fake, + across_inner_dim, length_inner_across_dim, metadata = metadata_tmp) +# data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$metadata + } + +# data_array <- array(data_array_tmp, dim = final_dims_fake) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. + if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) +# data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } + if (split_multiselected_dims) { + if (names(all_split_dims) %in% names(picked_common_vars)) { + metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[names(all_split_dims)]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[names(all_split_dims)]] <- metadata_tmp + attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + } + } + } + + } + # Change final_dims_fake back because retrieve = FALSE will use it for attributes later if (exists("final_dims_fake_output")) { final_dims_fake <- final_dims_fake_output diff --git a/R/zzz.R b/R/zzz.R index af601f7..b1694e1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1029,7 +1029,10 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, - length_inner_across_dim, data_array, merge_dim_metadata) { + length_inner_across_dim, data_array = NULL, merge_dim_metadata = NULL) { + # data_array is a vector from bigmemory::as.matrix + # merge_dim_metadata is an array + across_file_dim <- names(inner_dims_across_files) #TODO: more than one? across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of these two dimensions in final_dims @@ -1043,15 +1046,19 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, for (i in 1:length_file_across_store_dims) { #1:4 logi_array[1:length_inner_across_dim[[i]], i] <- TRUE } - - # First, get the data array with final_dims dimension - data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims) - + + if (!is.null(data_array)) { + # First, turn the data vector into array with final_dims + data_array_final_dims <- array(data_array, dim = final_dims) + } + # Change the NA derived from additional spaces to -9999, then remove these -9999 func_remove_blank <- function(data_array, logi_array) { # dim(data_array) = [time, file_date] # dim(logi_array) = [time, file_date] - # Change the blank spaces from NA to -9999 + # data_array can be data or metadata; if data, change the blank spaces from + # NA to -9999; if metadata (supposed to be 'time'), change the corresponding + # spaces to -12^10. if (any(class(data_array) %in% c("POSIXct", "POSIXt"))) { # change to numeric first data_array <- array(as.vector(data_array), dim = dim(data_array)) @@ -1061,40 +1068,48 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, } return(data_array) } - data_array_final_dims <- multiApply::Apply(data_array_final_dims, - target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') - output_dims = c(across_inner_dim, across_file_dim), - fun = func_remove_blank, - logi_array = logi_array)$output1 - - tmp_attr <- attributes(merge_dim_metadata)$variables - merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, - target_dims = c(across_inner_dim, across_file_dim), - output_dims = c(across_inner_dim, across_file_dim), - fun = func_remove_blank, - logi_array = logi_array)$output1 - - ## reorder back to the correct dim - tmp <- match(names(final_dims), names(dim(data_array_final_dims))) - data_array_final_dims <- .aperm2(data_array_final_dims, tmp) - data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector - - # Reorder metadata dim as final dim - tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) - merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) - merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != -12^10] - attr(merge_dim_metadata, 'variables') <- tmp_attr + + if (!is.null(data_array)) { + data_array_final_dims <- multiApply::Apply(data_array_final_dims, + target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + if (!is.null(merge_dim_metadata)) { + tmp_attr <- attributes(merge_dim_metadata)$variables + merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, + target_dims = c(across_inner_dim, across_file_dim), + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + + if (!is.null(data_array)) { + ## reorder back to the correct dim + tmp <- match(names(final_dims), names(dim(data_array_final_dims))) + data_array_final_dims <- .aperm2(data_array_final_dims, tmp) + data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector + } else { + data_array_tmp <- NULL + } + if (!is.null(merge_dim_metadata)) { + # Reorder metadata dim as final dim + tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) + merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) + merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != -12^10] + attr(merge_dim_metadata, 'variables') <- tmp_attr + } #NOTE: both outputs are vectors. If 'merge_dim_metadata' is actually time, it is just numeric here. - return(list(data_array_tmp = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) + return(list(data_array = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) } - # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions -rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, - final_dims_fake, across_inner_dim, length_inner_across_dim, metadata_tmp) { +rebuild_array_merge_split <- function(data_array = NULL, indices_chunk, all_split_dims, + final_dims_fake, across_inner_dim, length_inner_across_dim, metadata = NULL) { # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 @@ -1125,21 +1140,32 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) } - data_array_no_split <- array(data_array_tmp, dim = new_dims) - metadata_no_split <- array(metadata_tmp, dim = new_dims) - # seperate 'time' dim into each work_piece length - data_array_seperate <- list() - metadata_seperate <- list() + if (!is.null(data_array)) { + data_array_no_split <- array(data_array, dim = new_dims) + # seperate 'time' dim into each work_piece length + data_array_seperate <- list() + array_piece <- list() + } + if (!is.null(metadata)) { + metadata_no_split <- array(metadata, dim = new_dims) + metadata_seperate <- list() + metadata_piece <- list() + } + tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { - data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + if (!is.null(data_array)) { + data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) + } + if (!is.null(metadata)) { + metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata_no_split, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) - metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata_no_split, - across_inner_dim, - (tmp[i] + 1):tmp[i + 1]) + } } # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) @@ -1147,34 +1173,65 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d which_chunk <- sapply(lapply(which_chunk, '==', sort_which_chunk), which) how_many_indices <- unlist(final_order_list) - array_piece <- list() - metadata_piece <- list() - ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + + if (!is.null(data_array)) { + ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + } else if (!is.null(metadata)) { + ind_in_array_seperate <- as.list(rep(1, length(metadata_seperate))) + } + for (i in 1:length(final_order_list)) { - array_piece[[i]] <- ClimProjDiags::Subset( - data_array_seperate[[which_chunk[i]]], across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) - metadata_piece[[i]] <- ClimProjDiags::Subset( - metadata_seperate[[which_chunk[i]]], across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + if (!is.null(data_array)) { + array_piece[[i]] <- ClimProjDiags::Subset( + data_array_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + } + if (!is.null(metadata)) { + metadata_piece[[i]] <- ClimProjDiags::Subset( + metadata_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + } ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] } # re-build the array: paste - data_array_tmp <- array_piece[[1]] - metadata_tmp <- metadata_piece[[1]] - along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) - if (length(array_piece) > 1) { - for (i in 2:length(array_piece)) { - data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], - along = along_pos) - metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]], + if (!is.null(data_array)) { + data_array_tmp <- array_piece[[1]] + } else { + data_array_tmp <- NULL + } + if (!is.null(metadata)) { + metadata_tmp <- metadata_piece[[1]] + } else { + metadata_tmp <- NULL + } + + if (!is.null(data_array)) { + along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) + length_piece <- length(array_piece) + } else if (!is.null(metadata)) { + along_pos <- which(names(dim(metadata_tmp)) == across_inner_dim) + length_piece <- length(metadata_piece) + } + + if (length_piece > 1) { + for (i in 2:length_piece) { + if (!is.null(data_array)) { + data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], + along = along_pos) + } + if (!is.null(metadata)) { + metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]], along = along_pos) + } } } - } + } else { + data_array_tmp <- data_array + metadata_tmp <- metadata + } - return(list(data_array_tmp = data_array_tmp, metadata_tmp = metadata_tmp)) + return(list(data_array = data_array_tmp, metadata = metadata_tmp)) } diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index a4f7d69..91a793d 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -52,6 +52,31 @@ as.vector(dates[2, ]), as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')) ) +# retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + }) @@ -91,7 +116,30 @@ as.vector(dates), as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) ) - +#retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) }) @@ -143,6 +191,32 @@ as.vector(dates[2, ]), as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03-09 12:00:00', tz = 'UTC'), by = 'day')) ) + +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + day = 700:860, ## initialised in Nov + day_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude'), + day = c('day', 'time')), + return_vars = list(lat = NULL, lon = NULL, + day = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['day']] +expect_equal( +datesF, +dates +) + }) @@ -202,6 +276,36 @@ as.vector(dates), as.vector(datess) ) +suppressWarnings( + dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + }) test_that("5. test 1 but merge_across_dims_narm = F", { @@ -256,6 +360,31 @@ as.vector(dates[2, ]), c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) ) +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + }) test_that("6. split dim only", { @@ -309,6 +438,29 @@ as.vector(dates[, ]), as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'day')) ) +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) }) @@ -365,30 +517,30 @@ as.vector(dates[, ]), as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'day')) ) -}) - -test_that("8. test 1 but retrieve = F", { - suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', - var = 'tasmin', - lat = indices(1), - lon = indices(1), - sdate = paste0(1960:1961), - time = 62:426, ## Jan to Dec (initialised in Nov) - time_across = 'fyear', - merge_across_dims = TRUE, - fyear = 'all', - fyear_depends = 'sdate', - member = 'r1i4p1f1', - synonims = list(lat = c('lat','latitude'), - lon = c('lon','longitude')), - return_vars = list(lat = NULL, lon = NULL, - time = c('sdate', 'fyear')), - retrieve = FALSE) +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates ) -dates <- attr(data,'Variables')$common[['time']] }) - -- GitLab From f70070b25e82f8826eb36a0b1739d21808e4d076 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Mar 2022 16:01:55 +0100 Subject: [PATCH 16/45] Improve warning message --- R/zzz.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index b1694e1..c2362b6 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -446,10 +446,10 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter corrected_value <- file_dim_name } } - .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value, - "', but '", inner_dim, "' is not in return_vars list or is NULL. ", - "To provide the correct metadata, the value of ", inner_dim, - " in 'return_vars' is specified as '", corrected_value, "'.")) + .warning(paste0("Found '", inner_dim, "' dependency on file dimension '", corrected_value, + "', but '", inner_dim, "' is not in return_vars list or does not include '", corrected_value, + "'. To provide the correct metadata, '", corrected_value, "' is included under '", inner_dim, + "' in 'return_vars.")) return(corrected_value) } -- GitLab From 5fb304b0a4f02e5280e0588d12256c31de34c0cd Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Mar 2022 18:46:46 +0100 Subject: [PATCH 17/45] refine the code --- R/Start.R | 29 +++++++++++++---------------- R/zzz.R | 7 +++---- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/R/Start.R b/R/Start.R index 59230f6..6fc05ea 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3476,6 +3476,7 @@ Start <- function(..., # dim = indices/selectors, } } else { if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim + if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) @@ -3521,6 +3522,7 @@ Start <- function(..., # dim = indices/selectors, as.POSIXct(common_vars_to_crop[[common_var_to_crop]], origin = "1970-01-01", tz = 'UTC') } + } } else { # old code common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) @@ -3872,11 +3874,11 @@ Start <- function(..., # dim = indices/selectors, } metadata_tmp <- picked_common_vars[[across_inner_dim]] } else { - tmp <- remove_additional_na_from_merge( + data_array = bigmemory::as.matrix(data_array), + merge_dim_metadata = picked_common_vars[[across_inner_dim]], inner_dims_across_files, final_dims, - length_inner_across_dim, data_array = bigmemory::as.matrix(data_array), - merge_dim_metadata = picked_common_vars[[across_inner_dim]]) + length_inner_across_dim) data_array_tmp <- tmp$data_array metadata_tmp <- tmp$merge_dim_metadata } @@ -3886,15 +3888,16 @@ Start <- function(..., # dim = indices/selectors, "Check if the reshaping parameters are used correctly.")) } if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { - stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) } #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim, metadata = metadata_tmp) + data_array = data_array_tmp, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) data_array_tmp <- tmp$data_array metadata_tmp <- tmp$metadata } @@ -4006,19 +4009,17 @@ Start <- function(..., # dim = indices/selectors, if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { if (!merge_across_dims_narm) { -# data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) tmp <- match(names(final_dims), names(dims_of_merge_dim)) if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) } metadata_tmp <- picked_common_vars[[across_inner_dim]] } else { - tmp <- remove_additional_na_from_merge( + data_array = NULL, + merge_dim_metadata = picked_common_vars[[across_inner_dim]], inner_dims_across_files, final_dims, - length_inner_across_dim, data_array = NULL, - merge_dim_metadata = picked_common_vars[[across_inner_dim]]) -# data_array_tmp <- tmp$data_array_tmp + length_inner_across_dim) metadata_tmp <- tmp$merge_dim_metadata } @@ -4031,13 +4032,9 @@ Start <- function(..., # dim = indices/selectors, # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = NULL, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim, metadata = metadata_tmp) -# data_array_tmp <- tmp$data_array + data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) metadata_tmp <- tmp$metadata } - -# data_array <- array(data_array_tmp, dim = final_dims_fake) metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. diff --git a/R/zzz.R b/R/zzz.R index c2362b6..0067d6d 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1028,8 +1028,8 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). -remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, - length_inner_across_dim, data_array = NULL, merge_dim_metadata = NULL) { +remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadata = NULL, + inner_dims_across_files, final_dims, length_inner_across_dim) { # data_array is a vector from bigmemory::as.matrix # merge_dim_metadata is an array @@ -1108,8 +1108,7 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions -rebuild_array_merge_split <- function(data_array = NULL, indices_chunk, all_split_dims, - final_dims_fake, across_inner_dim, length_inner_across_dim, metadata = NULL) { +rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) { # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 -- GitLab From d38902fefc32cecd22a7cf5695551dc7cfbd6220 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 1 Mar 2022 18:48:41 +0100 Subject: [PATCH 18/45] Change indices to value for file dim --- tests/testthat/test-Start-two_dats.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 4fa8642..ff83441 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -19,7 +19,7 @@ data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), lon = values(list(150, 170)), lon_reorder = CircularSort(0, 360), fyear = 'all', - member = indices(1), + member = 'r10i1p1f1', #indices(1), fyear_depends = 'sdate', time_across = 'fyear', merge_across_dims = TRUE, -- GitLab From 3d1d557069c41971882c4764e5886f9c346bee17 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 2 Mar 2022 11:52:01 +0100 Subject: [PATCH 19/45] Correct test for new dev --- tests/testthat/test-Start-calendar.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 4f58a87..10cf7bf 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -212,9 +212,13 @@ suppressWarnings( ) expect_equal( - attr(obs, 'Variables')$common$time[1, 1], + attr(obs, 'Variables')$common$time[1], as.POSIXct('2005-05-16 12:00:00', tz = 'UTC') ) +expect_equal( + dim(attr(obs, 'Variables')$common$time), + c(time = 1) +) }) -- GitLab From 814abdc610490257ff0da970dc002f7fdce217fd Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 7 Mar 2022 18:06:02 +0100 Subject: [PATCH 20/45] Change the class condition to is() --- R/AddStep.R | 9 ++++----- R/ByChunks.R | 4 ++-- R/Collect.R | 2 +- R/Compute.R | 4 ++-- R/SelectorChecker.R | 8 ++++---- R/Start.R | 2 +- R/Utils.R | 2 +- R/zzz.R | 2 +- 8 files changed, 16 insertions(+), 17 deletions(-) diff --git a/R/AddStep.R b/R/AddStep.R index 037bd58..a129f1e 100644 --- a/R/AddStep.R +++ b/R/AddStep.R @@ -44,19 +44,18 @@ #'@export AddStep <- function(inputs, step_fun, ...) { # Check step_fun - if (!('startR_step_fun' %in% class(step_fun))) { + if (!is(step_fun, 'startR_step_fun')) { stop("Parameter 'step_fun' must be a startR step function as returned by Step.") } # Check inputs - if (any(c('startR_cube', 'startR_workflow') %in% class(inputs))) { + if (is(inputs, 'startR_cube') | is(inputs, 'startR_workflow')) { inputs <- list(inputs) names(inputs) <- 'input1' } else if (is.list(inputs)) { if (any(!sapply(inputs, - function(x) any(c('startR_cube', - 'startR_workflow') %in% class(x))))) { + function(x) is(x, 'startR_cube') | is(x, 'startR_workflow')))) { stop("Parameter 'inputs' must be one or a list of objects of the class ", "'startR_cube' or 'startR_workflow'.") } @@ -90,7 +89,7 @@ AddStep <- function(inputs, step_fun, ...) { stop("The target dimensions required by 'step_fun' for the input ", input, " are not present in the corresponding provided object in 'inputs'.") } - if ('startR_workflow' %in% class(inputs[[input]])) { + if (is(inputs[[input]], 'startR_workflow')) { if (is.null(previous_target_dims)) { previous_target_dims <- attr(inputs[[input]], 'TargetDims') } else { diff --git a/R/ByChunks.R b/R/ByChunks.R index 8185763..4782ef2 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -109,7 +109,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', MergeArrays <- .MergeArrays # Check input headers - if ('startR_cube' %in% class(cube_headers)) { + if (is(cube_headers, 'startR_cube')) { cube_headers <- list(cube_headers) } if (!all(sapply(lapply(cube_headers, class), @@ -411,7 +411,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', timings[['nchunks']] <- prod(unlist(chunks)) # Check step_fun - if (!('startR_step_fun' %in% class(step_fun))) { + if (!is(step_fun, 'startR_step_fun')) { stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", "by the function Step.") } diff --git a/R/Collect.R b/R/Collect.R index bf38729..4c80b03 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -72,7 +72,7 @@ #' #'@export Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { - if (!('startR_exec' %in% class(startr_exec))) { + if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", "'startR_exec', as returned by Collect(..., wait = FALSE).") } diff --git a/R/Compute.R b/R/Compute.R index 0e8d42c..778d9ce 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -89,13 +89,13 @@ Compute <- function(workflow, chunks = 'auto', ecflow_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow - if (!any(c('startR_cube', 'startR_workflow') %in% class(workflow))) { + if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", "returned by Start or of class 'startR_workflow' as returned by ", "AddStep.") } - if ('startR_cube' %in% class(workflow)) { + if (is(workflow, 'startR_cube')) { #machine_free_ram <- 1000000000 #max_ram_ratio <- 0.5 #data_size <- prod(c(attr(workflow, 'Dimensions'), 8)) diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 81ec488..76899c1 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -93,7 +93,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tol <- 0 if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% "numeric")) { + if (!is(tolerance, "numeric")) { stop("Expected a numeric *_tolerance.") } tol <- tolerance @@ -148,7 +148,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, val <- selectors[[i]] tol <- 0 if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% "difftime")) { + if (!is(tolerance, "difftime")) { stop("Expected a difftime *_tolerance.") } tol <- tolerance @@ -194,7 +194,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, "nearest values.")) } if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% 'numeric')) { + if (!is(tolerance, 'numeric')) { stop("Expected a numeric *_tolerance.") } } @@ -228,7 +228,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, "nearest values.")) } if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% 'difftime')) { + if (!is(tolerance, 'difftime')) { stop("Expected a difftime *_tolerance.") } } diff --git a/R/Start.R b/R/Start.R index 6fc05ea..f615336 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1384,7 +1384,7 @@ Start <- function(..., # dim = indices/selectors, # names as depended dim. for (j in 1:length(dat_selectors[[file_dim]])) { sv <- selector_vector <- dat_selectors[[file_dim]][[j]] - if (!identical(first_class, class(sv)) || + if (!is(sv, first_class) || !identical(first_length, length(sv))) { stop("All provided selectors for depending dimensions must ", "be vectors of the same length and of the same class.") diff --git a/R/Utils.R b/R/Utils.R index d0e850e..425336e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -806,7 +806,7 @@ readRDS(paste0(shared_dir, '/', chunk_files_original[found_chunk])) }) - if (('try-error' %in% class(array_of_chunks[[i]]))) { + if (is(array_of_chunks[[i]], 'try-error')) { message("Waiting for an incomplete file transfer...") Sys.sleep(5) } else { diff --git a/R/zzz.R b/R/zzz.R index 0067d6d..83e2b7b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1059,7 +1059,7 @@ remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadat # data_array can be data or metadata; if data, change the blank spaces from # NA to -9999; if metadata (supposed to be 'time'), change the corresponding # spaces to -12^10. - if (any(class(data_array) %in% c("POSIXct", "POSIXt"))) { + if (is(data_array, "POSIXct")) { # change to numeric first data_array <- array(as.vector(data_array), dim = dim(data_array)) data_array[which(!logi_array)] <- -12^10 -- GitLab From 6e9d69467d904e7eceadf1b8db7d879ae50bf1f9 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Mar 2022 18:04:15 +0100 Subject: [PATCH 21/45] Enable implicit dependency on multiple file dims --- R/zzz.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 0067d6d..fa1e440 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -873,7 +873,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['fri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['fri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['fri']][[vec_ind]] + } else { #old code + selectors[[x]][['fri']][[which_chunk]] + } } else { selectors[[x]][['fri']][[1]] } @@ -889,7 +898,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['sri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['sri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['sri']][[vec_ind]] + } else { #old code + selectors[[x]][['sri']][[which_chunk]] + } } else { selectors[[x]][['sri']][[1]] } -- GitLab From a83cc261812f7fef4d3ba76c9c82682bfa9c78f2 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Mar 2022 18:13:55 +0100 Subject: [PATCH 22/45] Add unit test (undone) --- ...st-Start-implicit_dependency_by_selector.R | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 5c2f050..5ad93b9 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -115,4 +115,38 @@ tolerance = 0.0001 ) +}) + +test_that("3. region depends on member and sdate", { +#NOTE: This case, region indices are not dependent on sdate. But it should work if it is +reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) + +path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +suppressWarnings( +data <- Start(dat = path_SR, + var = 'tosmean', + memb = paste0('r', c(24, 28), 'i1p1f1'), + region = reg, + region_var = 'region', + sdate = paste0(2000:2002), + time = c(1:4), + chunk = 'all', + chunk_depends = 'sdate', + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time=c('sdate','chunk'), region=c('sdate', 'memb')), + retrieve = T) +) + +expect_equal( +dim(data), +c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) +) + +#expect_equal( +#dim(attr(data, 'Variables')$common$region), +#c(memb = 2, sdate = 3, region = 2) +#) + + }) -- GitLab From c860762fc675a19d3f8b0473066ca09756eab1ad Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 15 Mar 2022 18:34:59 +0100 Subject: [PATCH 23/45] Improve return_vars check for implicit dependency case --- R/Start.R | 12 +++++++++--- R/zzz.R | 2 +- .../test-Start-implicit_dependency_by_selector.R | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/R/Start.R b/R/Start.R index 6fc05ea..afa13db 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1751,10 +1751,16 @@ Start <- function(..., # dim = indices/selectors, (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { need_correct <- TRUE - } else if (inner_dim %in% names(common_return_vars) & - !is.null(names(inner_dims_across_files))) { - if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) { + } else if (inner_dim %in% names(common_return_vars) & + (inner_dim %in% inner_dims_across_files) & + !is.null(names(inner_dims_across_files))) { #(2) + if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) need_correct <- TRUE + + } else if (inner_dim %in% names(common_return_vars) & + is.character(file_dim_as_selector_array_dim)) { #(1) + if (!all(file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])) { need_correct <- TRUE + file_dim_as_selector_array_dim <- file_dim_as_selector_array_dim[which(!file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])] } } if (need_correct) { diff --git a/R/zzz.R b/R/zzz.R index fa1e440..01c8795 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -429,7 +429,7 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter file_dim_as_selector_array_dim) { # inner_dim is not in return_vars or is NULL if (is.character(file_dim_as_selector_array_dim)) { #(1) - if (file_dim_as_selector_array_dim %in% found_pattern_dim) { + if (any(file_dim_as_selector_array_dim %in% found_pattern_dim)) { stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '", found_pattern_dim, "', which is not allowed. To assign the dependency on the pattern dim, ", diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 5ad93b9..92aa488 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -142,6 +142,10 @@ expect_equal( dim(data), c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) ) +expect_equal( +as.vector(drop(data)[,,1]), +c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499) +) #expect_equal( #dim(attr(data, 'Variables')$common$region), -- GitLab From cb6c3921155ce8118224db6ffb6c81bafdcd2b14 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Mar 2022 17:30:48 +0100 Subject: [PATCH 24/45] Several minor bugfixes --- R/Start.R | 160 +++++++++--------- ...st-Start-implicit_dependency_by_selector.R | 3 +- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git a/R/Start.R b/R/Start.R index afa13db..f57ea3c 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1731,7 +1731,11 @@ Start <- function(..., # dim = indices/selectors, #//////////////////////////////////////////// # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or # (2) time_across = 'sdate'. - # NOTE: Here is not in for loop of dat[[i]] + # NOTE: Not sure if the loop over dat is needed here. In theory, all the dat + # should have the same dimensions (?) so expected_inner_dims and + # found_file_dims are the same. The selector_array may possible be + # different, but then the attribute will be correct? If it's different, + # it should depend on 'dat' (but here we only consider common_return_vars) for (i in 1:length(dat)) { for (inner_dim in expected_inner_dims[[i]]) { # The selectors for the inner dimension are taken. @@ -2174,11 +2178,11 @@ Start <- function(..., # dim = indices/selectors, print("-> DEFINING INDICES FOR INNER DIMENSION:") print(inner_dim) } - file_dim <- NULL + crossed_file_dim <- NULL if (inner_dim %in% unlist(inner_dims_across_files)) { - file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] - chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]]) - names(chunk_amount) <- file_dim + crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim } else { chunk_amount <- 1 } @@ -2244,7 +2248,7 @@ Start <- function(..., # dim = indices/selectors, if (!is.null(var_with_selectors_name)) { if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { with_transform <- TRUE - if (!is.null(file_dim)) { + if (!is.null(crossed_file_dim)) { stop("Requested a transformation over the dimension '", inner_dim, "', wich goes across files. This feature ", "is not supported. Either do the request without the ", @@ -2384,7 +2388,7 @@ Start <- function(..., # dim = indices/selectors, #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash if (length(var_dims) > 1) { stop("Specified a '", inner_dim, "_var' for the dimension '", - inner_dim, "', which goes across files (across '", file_dim, + inner_dim, "', which goes across files (across '", crossed_file_dim, "'). The specified variable, '", var_with_selectors_name, "', has more ", "than one dimension and can not be used as selector variable. ", "Select another variable or fix it in the files.") @@ -2418,7 +2422,7 @@ Start <- function(..., # dim = indices/selectors, # need to know each length to create the indices for each file later. # Record 'inner_dim_lengths' here for later usage. inner_dim_lengths <- NULL - if (largest_dims_length & !is.null(file_dim)) { + if (largest_dims_length & !is.null(crossed_file_dim)) { # inner_dim_lengths here includes all the files, but we only want # the files of fyear for certain "sdate". We will categorize it later. inner_dim_lengths <- tryCatch({ @@ -2429,11 +2433,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(file_dims != file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(found_file_dims[[i]] != crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(file_dims != file_dim)) + lapply(selector_indices_save[[i]], '[', which(found_file_dims[[i]] != crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2484,9 +2488,9 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) { + if ((!is.null(crossed_file_dim)) && !(crossed_file_dim %in% names(var_file_dims))) { stop("The variable '", var_with_selectors_name, "' must also be ", - "requested for the file dimension '", file_dim, "' in ", + "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") } fri <- vector('list', length = prod(var_file_dims)) @@ -2581,11 +2585,11 @@ Start <- function(..., # dim = indices/selectors, unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims))) if ((length(unmatching_file_dims) > 0)) { raise_error <- FALSE - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == file_dim) && + (names(var_file_dims)[unmatching_file_dims] == crossed_file_dim) && (inner_dim %in% names(selector_inner_dims)))) { raise_error <- TRUE } @@ -2608,23 +2612,23 @@ Start <- function(..., # dim = indices/selectors, } ## TODO: If var dimensions are not in the same order as selector dimensions, reorder if (is.null(names(selector_file_dims))) { - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { fri_dims <- 1 } else { fri_dims <- chunk_amount - names(fri_dims) <- file_dim + names(fri_dims) <- crossed_file_dim } } else { fri_dim_names <- names(selector_file_dims) - if (!is.null(file_dim)) { - fri_dim_names <- c(fri_dim_names, file_dim) + if (!is.null(crossed_file_dim)) { + fri_dim_names <- c(fri_dim_names, crossed_file_dim) } fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)] fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(file_dim)) { - fri_dims[file_dim] <- chunk_amount + if (!is.null(crossed_file_dim)) { + fri_dims[crossed_file_dim] <- chunk_amount } } fri <- vector('list', length = prod(fri_dims)) @@ -2669,13 +2673,13 @@ Start <- function(..., # dim = indices/selectors, print(str(sub_array_of_values)) print(dim(sub_array_of_values)) print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") - print(file_dim) + print(crossed_file_dim) } } # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { + if ((!is.null(crossed_file_dim) && (crossed_file_dim %in% names(selector_file_dims))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { @@ -3191,8 +3195,8 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(file_dim)) { - taken_chunks[selector_store_position[[file_dim]]] <- TRUE + if (!is.null(crossed_file_dim)) { + taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE } else { taken_chunks <- TRUE } @@ -3204,7 +3208,7 @@ Start <- function(..., # dim = indices/selectors, print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") } } - # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. + # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. if (chunks[[inner_dim]]["n_chunks"] > 1 & inner_dim %in% inner_dims_across_files) { stop("Chunk over dimension '", inner_dim, "' is not allowed because '", inner_dim, "' is across '", @@ -3300,7 +3304,7 @@ Start <- function(..., # dim = indices/selectors, for (chunk in 1:chunk_amount) { if (!is.null(names(selector_store_position))) { - selector_store_position[file_dim] <- chunk + selector_store_position[crossed_file_dim] <- chunk } else { selector_store_position <- chunk } @@ -3362,7 +3366,7 @@ Start <- function(..., # dim = indices/selectors, } } else { stop("Provided array of indices for dimension '", inner_dim, "', ", - "which goes across the file dimension '", file_dim, "', but ", + "which goes across the file dimension '", crossed_file_dim, "', but ", "the provided array does not have the dimension '", inner_dim, "', which is mandatory.") } @@ -3402,12 +3406,12 @@ Start <- function(..., # dim = indices/selectors, # end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)] # chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove))) chunks_to_keep <- which(taken_chunks) - dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep)) - # found_indices <- Subset(found_indices, file_dim, chunks_to_keep) + dims_to_crop[[crossed_file_dim]] <- c(dims_to_crop[[crossed_file_dim]], list(chunks_to_keep)) + # found_indices <- Subset(found_indices, crossed_file_dim, chunks_to_keep) # # Crop dataset variables file dims. # for (picked_var in names(picked_vars[[i]])) { - # if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { - # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep) + # if (crossed_file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], crossed_file_dim, chunks_to_keep) # } # } } @@ -3481,53 +3485,53 @@ Start <- function(..., # dim = indices/selectors, Subset(transformed_var_with_selectors, inner_dim, crop_indices) } } else { - if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim - if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { - tmp <- common_vars_to_crop[[common_var_to_crop]] - tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) - dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) - if (!identical(dim_extra_ind, integer(0))) { - tmp_list <- asplit(tmp, dim_extra_ind) - dim_file_ind <- which(names(dim(tmp_list[[1]])) == file_dim) - tmp_list <- lapply(tmp_list, asplit, dim_file_ind) - } else { # only file_dim and inner_dim - dim_file_ind <- which(names(dim(tmp)) == file_dim) - tmp_list <- asplit(tmp, dim_file_ind) - # Add another layer to be consistent with the first case above - tmp_list <- list(tmp_list) - } - max_fri_length <- max(sapply(fri, length)) - for (i_extra_dim in 1:length(tmp_list)) { - for (i_fri in 1:length(fri)) { - tmp_list[[i_extra_dim]][[i_fri]] <- - tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] - - if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim + if (crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + tmp <- common_vars_to_crop[[common_var_to_crop]] + tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) + dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) + if (!identical(dim_extra_ind, integer(0))) { + tmp_list <- asplit(tmp, dim_extra_ind) + dim_file_ind <- which(names(dim(tmp_list[[1]])) == crossed_file_dim) + tmp_list <- lapply(tmp_list, asplit, dim_file_ind) + } else { # only crossed_file_dim and inner_dim + dim_file_ind <- which(names(dim(tmp)) == crossed_file_dim) + tmp_list <- asplit(tmp, dim_file_ind) + # Add another layer to be consistent with the first case above + tmp_list <- list(tmp_list) + } + max_fri_length <- max(sapply(fri, length)) + for (i_extra_dim in 1:length(tmp_list)) { + for (i_fri in 1:length(fri)) { tmp_list[[i_extra_dim]][[i_fri]] <- - c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] + + if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + tmp_list[[i_extra_dim]][[i_fri]] <- + c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + } } } - } - # Change list back to array - tmp_new_dim <- c(max_fri_length, dim(tmp)[file_dim], dim(tmp)[dim_extra_ind]) - names(tmp_new_dim) <- c(inner_dim, file_dim, names(dim(tmp))[dim_extra_ind]) - common_vars_to_crop[[common_var_to_crop]] <- - array(unlist(tmp_list), dim = tmp_new_dim) - # Reorder back - common_vars_to_crop[[common_var_to_crop]] <- - aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) - # Put attributes back - tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) - attributes(common_vars_to_crop[[common_var_to_crop]]) <- - c(attributes(common_vars_to_crop[[common_var_to_crop]]), - tmp_attributes[tmp]) - - if ('time' %in% synonims[[common_var_to_crop]]) { - # Convert number back to time + # Change list back to array + tmp_new_dim <- c(max_fri_length, dim(tmp)[crossed_file_dim], dim(tmp)[dim_extra_ind]) + names(tmp_new_dim) <- c(inner_dim, crossed_file_dim, names(dim(tmp))[dim_extra_ind]) common_vars_to_crop[[common_var_to_crop]] <- - as.POSIXct(common_vars_to_crop[[common_var_to_crop]], - origin = "1970-01-01", tz = 'UTC') - } + array(unlist(tmp_list), dim = tmp_new_dim) + # Reorder back + common_vars_to_crop[[common_var_to_crop]] <- + aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Put attributes back + tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) + attributes(common_vars_to_crop[[common_var_to_crop]]) <- + c(attributes(common_vars_to_crop[[common_var_to_crop]]), + tmp_attributes[tmp]) + + if ('time' %in% synonims[[common_var_to_crop]]) { + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } } } else { # old code @@ -3866,7 +3870,7 @@ Start <- function(..., # dim = indices/selectors, } } #print("P") - + # If merge_across_dims = TRUE, there might be additional NAs due to unequal # inner_dim ('time') length across file_dim ('file_date'). # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. @@ -3961,9 +3965,8 @@ Start <- function(..., # dim = indices/selectors, attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr } } - } - + gc() # Load metadata and remove the metadata folder @@ -4038,7 +4041,8 @@ Start <- function(..., # dim = indices/selectors, # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array = NULL, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) metadata_tmp <- tmp$metadata } metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 92aa488..1f320bf 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -144,7 +144,8 @@ c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) ) expect_equal( as.vector(drop(data)[,,1]), -c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499) +c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), +tolerance = 0.0001 ) #expect_equal( -- GitLab From f18a8c618b105f9163b902ef04a083c8a7a94ef0 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 18 Mar 2022 19:20:15 +0100 Subject: [PATCH 25/45] Modify split case --- R/Start.R | 38 ++++++---- R/Utils.R | 14 ++++ .../testthat/test-Start-metadata_reshaping.R | 75 ++++++++++++++++++- 3 files changed, 113 insertions(+), 14 deletions(-) diff --git a/R/Start.R b/R/Start.R index f615336..bcd6943 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3744,8 +3744,21 @@ Start <- function(..., # dim = indices/selectors, final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] } } else if (split_multiselected_dims) { - tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') - final_dims_fake_merge_dim <- all_split_dims[[1]] + tmp_fun <- function (x, y) { + any(names(dim(x)) %in% y) + } + inner_dim_has_split_dim <- names(which(unlist(lapply( + picked_common_vars, tmp_fun, names(all_split_dims))))) + if (!identical(inner_dim_has_split_dim, character(0))) { + tmp_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + target_split_dim_ind <- which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == names(all_split_dims)) + margin_dim_ind <- c(1:length(dim(picked_common_vars[[inner_dim_has_split_dim]])))[-target_split_dim_ind] + if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { + final_dims_fake_merge_dim <- all_split_dims[[1]] + } else { + final_dims_fake_merge_dim <- .ReplaceElementInVector(dim(picked_common_vars[[inner_dim_has_split_dim]]), target = names(all_split_dims), new_val = all_split_dims[[1]]) + } + } } # The following several lines will only run if retrieve = TRUE @@ -3945,17 +3958,16 @@ Start <- function(..., # dim = indices/selectors, attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr } if (split_multiselected_dims) { - if (names(all_split_dims) %in% names(picked_common_vars)) { - metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_merge_dim) # Convert numeric back to dates - if ('time' %in% synonims[[names(all_split_dims)]]) { + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } - picked_common_vars[[names(all_split_dims)]] <- metadata_tmp - attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- tmp_attr } } - } gc() @@ -4074,14 +4086,14 @@ Start <- function(..., # dim = indices/selectors, attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr } if (split_multiselected_dims) { - if (names(all_split_dims) %in% names(picked_common_vars)) { - metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_merge_dim) # Convert numeric back to dates - if ('time' %in% synonims[[names(all_split_dims)]]) { + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } - picked_common_vars[[names(all_split_dims)]] <- metadata_tmp - attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- tmp_attr } } } diff --git a/R/Utils.R b/R/Utils.R index 425336e..11d239f 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -844,3 +844,17 @@ .KnownLatNames <- function() { known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') } + +.ReplaceElementInVector <- function(x, target, new_val) { + # x is a vector with name + # target is a string + # new_val is a vector with name + # E.g., Change [a = 2, b = 3] to [c = 1, d = 2, b = 3], then: + # x = c(a = 2, b = 3), target = 'a', new_val = c(c = 1, d = 2) + new_names <- unlist(lapply(as.list(names(x)), function(x) if (x == target) names(new_val) else x)) + new_list <- vector('list', length = length(new_names)) + for (i in 1:length(new_list)) { + new_list[[i]] <- c(new_val, x)[which(c(names(new_val), names(x)) == new_names[i])] + } + return(unlist(new_list)) +} diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 91a793d..2374571 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -387,7 +387,7 @@ dates }) -test_that("6. split dim only", { +test_that("6. split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), @@ -544,3 +544,76 @@ dates }) + +test_that("8. split sdate dim", { + +file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), + dim = c(syear = 3, smonth = 2)) +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(syear = 3, smonth = 2, time = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +dates[, 1, 1], +seq(as.POSIXct('1993-07-01', tz = 'UTC'), as.POSIXct('1995-07-01', tz = 'UTC'), by = 'year') +) +expect_equal( +dates[, 2, 2], +seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00:00', tz = 'UTC'), by = 'year') +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + -- GitLab From 16687146fa494430550ec01c78f705d787ddfe78 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 18 Mar 2022 20:32:54 +0100 Subject: [PATCH 26/45] Revise for new development --- tests/testthat/test-Start-split-merge.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index fe686dc..8793296 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -176,10 +176,10 @@ c(dat = 1, var = 1, smonth = 2, syear = 2, time = 1, latitude = 18, longitude = ) expect_equal( dim(attr(obs, 'Variables')$common$time), -c(file_date = 4, time = 1) +c(smonth = 2, syear = 2, time = 1) ) expect_equal( -attr(obs, 'Variables')$common$time[1, 1], +attr(obs, 'Variables')$common$time[1, 1, 1], as.POSIXct('2013-11-15', tz = 'UTC') ) -- GitLab From 3a75d41dde838f3ef317b87084d73c574de55d74 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 18 Mar 2022 20:33:03 +0100 Subject: [PATCH 27/45] Modularize --- R/Start.R | 73 +++++++++++++++++++++++-------------------------------- R/zzz.R | 23 ++++++++++++++++++ 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/R/Start.R b/R/Start.R index bcd6943..51f47fa 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3676,6 +3676,14 @@ Start <- function(..., # dim = indices/selectors, split_multiselected_dims <- FALSE .warning(paste0("Not found any dimensions able to be split. The parameter ", "'split_multiselected_dims' is changed to FALSE.")) + } else { + tmp_fun <- function (x, y) { + any(names(dim(x)) %in% y) + } + inner_dim_has_split_dim <- names(which(unlist(lapply( + picked_common_vars, tmp_fun, names(all_split_dims))))) + # If merge_across_dims also, it will be replaced later + saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') } } #====================================================================== @@ -3687,6 +3695,9 @@ Start <- function(..., # dim = indices/selectors, across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of each inner_dim ('time') along each file_dim ('file_date') length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length) + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + # Save attributes for later use. If split_multiselected_dims, this variable has been created above but is replaced here + saved_reshaped_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') if (merge_across_dims_narm & !split_multiselected_dims) { final_dims_fake <- merge_narm_dims(final_dims_fake, across_inner_dim, length_inner_across_dim) @@ -3733,33 +3744,10 @@ Start <- function(..., # dim = indices/selectors, } } - if (merge_across_dims) { - # Save dim and attributes for later use - dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) - tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') - # Find final_dim_fake for metadata and put it in an array - if (!split_multiselected_dims) { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] - } else { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] - } - } else if (split_multiselected_dims) { - tmp_fun <- function (x, y) { - any(names(dim(x)) %in% y) - } - inner_dim_has_split_dim <- names(which(unlist(lapply( - picked_common_vars, tmp_fun, names(all_split_dims))))) - if (!identical(inner_dim_has_split_dim, character(0))) { - tmp_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') - target_split_dim_ind <- which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == names(all_split_dims)) - margin_dim_ind <- c(1:length(dim(picked_common_vars[[inner_dim_has_split_dim]])))[-target_split_dim_ind] - if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { - final_dims_fake_merge_dim <- all_split_dims[[1]] - } else { - final_dims_fake_merge_dim <- .ReplaceElementInVector(dim(picked_common_vars[[inner_dim_has_split_dim]]), target = names(all_split_dims), new_val = all_split_dims[[1]]) - } - } - } + final_dims_fake_metadata <- find_final_dims_fake_metadata( + merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, + final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) + # The following several lines will only run if retrieve = TRUE if (retrieve) { @@ -3900,7 +3888,7 @@ Start <- function(..., # dim = indices/selectors, stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } - if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) } @@ -3916,7 +3904,7 @@ Start <- function(..., # dim = indices/selectors, } data_array <- array(data_array_tmp, dim = final_dims_fake) - metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. # To get the user-required dim order, we need to reorder the array again. @@ -3934,7 +3922,7 @@ Start <- function(..., # dim = indices/selectors, } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) @@ -3949,23 +3937,23 @@ Start <- function(..., # dim = indices/selectors, tmp[file_dim_pos] <- inner_dim_pos picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) } - metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if ('time' %in% synonims[[across_inner_dim]]) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } if (split_multiselected_dims) { if (!identical(inner_dim_has_split_dim, character(0))) { - metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp - attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr } } } @@ -4035,7 +4023,7 @@ Start <- function(..., # dim = indices/selectors, metadata_tmp <- tmp$merge_dim_metadata } - if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) } @@ -4044,10 +4032,11 @@ Start <- function(..., # dim = indices/selectors, # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array = NULL, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) metadata_tmp <- tmp$metadata } - metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. # To get the user-required dim order, we need to reorder the array again. @@ -4064,7 +4053,7 @@ Start <- function(..., # dim = indices/selectors, metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } else { # ! (merge_across_dims + split_multiselected_dims) (old version) if (merge_across_dims) { # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) @@ -4077,23 +4066,23 @@ Start <- function(..., # dim = indices/selectors, tmp[file_dim_pos] <- inner_dim_pos picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) } - metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if ('time' %in% synonims[[across_inner_dim]]) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } if (split_multiselected_dims) { if (!identical(inner_dim_has_split_dim, character(0))) { - metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp - attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr } } } diff --git a/R/zzz.R b/R/zzz.R index 83e2b7b..476fc8e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -822,6 +822,29 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina return(list(final_dims_fake, all_split_dims)) } +# Find the final_dims_fake for metadata if it needs to be reshaped +find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims, + picked_common_vars, across_inner_dim, final_dims_fake, + dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) { + if (merge_across_dims) { + if (!split_multiselected_dims) { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + if (!identical(inner_dim_has_split_dim, character(0))) { + target_split_dim_ind <- which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == names(all_split_dims)) + margin_dim_ind <- c(1:length(dim(picked_common_vars[[inner_dim_has_split_dim]])))[-target_split_dim_ind] + if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { + final_dims_fake_metadata <- all_split_dims[[1]] + } else { + final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars[[inner_dim_has_split_dim]]), target = names(all_split_dims), new_val = all_split_dims[[1]]) + } + } + } + return(final_dims_fake_metadata) +} # Build the work pieces. build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims, -- GitLab From a66a20965f0e93699bc631ec14ba6a900f33ea85 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 18 Mar 2022 20:54:21 +0100 Subject: [PATCH 28/45] Add condition --- R/Start.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/Start.R b/R/Start.R index 51f47fa..2a818a1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3743,11 +3743,11 @@ Start <- function(..., # dim = indices/selectors, all_split_dims[[1]] <- tmp[[2]] } } - - final_dims_fake_metadata <- find_final_dims_fake_metadata( - merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, - final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) - + if (merge_across_dims | split_multiselected_dims) { + final_dims_fake_metadata <- find_final_dims_fake_metadata( + merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, + final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) + } # The following several lines will only run if retrieve = TRUE if (retrieve) { -- GitLab From 3e5aa2c9fa219ce6f5844a1b89918c871ec3d7cc Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Mar 2022 11:21:51 +0100 Subject: [PATCH 29/45] Condition statement fix --- R/Start.R | 7 +++++-- R/zzz.R | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 2a818a1..ac4faba 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3666,6 +3666,7 @@ Start <- function(..., # dim = indices/selectors, # Find the dimension to split if split_multiselected_dims = TRUE. # If there is no dimension able to be split, change split_multiselected_dims to FALSE. all_split_dims <- NULL + inner_dim_has_split_dim <- NULL if (split_multiselected_dims) { tmp <- dims_split(dim_params, final_dims_fake) final_dims_fake <- tmp[[1]] @@ -3682,8 +3683,10 @@ Start <- function(..., # dim = indices/selectors, } inner_dim_has_split_dim <- names(which(unlist(lapply( picked_common_vars, tmp_fun, names(all_split_dims))))) - # If merge_across_dims also, it will be replaced later - saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + if (!identical(inner_dim_has_split_dim, character(0))) { + # If merge_across_dims also, it will be replaced later + saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + } } } #====================================================================== diff --git a/R/zzz.R b/R/zzz.R index 476fc8e..8055c69 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -826,6 +826,7 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) { + final_dims_fake_metadata <- NULL if (merge_across_dims) { if (!split_multiselected_dims) { final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] -- GitLab From a901dc5099397626373f4b1d6aa3ea424194924c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 29 Mar 2022 11:46:08 +0200 Subject: [PATCH 30/45] Fix metadata reshaping when time dim is implicit and the split file dim array has time as a dimension --- R/Start.R | 29 +++++++- R/zzz.R | 18 +++-- .../testthat/test-Start-metadata_reshaping.R | 70 +++++++++++++++++++ 3 files changed, 104 insertions(+), 13 deletions(-) diff --git a/R/Start.R b/R/Start.R index ac4faba..36eed74 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3747,9 +3747,32 @@ Start <- function(..., # dim = indices/selectors, } } if (merge_across_dims | split_multiselected_dims) { - final_dims_fake_metadata <- find_final_dims_fake_metadata( - merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, - final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) +#--------NEW3-------------------- + if (!merge_across_dims & split_multiselected_dims & identical(inner_dim_has_split_dim, character(0))) { + final_dims_fake_metadata <- NULL + } else { + if (!merge_across_dims & split_multiselected_dims) { + if (any(names(all_split_dims[[1]]) %in% names(dim(picked_common_vars[[inner_dim_has_split_dim]]))) & + names(all_split_dims)[1] != inner_dim_has_split_dim) { + if (inner_dim_has_split_dim %in% names(final_dims)) { + stop("Detect inner dimension in the split array, but merge_across_dims is not used. The output dimensions will be repeated. Check if the dimensions and parameters are correctly defined.") + } else { + # Only split no merge, time dim is not explicitly defined because the + # length is 1, the sdate dim to be split having 'time' as one dimension. + # --> Take 'time' dim off from picked_common_vars. + dim(picked_common_vars[[inner_dim_has_split_dim]]) <- dim(picked_common_vars[[inner_dim_has_split_dim]])[-which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == inner_dim_has_split_dim)] + } + } + } + final_dims_fake_metadata <- find_final_dims_fake_metadata( + merge_across_dims, split_multiselected_dims, picked_common_vars = picked_common_vars[[inner_dim_has_split_dim]], across_inner_dim, + final_dims_fake, dims_of_merge_dim, all_split_dims) + } + +# final_dims_fake_metadata <- find_final_dims_fake_metadata( +# merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, +# final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) +#-------NEW3_END------------- } # The following several lines will only run if retrieve = TRUE diff --git a/R/zzz.R b/R/zzz.R index 8055c69..5a8ea5e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -825,8 +825,7 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina # Find the final_dims_fake for metadata if it needs to be reshaped find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, final_dims_fake, - dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) { - final_dims_fake_metadata <- NULL + dims_of_merge_dim, all_split_dims) { if (merge_across_dims) { if (!split_multiselected_dims) { final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] @@ -834,16 +833,15 @@ find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] } } else if (split_multiselected_dims) { - if (!identical(inner_dim_has_split_dim, character(0))) { - target_split_dim_ind <- which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == names(all_split_dims)) - margin_dim_ind <- c(1:length(dim(picked_common_vars[[inner_dim_has_split_dim]])))[-target_split_dim_ind] - if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { - final_dims_fake_metadata <- all_split_dims[[1]] - } else { - final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars[[inner_dim_has_split_dim]]), target = names(all_split_dims), new_val = all_split_dims[[1]]) - } + target_split_dim_ind <- which(names(dim(picked_common_vars)) == names(all_split_dims)) + margin_dim_ind <- c(1:length(dim(picked_common_vars)))[-target_split_dim_ind] + if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { + final_dims_fake_metadata <- all_split_dims[[1]] + } else { + final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars), target = names(all_split_dims), new_val = all_split_dims[[1]]) } } + return(final_dims_fake_metadata) } diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 2374571..bc048f0 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -617,3 +617,73 @@ dates }) +test_that("9. split file dim that contains 'time', and 'time' inner dim is implicit", { + +dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) + +suppressWarnings( +data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', + var = 'tas', + file_date = dates_arr, # [syear, time] + split_multiselected_dims = TRUE, + latitude = indices(1), + longitude = indices(1), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dim(obs) +# dat var time syear latitude longitude +# 1 1 5 2 1 1 + +dates <- attr(obs, 'Variables')$common$time + + +expect_equal( +dim(dates), +c(time = 5, syear = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +format(dates, '%Y%m'), +as.vector(dates_arr) +) + + +suppressWarnings( +dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', + var = 'tas', + file_date = dates_arr, # [syear, time] + split_multiselected_dims = TRUE, + latitude = indices(1), + longitude = indices(1), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) -- GitLab From a03c0eb22ca834389ff4625e021406cc8133fa37 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 29 Mar 2022 11:51:40 +0200 Subject: [PATCH 31/45] Remove extra comments --- R/Start.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/Start.R b/R/Start.R index 36eed74..7ae3096 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3747,7 +3747,6 @@ Start <- function(..., # dim = indices/selectors, } } if (merge_across_dims | split_multiselected_dims) { -#--------NEW3-------------------- if (!merge_across_dims & split_multiselected_dims & identical(inner_dim_has_split_dim, character(0))) { final_dims_fake_metadata <- NULL } else { @@ -3768,11 +3767,6 @@ Start <- function(..., # dim = indices/selectors, merge_across_dims, split_multiselected_dims, picked_common_vars = picked_common_vars[[inner_dim_has_split_dim]], across_inner_dim, final_dims_fake, dims_of_merge_dim, all_split_dims) } - -# final_dims_fake_metadata <- find_final_dims_fake_metadata( -# merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, -# final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) -#-------NEW3_END------------- } # The following several lines will only run if retrieve = TRUE -- GitLab From da6a1329e3c102d8b1eaa3955e70f4bb45f7301d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 29 Mar 2022 12:53:41 +0200 Subject: [PATCH 32/45] Fix typo --- tests/testthat/test-Start-metadata_reshaping.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index bc048f0..5b50692 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -638,7 +638,7 @@ dim(obs) # dat var time syear latitude longitude # 1 1 5 2 1 1 -dates <- attr(obs, 'Variables')$common$time +dates <- attr(data, 'Variables')$common$time expect_equal( -- GitLab From 785e989d7c689a11bd10f30d34643093c6fe6848 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Apr 2022 18:50:19 +0200 Subject: [PATCH 33/45] Add region depending on sdate & member case in document --- inst/doc/faq.md | 4 ++ inst/doc/usecase/ex1_13_implicit_dependency.R | 54 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 689e13a..83065d4 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -927,6 +927,10 @@ data <- Start(dat = path, region = 'sdate'), retrieve = T) ``` +The dependency can be on more than one file dimension. What you need to do is just creat +an array with the depended file dimension as the array dimension name. See more examples +in [use case ex1_13](inst/doc/usecase/ex1_13_implicit_dependency.R). + ### 23. The best practice of using vector and list for selectors There are three ways to define the selectors in Start(): `indices()`, `values()`, and character string diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 1ea9381..6740a21 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -1,5 +1,7 @@ # Author: An-Chi Ho # Date: 13th July 2021 +# Implement case 3: 6th April 2022 + #--------------------------------------------------------------------- # This script shows how to use a value array as the inner dimension selector to express # dependency on a file dimension. By this means, we don't need to specify the *_across @@ -11,6 +13,8 @@ # index for Nino3. sdate 1993 has 'Nino3' at index 9 while sdate 2013 has 'Nino3' at # index 11. Create a value array for region selector so Start() can look for 'Nino3' in # each file. +# In the third case, 'region' is defined as an array that has dimensions 'sdate', 'member', +# and 'region'. It works if region indices is dependent on both sdate and member. #--------------------------------------------------------------------- library(startR) @@ -80,9 +84,59 @@ data[1, 1, , 1, ] #[2,] 24.47482 24.75953 # --> region index 11 in orginal file +#============================================================================= + +# Case 3: 'region' depends on 'sdate' and 'member' + +#NOTE: Actually, the region indices are not dependent on sdate in this case, but +# it should work if it is. If you have a better example, please let me know. +region <- array(c('Nino3.4', 'Nino3'), dim = c(region = 2, sdate = 3, memb = 5)) +# check the array +region[, 1, 1] +#[1] "Nino3.4" "Nino3" +region[, 1, 2] +#[1] "Nino3.4" "Nino3" +region[, 2, 2] +#[1] "Nino3.4" "Nino3" +#--> For each sdate-memb combination, the desired regions are "Nino3.4" and "Nino3". +path <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/', + 'EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/', + '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +data <- Start(dat = path, + var = 'tosmean', + memb = paste0('r', c(24:28), 'i1p1f1'), + region = region, + region_var = 'region', + sdate = paste0(2000:2002), + time = 'all', + chunk = 'all', + chunk_depends = 'sdate', + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = c('sdate','chunk'), + region = c('sdate', 'memb')), + retrieve = T) +# Check output +## Nino3.4 +drop(data)[ , 1, , 1] +# [,1] [,2] [,3] +#[1,] 26.87246 27.28198 27.65627 +#[2,] 26.87331 27.31887 27.63275 +#[3,] 26.89038 27.31446 27.58801 +#[4,] 26.90285 27.26750 27.66004 +#[5,] 26.88851 27.28953 27.68499 + +## Nino3 + drop(data)[ , 2, , 1] +# [,1] [,2] [,3] +#[1,] 26.58774 26.38932 26.80643 +#[2,] 26.58879 26.43760 26.68655 +#[3,] 26.59319 26.41373 26.64150 +#[4,] 26.69607 26.40465 26.69096 +#[5,] 26.59114 26.40454 26.71252 -- GitLab From 2893ef019e6220e2b38e417879376c00f8992b46 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Apr 2022 18:50:33 +0200 Subject: [PATCH 34/45] Correct typo --- R/Start.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index adb87bf..0e0e90b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2605,7 +2605,7 @@ Start <- function(..., # dim = indices/selectors, } if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) { if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) { - stop("Size of selector file dimensions must mach size of requested ", + stop("Size of selector file dimensions must match size of the corresponding ", "variable dimensions.") } } -- GitLab From 202d87207183a9eae94ebaacee887386e18d2e3d Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Apr 2022 18:33:12 +0200 Subject: [PATCH 35/45] Add check to detect wrong *_across value --- R/zzz.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 5a8ea5e..ad334e6 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -118,6 +118,9 @@ rebuild_dim_params <- function(dim_params, merge_across_dims, # Reallocating pairs of across file and inner dimensions if they have # to be merged. They are put one next to the other to ease merge later. if (merge_across_dims) { + if (any(!names(inner_dims_across_files) %in% names(dim_params)) | + any(!unlist(inner_dims_across_files) %in% names(dim_params))) + stop("All *_across parameters must have value as a file dimension name.") for (inner_dim_across in names(inner_dims_across_files)) { inner_dim_pos <- which(names(dim_params) == inner_dim_across) file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) -- GitLab From bb50483c12aa0503b92ebe9d4ecf8c2a68100d9a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Apr 2022 18:38:04 +0200 Subject: [PATCH 36/45] Correct version --- tests/testthat/test-Start-calendar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 10cf7bf..53122c8 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, -- GitLab From cae3db01acc5ef1a6bf9139bc2f10dc4f71e5845 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:39:59 +0200 Subject: [PATCH 37/45] Correct version folder name --- tests/testthat/test-Start-calendar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 10cf7bf..53122c8 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, -- GitLab From c4adfb3375cb3a4a09bb874cf1ffcb44d683e13d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:49:38 +0200 Subject: [PATCH 38/45] import method::is --- NAMESPACE | 1 + R/AddStep.R | 1 + R/ByChunks.R | 1 + R/Compute.R | 1 + R/SelectorChecker.R | 1 + R/Start.R | 1 + R/Utils.R | 1 + 7 files changed, 7 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 1434a0f..c6bca72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ import(future) import(multiApply) import(parallel) importFrom(ClimProjDiags,Subset) +importFrom(methods,is) importFrom(s2dv,CDORemap) importFrom(stats,na.omit) importFrom(stats,setNames) diff --git a/R/AddStep.R b/R/AddStep.R index a129f1e..00af3ab 100644 --- a/R/AddStep.R +++ b/R/AddStep.R @@ -41,6 +41,7 @@ #' use_attributes = list(data = "Variables")) #' wf <- AddStep(data, step, pi_val = pi_short) #' +#'@importFrom methods is #'@export AddStep <- function(inputs, step_fun, ...) { # Check step_fun diff --git a/R/ByChunks.R b/R/ByChunks.R index 4782ef2..37a554c 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -80,6 +80,7 @@ #' #ByChunks(step, data) #' #'@import multiApply +#'@importFrom methods is #'@noRd ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_load = 2, threads_compute = 1, diff --git a/R/Compute.R b/R/Compute.R index 778d9ce..0aa9424 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -82,6 +82,7 @@ #' wf <- AddStep(data, step) #' res <- Compute(wf, chunks = list(longitude = 4, sdate = 2)) #' +#'@importFrom methods is #'@export Compute <- function(workflow, chunks = 'auto', threads_load = 1, threads_compute = 1, diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 76899c1..7b69a8b 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -35,6 +35,7 @@ #'sub_array_of_values <- seq(90, -90, length.out = 258)[2:257] #'SelectorChecker(sub_array_of_selectors, sub_array_of_values) #' +#'@importFrom methods is #'@export SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tolerance = NULL) { diff --git a/R/Start.R b/R/Start.R index 0e0e90b..93fb3c1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -802,6 +802,7 @@ #'@importFrom utils str #'@importFrom stats na.omit setNames #'@importFrom ClimProjDiags Subset +#'@importFrom methods is #'@export Start <- function(..., # dim = indices/selectors, # dim_var = 'var', diff --git a/R/Utils.R b/R/Utils.R index 11d239f..9bf23e9 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,4 +1,5 @@ #'@import abind +#'@importFrom methods is #'@importFrom ClimProjDiags Subset .chunk <- function(chunk, n_chunks, selectors) { if (any(chunk > n_chunks)) { -- GitLab From 3fe78e2755a4b510248131c52f80c9a1c31adde2 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:51:43 +0200 Subject: [PATCH 39/45] Include methods into DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f761503..9f482a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,8 @@ Imports: easyNCDF, s2dv, ClimProjDiags, - PCICt + PCICt, + methods Suggests: stats, utils, -- GitLab From 1b3715ad8bc3724ba1bc0f80084ff3541e4a8a17 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 11:50:49 +0200 Subject: [PATCH 40/45] Add comment and fix typo --- R/Start.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 7ae3096..80baa51 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2222,7 +2222,7 @@ Start <- function(..., # dim = indices/selectors, selector_file_dims <- 1 #NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname. - # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2) + # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2) if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] } @@ -3475,7 +3475,8 @@ Start <- function(..., # dim = indices/selectors, Subset(transformed_var_with_selectors, inner_dim, crop_indices) } } else { - if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim + if (!is.null(file_dim)) { #merge_across_dims, file_dim is the depended file dim + #NOTE: When is not this case??? if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) -- GitLab From c217ec54562dfd4e8fece281fb3190b2fca3b806 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 20:31:17 +0200 Subject: [PATCH 41/45] Development for metadata dimension of implicit dependency (define selector by array) --- R/Start.R | 47 ++++++++++++++----- ...st-Start-implicit_dependency_by_selector.R | 8 ++-- .../testthat/test-Start-metadata_reshaping.R | 3 -- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/R/Start.R b/R/Start.R index fa90548..3d69279 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2184,6 +2184,22 @@ Start <- function(..., # dim = indices/selectors, crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) names(chunk_amount) <- crossed_file_dim + } else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) & + inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) & + any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) { + # inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4]) + crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% + names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))] + if (length(crossed_file_dim) == 1) { + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim + } else { + # e.g., region = [memb = 2, sdate = 3, region = 1] + chunk_amount <- prod( + sapply(lapply( + dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length)) + names(chunk_amount) <- paste(crossed_file_dim, collapse = '.') + } } else { chunk_amount <- 1 } @@ -2628,9 +2644,12 @@ Start <- function(..., # dim = indices/selectors, fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(crossed_file_dim)) { - fri_dims[crossed_file_dim] <- chunk_amount - } + #NOTE: Not sure how it works here, but "chunk_amount" is the same as + # "selector_file_dims" above in the cases we've seen so far, + # and it causes problem when crossed_file_dim is more than one. +# if (!is.null(crossed_file_dim)) { +# fri_dims[crossed_file_dim] <- chunk_amount +# } } fri <- vector('list', length = prod(fri_dims)) dim(fri) <- fri_dims @@ -3196,11 +3215,15 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(crossed_file_dim)) { - taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE - } else { - taken_chunks <- TRUE - } + + #NOTE: This part existed always but never was used. taken_chunks + # is related to merge_across_dims, but I don't know how it is + # used (maybe for higher efficiency?) +# if (!is.null(crossed_file_dim)) { +# taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE +# } else { + taken_chunks <- TRUE +# } } } else { # The inner dim goes across a file dim (e.g., time_across = 'sdate') @@ -3487,17 +3510,17 @@ Start <- function(..., # dim = indices/selectors, } } else { if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim - #NOTE: When is not this case??? - if (crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + #NOTE: When is not this case??? Maybe this condition is not needed + if (any(crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]])))) { tmp <- common_vars_to_crop[[common_var_to_crop]] tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) if (!identical(dim_extra_ind, integer(0))) { tmp_list <- asplit(tmp, dim_extra_ind) - dim_file_ind <- which(names(dim(tmp_list[[1]])) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp_list[[1]])) %in% crossed_file_dim) tmp_list <- lapply(tmp_list, asplit, dim_file_ind) } else { # only crossed_file_dim and inner_dim - dim_file_ind <- which(names(dim(tmp)) == crossed_file_dim) + dim_file_ind <- which(names(dim(tmp)) %in% crossed_file_dim) tmp_list <- asplit(tmp, dim_file_ind) # Add another layer to be consistent with the first case above tmp_list <- list(tmp_list) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 1f320bf..10e5545 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -148,10 +148,10 @@ c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), tolerance = 0.0001 ) -#expect_equal( -#dim(attr(data, 'Variables')$common$region), -#c(memb = 2, sdate = 3, region = 2) -#) +expect_equal( +dim(attr(data, 'Variables')$common$region), +c(memb = 2, sdate = 3, region = 1) +) }) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 5b50692..2cacc50 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -634,9 +634,6 @@ data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$fi time = 'file_date'), retrieve = TRUE) ) -dim(obs) -# dat var time syear latitude longitude -# 1 1 5 2 1 1 dates <- attr(data, 'Variables')$common$time -- GitLab From f9d0573685adbe533aa4a325e19cbd1dd648b5c4 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Apr 2022 12:53:57 +0200 Subject: [PATCH 42/45] Revise conditional statement and add one test --- R/Start.R | 14 ++-- .../testthat/test-Start-metadata_reshaping.R | 66 +++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/R/Start.R b/R/Start.R index 3d69279..db573a8 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2450,11 +2450,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(found_file_dims[[i]] != crossed_file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(found_file_dims[[i]] != crossed_file_dim)) + lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2505,7 +2505,7 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(crossed_file_dim)) && !(crossed_file_dim %in% names(var_file_dims))) { + if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) { stop("The variable '", var_with_selectors_name, "' must also be ", "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") @@ -2605,9 +2605,9 @@ Start <- function(..., # dim = indices/selectors, if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { - if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == crossed_file_dim) && - (inner_dim %in% names(selector_inner_dims)))) { + if (!(length(unmatching_file_dims) == 1 & + names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim & + inner_dim %in% names(selector_inner_dims))) { raise_error <- TRUE } } @@ -2699,7 +2699,7 @@ Start <- function(..., # dim = indices/selectors, # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(crossed_file_dim) && (crossed_file_dim %in% names(selector_file_dims))) || is.null(crossed_file_dim)) { + if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 2cacc50..d97cd18 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -684,3 +684,69 @@ dates ) }) + +test_that("10. implicit dependency, leap year", { + +a <- as.POSIXct('1995-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1995-03-01 18:00:00', tz = 'UTC') +y1 <- seq(a, b, by = 'days') +a <- as.POSIXct('1996-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1996-03-01 18:00:00', tz = 'UTC') +y2 <- seq(a, b, by = 'days') +y2 <- y2[-3] # remove 28 Feb +time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) +time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data, 'Variables')$common$time + +expect_equal( +dim(dates), +c(file_date = 2, time = 3) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(aperm(dates, 2:1)), +as.vector(time_array) +) + +suppressWarnings( +dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + + +}) + -- GitLab From e720eca6ad64ac1d85712410fe34587128785182 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 14 Apr 2022 16:30:24 +0200 Subject: [PATCH 43/45] Remove the extra dimension in metadata reshaping --- R/zzz.R | 64 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index b2a8add..d946ebf 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1151,7 +1151,12 @@ remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadat # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions -rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) { +rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) { + + rebuild_data <- ifelse(is.null(data_array), FALSE, TRUE) + rebuild_metadata <- ifelse(is.null(metadata), FALSE, TRUE) + # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 @@ -1183,88 +1188,91 @@ rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indice new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) } - if (!is.null(data_array)) { - data_array_no_split <- array(data_array, dim = new_dims) + if (rebuild_data) { + data_array <- array(data_array, dim = new_dims) # seperate 'time' dim into each work_piece length - data_array_seperate <- list() - array_piece <- list() + data_array_seperate <- vector('list', length = length(length_inner_across_dim)) + array_piece <- vector('list', length = length(final_order_list)) } - if (!is.null(metadata)) { - metadata_no_split <- array(metadata, dim = new_dims) - metadata_seperate <- list() - metadata_piece <- list() + if (rebuild_metadata) { + metadata <- array(metadata, dim = length(metadata)) #metadata_no_split + names(dim(metadata)) <- across_inner_dim + metadata_seperate <- vector('list', length = length(length_inner_across_dim)) + metadata_piece <- vector('list', length = length(final_order_list)) } tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { - if (!is.null(data_array)) { - data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + if (rebuild_data) { + data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) } - if (!is.null(metadata)) { - metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata_no_split, + if (rebuild_metadata) { + metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) } } + # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) sort_which_chunk <- sort(unique(which_chunk)) which_chunk <- sapply(lapply(which_chunk, '==', sort_which_chunk), which) - how_many_indices <- unlist(final_order_list) - if (!is.null(data_array)) { + if (rebuild_data) { ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) - } else if (!is.null(metadata)) { + } else if (rebuild_metadata) { ind_in_array_seperate <- as.list(rep(1, length(metadata_seperate))) } for (i in 1:length(final_order_list)) { - if (!is.null(data_array)) { + if (rebuild_data) { array_piece[[i]] <- ClimProjDiags::Subset( data_array_seperate[[which_chunk[i]]], across_inner_dim, ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) } - if (!is.null(metadata)) { + if (rebuild_metadata) { metadata_piece[[i]] <- ClimProjDiags::Subset( metadata_seperate[[which_chunk[i]]], across_inner_dim, ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) } ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] } - + # re-build the array: paste - if (!is.null(data_array)) { + if (rebuild_data) { data_array_tmp <- array_piece[[1]] } else { data_array_tmp <- NULL } - if (!is.null(metadata)) { + if (rebuild_metadata) { metadata_tmp <- metadata_piece[[1]] } else { metadata_tmp <- NULL } - if (!is.null(data_array)) { + if (rebuild_data) { along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) length_piece <- length(array_piece) - } else if (!is.null(metadata)) { - along_pos <- which(names(dim(metadata_tmp)) == across_inner_dim) - length_piece <- length(metadata_piece) + } + if (rebuild_metadata) { + along_pos_metadata <- which(names(dim(metadata_tmp)) == across_inner_dim) + if (!rebuild_data) + length_piece <- length(metadata_piece) } if (length_piece > 1) { for (i in 2:length_piece) { - if (!is.null(data_array)) { + if (rebuild_data) { data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], along = along_pos) } - if (!is.null(metadata)) { + if (rebuild_metadata) { metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]], - along = along_pos) + along = along_pos_metadata) } } } -- GitLab From 234e6614877bcdb4ce2ef96d79d3a0ef53ac6a7c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 19 Apr 2022 17:15:08 +0200 Subject: [PATCH 44/45] Bump version to 2.2.0-1 and update NEWS.md --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9f482a6..af073b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.2.0 +Version: 2.2.0-1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index c542dd1..11f5a30 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# startR v2.2.0-1 (Release date: 2022-04-19) +- Bugfix for the case that the variable has units like time, e.g., "days". +- Development of metadata reshaping. The metadata should correspond to data if data are reshaped by parameter "merge_across_dims" and "split_multiselected_dims", as well as if data selectors are not continuous indices. +- Development of multiple dependency by array selector. An inner dimension indices can vary with multiple file dimensions. + # startR v2.2.0 (Release date: 2022-02-11) - License changes to Apache License 2.0 - R version dependency changes to >= 3.6.0 -- GitLab From f68e23a69c1e0489b7c7fe1b57f3556867459708 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 22 Apr 2022 13:47:01 +0200 Subject: [PATCH 45/45] Modify time unit judgement to consider the time variable that doesn't have name 'time'. --- R/NcDataReader.R | 13 ++- R/zzz.R | 2 +- .../testthat/test-Start-implicit_inner_dim.R | 1 + tests/testthat/test-Start-time_unit.R | 91 +++++++++++++++++++ 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test-Start-time_unit.R diff --git a/R/NcDataReader.R b/R/NcDataReader.R index 95fd1f1..9c85e33 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -184,8 +184,17 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, }) if (length(names(attr(result, 'variables'))) == 1) { - # The 1st condition is for implicit time dim (if time length = 1, it is allowed to not be defined in Start call. Therefore, it is not in the list of synonims) - if (names(attr(result, 'variables')) == 'time' | 'time' %in% synonims[[names(attr(result, 'variables'))]]) { + # The 1st condition is for implicit time dim (if time length = 1, it is + # allowed to not be defined in Start call. Therefore, it is not in the list + # of synonims); + # the 2nd condition is for the normal case; the 3rd one is that if return_vars + # has a variable that is not 'time'. The only way to know if it should be time + # is to check calendar. + # All these conditions are to prevent the variables with time-like units but + # actually not a time variable, e.g., drought period [days]. + if (names(attr(result, 'variables')) == 'time' | + 'time' %in% synonims[[names(attr(result, 'variables'))]] | + 'calendar' %in% names(attr(result, 'variables')[[1]])) { var_name <- names(attr(result, 'variables')) units <- attr(result, 'variables')[[var_name]][['units']] diff --git a/R/zzz.R b/R/zzz.R index d946ebf..6b6189b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1430,7 +1430,7 @@ generate_picked_var_of_read <- function(var_to_read, var_to_check, array_of_file if (any(names(array_var_dims) %in% names(var_file_dims))) { array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))] } - if (names(array_var_dims) != names(var_dims)) { + if (any(names(array_var_dims) != names(var_dims))) { stop("Error while reading the variable '", var_to_read, "' from ", "the file. Dimensions do not match.\nExpected ", paste(paste0("'", names(array_var_dims), "'"), collapse = ', '), diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 6a3262a..3788af0 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -3,6 +3,7 @@ context("Start() implicit inner dimension") # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. #--------------------------------------------------------------- +#NOTE: Also useful for test-Start-time_unit.R test3 test_that("1. time = 1", { diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R new file mode 100644 index 0000000..3aa1930 --- /dev/null +++ b/tests/testthat/test-Start-time_unit.R @@ -0,0 +1,91 @@ +context("To detect the variable with time format and adjust the units") + +test_that("1. The data has units like time", { + + +suppressWarnings( +FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', + var = 'FD', # units: days + time = indices(1), + longitude = indices(1), + latitude = indices(1), + num_procs = 1, + return_vars = list(time = NULL), + retrieve = TRUE) +) +suppressWarnings( +FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', + var = 'FD', # units: days + time = indices(1), + longitude = indices(1), + latitude = indices(1), + num_procs = 1, +# return_vars = list(time = NULL), + retrieve = TRUE) +) +expect_equal( +attr(FD, 'Variables')$common$FD$units, +'days' +) +expect_equal( +attr(FD2, 'Variables')$common$FD$units, +'days' +) + + +}) + +test_that("2. The metadata variable name is not time", { +# VITIGEOOS + +vari <- "rsds" +anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", + "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc") + +file_date_array <- array(dim = c(sweek = 2, sday = 3)) +file_date_array[, 1] <- c(paste0('04', c('04', '07'))) +file_date_array[, 2] <- c(paste0('04', c('07', '11'))) +file_date_array[, 3] <- c(paste0('04', c('11', '14'))) + +suppressWarnings( + hcst <- Start(dat = anlgs, + var = vari, + latitude = indices(1), #'all', + longitude= indices(1), #'all', + member= indices(1), #'all', + time = 'all', # inner dim!! + syear = 'all', #inner dim!! + file_date = file_date_array, + split_multiselected_dims = TRUE, + retrieve = T, + return_vars = list(leadtimes = 'file_date'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'), + syear = c('sdate','syear'), + member = c('ensemble','member'))) +) +time_attr <- attr(hcst, 'Variables')$common$leadtimes + +expect_equal( +dim(time_attr), +c(sweek = 2, sday = 3, syear = 20, time = 4) +) +expect_equal( +time_attr[,1,1,1], +as.POSIXct(c("1999-04-08","1999-04-11"), tz = "UTC") +) +expect_equal( +time_attr[2,,1,1], +as.POSIXct(c("1999-04-11","1999-04-15", "1999-04-18"), tz = "UTC") +) +expect_equal( +time_attr[1,1,20,], +as.POSIXct(c("2018-04-08", "2018-04-15", "2018-04-22 UTC", "2018-04-29 UTC"), tz = "UTC") +) + +}) + + +#test_that("3. Time dimension is implicit", { +# See test-Start-implicit_inner_dim.R +#}) -- GitLab