diff --git a/R/Start.R b/R/Start.R index f61533603f3ad4f01f1a18ed2361bd7ad0a54ba3..ac4fabaa946192efe50f0b49b77bbbbda26fa005 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3666,6 +3666,7 @@ Start <- function(..., # dim = indices/selectors, # Find the dimension to split if split_multiselected_dims = TRUE. # If there is no dimension able to be split, change split_multiselected_dims to FALSE. all_split_dims <- NULL + inner_dim_has_split_dim <- NULL if (split_multiselected_dims) { tmp <- dims_split(dim_params, final_dims_fake) final_dims_fake <- tmp[[1]] @@ -3676,6 +3677,16 @@ Start <- function(..., # dim = indices/selectors, split_multiselected_dims <- FALSE .warning(paste0("Not found any dimensions able to be split. The parameter ", "'split_multiselected_dims' is changed to FALSE.")) + } else { + tmp_fun <- function (x, y) { + any(names(dim(x)) %in% y) + } + inner_dim_has_split_dim <- names(which(unlist(lapply( + picked_common_vars, tmp_fun, names(all_split_dims))))) + if (!identical(inner_dim_has_split_dim, character(0))) { + # If merge_across_dims also, it will be replaced later + saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + } } } #====================================================================== @@ -3687,6 +3698,9 @@ Start <- function(..., # dim = indices/selectors, across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of each inner_dim ('time') along each file_dim ('file_date') length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length) + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + # Save attributes for later use. If split_multiselected_dims, this variable has been created above but is replaced here + saved_reshaped_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') if (merge_across_dims_narm & !split_multiselected_dims) { final_dims_fake <- merge_narm_dims(final_dims_fake, across_inner_dim, length_inner_across_dim) @@ -3732,20 +3746,10 @@ Start <- function(..., # dim = indices/selectors, all_split_dims[[1]] <- tmp[[2]] } } - - if (merge_across_dims) { - # Save dim and attributes for later use - dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) - tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') - # Find final_dim_fake for metadata and put it in an array - if (!split_multiselected_dims) { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] - } else { - final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] - } - } else if (split_multiselected_dims) { - tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') - final_dims_fake_merge_dim <- all_split_dims[[1]] + if (merge_across_dims | split_multiselected_dims) { + final_dims_fake_metadata <- find_final_dims_fake_metadata( + merge_across_dims, split_multiselected_dims, picked_common_vars, across_inner_dim, + final_dims_fake, dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) } # The following several lines will only run if retrieve = TRUE @@ -3887,7 +3891,7 @@ Start <- function(..., # dim = indices/selectors, stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } - if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) } @@ -3903,7 +3907,7 @@ Start <- function(..., # dim = indices/selectors, } data_array <- array(data_array_tmp, dim = final_dims_fake) - metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. # To get the user-required dim order, we need to reorder the array again. @@ -3921,7 +3925,7 @@ Start <- function(..., # dim = indices/selectors, } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) @@ -3936,26 +3940,25 @@ Start <- function(..., # dim = indices/selectors, tmp[file_dim_pos] <- inner_dim_pos picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) } - metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if ('time' %in% synonims[[across_inner_dim]]) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } if (split_multiselected_dims) { - if (names(all_split_dims) %in% names(picked_common_vars)) { - metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates - if ('time' %in% synonims[[names(all_split_dims)]]) { + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } - picked_common_vars[[names(all_split_dims)]] <- metadata_tmp - attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr } } - } gc() @@ -4023,7 +4026,7 @@ Start <- function(..., # dim = indices/selectors, metadata_tmp <- tmp$merge_dim_metadata } - if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly or contact support.")) } @@ -4032,10 +4035,11 @@ Start <- function(..., # dim = indices/selectors, # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array = NULL, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) metadata_tmp <- tmp$metadata } - metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) # If split_multiselected_dims + merge_across_dims, the dimension order may change above. # To get the user-required dim order, we need to reorder the array again. @@ -4052,7 +4056,7 @@ Start <- function(..., # dim = indices/selectors, metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } else { # ! (merge_across_dims + split_multiselected_dims) (old version) if (merge_across_dims) { # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) @@ -4065,23 +4069,23 @@ Start <- function(..., # dim = indices/selectors, tmp[file_dim_pos] <- inner_dim_pos picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) } - metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates if ('time' %in% synonims[[across_inner_dim]]) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[across_inner_dim]] <- metadata_tmp - attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } if (split_multiselected_dims) { - if (names(all_split_dims) %in% names(picked_common_vars)) { - metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates - if ('time' %in% synonims[[names(all_split_dims)]]) { + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } - picked_common_vars[[names(all_split_dims)]] <- metadata_tmp - attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr } } } diff --git a/R/Utils.R b/R/Utils.R index 425336eff1b4d908a3030cca40bcbfed65d2ee65..11d239f6ed99c5fc376658d72f7eac281c45a152 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -844,3 +844,17 @@ .KnownLatNames <- function() { known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') } + +.ReplaceElementInVector <- function(x, target, new_val) { + # x is a vector with name + # target is a string + # new_val is a vector with name + # E.g., Change [a = 2, b = 3] to [c = 1, d = 2, b = 3], then: + # x = c(a = 2, b = 3), target = 'a', new_val = c(c = 1, d = 2) + new_names <- unlist(lapply(as.list(names(x)), function(x) if (x == target) names(new_val) else x)) + new_list <- vector('list', length = length(new_names)) + for (i in 1:length(new_list)) { + new_list[[i]] <- c(new_val, x)[which(c(names(new_val), names(x)) == new_names[i])] + } + return(unlist(new_list)) +} diff --git a/R/zzz.R b/R/zzz.R index 83e2b7b7846e6e7427de58d00477f5152884f107..8055c69aa16a786518d846403d1535a1d9fa743c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -822,6 +822,30 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina return(list(final_dims_fake, all_split_dims)) } +# Find the final_dims_fake for metadata if it needs to be reshaped +find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims, + picked_common_vars, across_inner_dim, final_dims_fake, + dims_of_merge_dim, all_split_dims, inner_dim_has_split_dim) { + final_dims_fake_metadata <- NULL + if (merge_across_dims) { + if (!split_multiselected_dims) { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + if (!identical(inner_dim_has_split_dim, character(0))) { + target_split_dim_ind <- which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == names(all_split_dims)) + margin_dim_ind <- c(1:length(dim(picked_common_vars[[inner_dim_has_split_dim]])))[-target_split_dim_ind] + if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { + final_dims_fake_metadata <- all_split_dims[[1]] + } else { + final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars[[inner_dim_has_split_dim]]), target = names(all_split_dims), new_val = all_split_dims[[1]]) + } + } + } + return(final_dims_fake_metadata) +} # Build the work pieces. build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims, diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 91a793d53572f502442a6e89d519b849ebd0c825..2374571d2e0653491ad7fbd2de4f16b699f0917a 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -387,7 +387,7 @@ dates }) -test_that("6. split dim only", { +test_that("6. split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), @@ -544,3 +544,76 @@ dates }) + +test_that("8. split sdate dim", { + +file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), + dim = c(syear = 3, smonth = 2)) +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(syear = 3, smonth = 2, time = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +dates[, 1, 1], +seq(as.POSIXct('1993-07-01', tz = 'UTC'), as.POSIXct('1995-07-01', tz = 'UTC'), by = 'year') +) +expect_equal( +dates[, 2, 2], +seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00:00', tz = 'UTC'), by = 'year') +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index fe686dc619c7aae495a6bd2815a35aefe1a4fd1a..8793296fd06001424a32a0a8369803a0a1831243 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -176,10 +176,10 @@ c(dat = 1, var = 1, smonth = 2, syear = 2, time = 1, latitude = 18, longitude = ) expect_equal( dim(attr(obs, 'Variables')$common$time), -c(file_date = 4, time = 1) +c(smonth = 2, syear = 2, time = 1) ) expect_equal( -attr(obs, 'Variables')$common$time[1, 1], +attr(obs, 'Variables')$common$time[1, 1, 1], as.POSIXct('2013-11-15', tz = 'UTC') )