From 6e9d69467d904e7eceadf1b8db7d879ae50bf1f9 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Mar 2022 18:04:15 +0100 Subject: [PATCH 1/9] Enable implicit dependency on multiple file dims --- R/zzz.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 0067d6d..fa1e440 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -873,7 +873,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['fri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['fri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['fri']][[vec_ind]] + } else { #old code + selectors[[x]][['fri']][[which_chunk]] + } } else { selectors[[x]][['fri']][[1]] } @@ -889,7 +898,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['sri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['sri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['sri']][[vec_ind]] + } else { #old code + selectors[[x]][['sri']][[which_chunk]] + } } else { selectors[[x]][['sri']][[1]] } -- GitLab From a83cc261812f7fef4d3ba76c9c82682bfa9c78f2 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Mar 2022 18:13:55 +0100 Subject: [PATCH 2/9] Add unit test (undone) --- ...st-Start-implicit_dependency_by_selector.R | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 5c2f050..5ad93b9 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -115,4 +115,38 @@ tolerance = 0.0001 ) +}) + +test_that("3. region depends on member and sdate", { +#NOTE: This case, region indices are not dependent on sdate. But it should work if it is +reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) + +path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +suppressWarnings( +data <- Start(dat = path_SR, + var = 'tosmean', + memb = paste0('r', c(24, 28), 'i1p1f1'), + region = reg, + region_var = 'region', + sdate = paste0(2000:2002), + time = c(1:4), + chunk = 'all', + chunk_depends = 'sdate', + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time=c('sdate','chunk'), region=c('sdate', 'memb')), + retrieve = T) +) + +expect_equal( +dim(data), +c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) +) + +#expect_equal( +#dim(attr(data, 'Variables')$common$region), +#c(memb = 2, sdate = 3, region = 2) +#) + + }) -- GitLab From c860762fc675a19d3f8b0473066ca09756eab1ad Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 15 Mar 2022 18:34:59 +0100 Subject: [PATCH 3/9] Improve return_vars check for implicit dependency case --- R/Start.R | 12 +++++++++--- R/zzz.R | 2 +- .../test-Start-implicit_dependency_by_selector.R | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/R/Start.R b/R/Start.R index 6fc05ea..afa13db 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1751,10 +1751,16 @@ Start <- function(..., # dim = indices/selectors, (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { need_correct <- TRUE - } else if (inner_dim %in% names(common_return_vars) & - !is.null(names(inner_dims_across_files))) { - if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) { + } else if (inner_dim %in% names(common_return_vars) & + (inner_dim %in% inner_dims_across_files) & + !is.null(names(inner_dims_across_files))) { #(2) + if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) need_correct <- TRUE + + } else if (inner_dim %in% names(common_return_vars) & + is.character(file_dim_as_selector_array_dim)) { #(1) + if (!all(file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])) { need_correct <- TRUE + file_dim_as_selector_array_dim <- file_dim_as_selector_array_dim[which(!file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])] } } if (need_correct) { diff --git a/R/zzz.R b/R/zzz.R index fa1e440..01c8795 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -429,7 +429,7 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter file_dim_as_selector_array_dim) { # inner_dim is not in return_vars or is NULL if (is.character(file_dim_as_selector_array_dim)) { #(1) - if (file_dim_as_selector_array_dim %in% found_pattern_dim) { + if (any(file_dim_as_selector_array_dim %in% found_pattern_dim)) { stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '", found_pattern_dim, "', which is not allowed. To assign the dependency on the pattern dim, ", diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 5ad93b9..92aa488 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -142,6 +142,10 @@ expect_equal( dim(data), c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) ) +expect_equal( +as.vector(drop(data)[,,1]), +c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499) +) #expect_equal( #dim(attr(data, 'Variables')$common$region), -- GitLab From cb6c3921155ce8118224db6ffb6c81bafdcd2b14 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Mar 2022 17:30:48 +0100 Subject: [PATCH 4/9] Several minor bugfixes --- R/Start.R | 160 +++++++++--------- ...st-Start-implicit_dependency_by_selector.R | 3 +- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git a/R/Start.R b/R/Start.R index afa13db..f57ea3c 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1731,7 +1731,11 @@ Start <- function(..., # dim = indices/selectors, #//////////////////////////////////////////// # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or # (2) time_across = 'sdate'. - # NOTE: Here is not in for loop of dat[[i]] + # NOTE: Not sure if the loop over dat is needed here. In theory, all the dat + # should have the same dimensions (?) so expected_inner_dims and + # found_file_dims are the same. The selector_array may possible be + # different, but then the attribute will be correct? If it's different, + # it should depend on 'dat' (but here we only consider common_return_vars) for (i in 1:length(dat)) { for (inner_dim in expected_inner_dims[[i]]) { # The selectors for the inner dimension are taken. @@ -2174,11 +2178,11 @@ Start <- function(..., # dim = indices/selectors, print("-> DEFINING INDICES FOR INNER DIMENSION:") print(inner_dim) } - file_dim <- NULL + crossed_file_dim <- NULL if (inner_dim %in% unlist(inner_dims_across_files)) { - file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] - chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]]) - names(chunk_amount) <- file_dim + crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim } else { chunk_amount <- 1 } @@ -2244,7 +2248,7 @@ Start <- function(..., # dim = indices/selectors, if (!is.null(var_with_selectors_name)) { if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { with_transform <- TRUE - if (!is.null(file_dim)) { + if (!is.null(crossed_file_dim)) { stop("Requested a transformation over the dimension '", inner_dim, "', wich goes across files. This feature ", "is not supported. Either do the request without the ", @@ -2384,7 +2388,7 @@ Start <- function(..., # dim = indices/selectors, #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash if (length(var_dims) > 1) { stop("Specified a '", inner_dim, "_var' for the dimension '", - inner_dim, "', which goes across files (across '", file_dim, + inner_dim, "', which goes across files (across '", crossed_file_dim, "'). The specified variable, '", var_with_selectors_name, "', has more ", "than one dimension and can not be used as selector variable. ", "Select another variable or fix it in the files.") @@ -2418,7 +2422,7 @@ Start <- function(..., # dim = indices/selectors, # need to know each length to create the indices for each file later. # Record 'inner_dim_lengths' here for later usage. inner_dim_lengths <- NULL - if (largest_dims_length & !is.null(file_dim)) { + if (largest_dims_length & !is.null(crossed_file_dim)) { # inner_dim_lengths here includes all the files, but we only want # the files of fyear for certain "sdate". We will categorize it later. inner_dim_lengths <- tryCatch({ @@ -2429,11 +2433,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(file_dims != file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(found_file_dims[[i]] != crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(file_dims != file_dim)) + lapply(selector_indices_save[[i]], '[', which(found_file_dims[[i]] != crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2484,9 +2488,9 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) { + if ((!is.null(crossed_file_dim)) && !(crossed_file_dim %in% names(var_file_dims))) { stop("The variable '", var_with_selectors_name, "' must also be ", - "requested for the file dimension '", file_dim, "' in ", + "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") } fri <- vector('list', length = prod(var_file_dims)) @@ -2581,11 +2585,11 @@ Start <- function(..., # dim = indices/selectors, unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims))) if ((length(unmatching_file_dims) > 0)) { raise_error <- FALSE - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == file_dim) && + (names(var_file_dims)[unmatching_file_dims] == crossed_file_dim) && (inner_dim %in% names(selector_inner_dims)))) { raise_error <- TRUE } @@ -2608,23 +2612,23 @@ Start <- function(..., # dim = indices/selectors, } ## TODO: If var dimensions are not in the same order as selector dimensions, reorder if (is.null(names(selector_file_dims))) { - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { fri_dims <- 1 } else { fri_dims <- chunk_amount - names(fri_dims) <- file_dim + names(fri_dims) <- crossed_file_dim } } else { fri_dim_names <- names(selector_file_dims) - if (!is.null(file_dim)) { - fri_dim_names <- c(fri_dim_names, file_dim) + if (!is.null(crossed_file_dim)) { + fri_dim_names <- c(fri_dim_names, crossed_file_dim) } fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)] fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(file_dim)) { - fri_dims[file_dim] <- chunk_amount + if (!is.null(crossed_file_dim)) { + fri_dims[crossed_file_dim] <- chunk_amount } } fri <- vector('list', length = prod(fri_dims)) @@ -2669,13 +2673,13 @@ Start <- function(..., # dim = indices/selectors, print(str(sub_array_of_values)) print(dim(sub_array_of_values)) print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") - print(file_dim) + print(crossed_file_dim) } } # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { + if ((!is.null(crossed_file_dim) && (crossed_file_dim %in% names(selector_file_dims))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { @@ -3191,8 +3195,8 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(file_dim)) { - taken_chunks[selector_store_position[[file_dim]]] <- TRUE + if (!is.null(crossed_file_dim)) { + taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE } else { taken_chunks <- TRUE } @@ -3204,7 +3208,7 @@ Start <- function(..., # dim = indices/selectors, print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") } } - # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. + # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. if (chunks[[inner_dim]]["n_chunks"] > 1 & inner_dim %in% inner_dims_across_files) { stop("Chunk over dimension '", inner_dim, "' is not allowed because '", inner_dim, "' is across '", @@ -3300,7 +3304,7 @@ Start <- function(..., # dim = indices/selectors, for (chunk in 1:chunk_amount) { if (!is.null(names(selector_store_position))) { - selector_store_position[file_dim] <- chunk + selector_store_position[crossed_file_dim] <- chunk } else { selector_store_position <- chunk } @@ -3362,7 +3366,7 @@ Start <- function(..., # dim = indices/selectors, } } else { stop("Provided array of indices for dimension '", inner_dim, "', ", - "which goes across the file dimension '", file_dim, "', but ", + "which goes across the file dimension '", crossed_file_dim, "', but ", "the provided array does not have the dimension '", inner_dim, "', which is mandatory.") } @@ -3402,12 +3406,12 @@ Start <- function(..., # dim = indices/selectors, # end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)] # chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove))) chunks_to_keep <- which(taken_chunks) - dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep)) - # found_indices <- Subset(found_indices, file_dim, chunks_to_keep) + dims_to_crop[[crossed_file_dim]] <- c(dims_to_crop[[crossed_file_dim]], list(chunks_to_keep)) + # found_indices <- Subset(found_indices, crossed_file_dim, chunks_to_keep) # # Crop dataset variables file dims. # for (picked_var in names(picked_vars[[i]])) { - # if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { - # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep) + # if (crossed_file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], crossed_file_dim, chunks_to_keep) # } # } } @@ -3481,53 +3485,53 @@ Start <- function(..., # dim = indices/selectors, Subset(transformed_var_with_selectors, inner_dim, crop_indices) } } else { - if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim - if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { - tmp <- common_vars_to_crop[[common_var_to_crop]] - tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) - dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) - if (!identical(dim_extra_ind, integer(0))) { - tmp_list <- asplit(tmp, dim_extra_ind) - dim_file_ind <- which(names(dim(tmp_list[[1]])) == file_dim) - tmp_list <- lapply(tmp_list, asplit, dim_file_ind) - } else { # only file_dim and inner_dim - dim_file_ind <- which(names(dim(tmp)) == file_dim) - tmp_list <- asplit(tmp, dim_file_ind) - # Add another layer to be consistent with the first case above - tmp_list <- list(tmp_list) - } - max_fri_length <- max(sapply(fri, length)) - for (i_extra_dim in 1:length(tmp_list)) { - for (i_fri in 1:length(fri)) { - tmp_list[[i_extra_dim]][[i_fri]] <- - tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] - - if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim + if (crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + tmp <- common_vars_to_crop[[common_var_to_crop]] + tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) + dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) + if (!identical(dim_extra_ind, integer(0))) { + tmp_list <- asplit(tmp, dim_extra_ind) + dim_file_ind <- which(names(dim(tmp_list[[1]])) == crossed_file_dim) + tmp_list <- lapply(tmp_list, asplit, dim_file_ind) + } else { # only crossed_file_dim and inner_dim + dim_file_ind <- which(names(dim(tmp)) == crossed_file_dim) + tmp_list <- asplit(tmp, dim_file_ind) + # Add another layer to be consistent with the first case above + tmp_list <- list(tmp_list) + } + max_fri_length <- max(sapply(fri, length)) + for (i_extra_dim in 1:length(tmp_list)) { + for (i_fri in 1:length(fri)) { tmp_list[[i_extra_dim]][[i_fri]] <- - c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] + + if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + tmp_list[[i_extra_dim]][[i_fri]] <- + c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + } } } - } - # Change list back to array - tmp_new_dim <- c(max_fri_length, dim(tmp)[file_dim], dim(tmp)[dim_extra_ind]) - names(tmp_new_dim) <- c(inner_dim, file_dim, names(dim(tmp))[dim_extra_ind]) - common_vars_to_crop[[common_var_to_crop]] <- - array(unlist(tmp_list), dim = tmp_new_dim) - # Reorder back - common_vars_to_crop[[common_var_to_crop]] <- - aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) - # Put attributes back - tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) - attributes(common_vars_to_crop[[common_var_to_crop]]) <- - c(attributes(common_vars_to_crop[[common_var_to_crop]]), - tmp_attributes[tmp]) - - if ('time' %in% synonims[[common_var_to_crop]]) { - # Convert number back to time + # Change list back to array + tmp_new_dim <- c(max_fri_length, dim(tmp)[crossed_file_dim], dim(tmp)[dim_extra_ind]) + names(tmp_new_dim) <- c(inner_dim, crossed_file_dim, names(dim(tmp))[dim_extra_ind]) common_vars_to_crop[[common_var_to_crop]] <- - as.POSIXct(common_vars_to_crop[[common_var_to_crop]], - origin = "1970-01-01", tz = 'UTC') - } + array(unlist(tmp_list), dim = tmp_new_dim) + # Reorder back + common_vars_to_crop[[common_var_to_crop]] <- + aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Put attributes back + tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) + attributes(common_vars_to_crop[[common_var_to_crop]]) <- + c(attributes(common_vars_to_crop[[common_var_to_crop]]), + tmp_attributes[tmp]) + + if ('time' %in% synonims[[common_var_to_crop]]) { + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } } } else { # old code @@ -3866,7 +3870,7 @@ Start <- function(..., # dim = indices/selectors, } } #print("P") - + # If merge_across_dims = TRUE, there might be additional NAs due to unequal # inner_dim ('time') length across file_dim ('file_date'). # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. @@ -3961,9 +3965,8 @@ Start <- function(..., # dim = indices/selectors, attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr } } - } - + gc() # Load metadata and remove the metadata folder @@ -4038,7 +4041,8 @@ Start <- function(..., # dim = indices/selectors, # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { tmp <- rebuild_array_merge_split( - data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array = NULL, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) metadata_tmp <- tmp$metadata } metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 92aa488..1f320bf 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -144,7 +144,8 @@ c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) ) expect_equal( as.vector(drop(data)[,,1]), -c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499) +c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), +tolerance = 0.0001 ) #expect_equal( -- GitLab From 785e989d7c689a11bd10f30d34643093c6fe6848 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Apr 2022 18:50:19 +0200 Subject: [PATCH 5/9] Add region depending on sdate & member case in document --- inst/doc/faq.md | 4 ++ inst/doc/usecase/ex1_13_implicit_dependency.R | 54 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 689e13a..83065d4 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -927,6 +927,10 @@ data <- Start(dat = path, region = 'sdate'), retrieve = T) ``` +The dependency can be on more than one file dimension. What you need to do is just creat +an array with the depended file dimension as the array dimension name. See more examples +in [use case ex1_13](inst/doc/usecase/ex1_13_implicit_dependency.R). + ### 23. The best practice of using vector and list for selectors There are three ways to define the selectors in Start(): `indices()`, `values()`, and character string diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 1ea9381..6740a21 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -1,5 +1,7 @@ # Author: An-Chi Ho # Date: 13th July 2021 +# Implement case 3: 6th April 2022 + #--------------------------------------------------------------------- # This script shows how to use a value array as the inner dimension selector to express # dependency on a file dimension. By this means, we don't need to specify the *_across @@ -11,6 +13,8 @@ # index for Nino3. sdate 1993 has 'Nino3' at index 9 while sdate 2013 has 'Nino3' at # index 11. Create a value array for region selector so Start() can look for 'Nino3' in # each file. +# In the third case, 'region' is defined as an array that has dimensions 'sdate', 'member', +# and 'region'. It works if region indices is dependent on both sdate and member. #--------------------------------------------------------------------- library(startR) @@ -80,9 +84,59 @@ data[1, 1, , 1, ] #[2,] 24.47482 24.75953 # --> region index 11 in orginal file +#============================================================================= + +# Case 3: 'region' depends on 'sdate' and 'member' + +#NOTE: Actually, the region indices are not dependent on sdate in this case, but +# it should work if it is. If you have a better example, please let me know. +region <- array(c('Nino3.4', 'Nino3'), dim = c(region = 2, sdate = 3, memb = 5)) +# check the array +region[, 1, 1] +#[1] "Nino3.4" "Nino3" +region[, 1, 2] +#[1] "Nino3.4" "Nino3" +region[, 2, 2] +#[1] "Nino3.4" "Nino3" +#--> For each sdate-memb combination, the desired regions are "Nino3.4" and "Nino3". +path <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/', + 'EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/', + '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +data <- Start(dat = path, + var = 'tosmean', + memb = paste0('r', c(24:28), 'i1p1f1'), + region = region, + region_var = 'region', + sdate = paste0(2000:2002), + time = 'all', + chunk = 'all', + chunk_depends = 'sdate', + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time = c('sdate','chunk'), + region = c('sdate', 'memb')), + retrieve = T) +# Check output +## Nino3.4 +drop(data)[ , 1, , 1] +# [,1] [,2] [,3] +#[1,] 26.87246 27.28198 27.65627 +#[2,] 26.87331 27.31887 27.63275 +#[3,] 26.89038 27.31446 27.58801 +#[4,] 26.90285 27.26750 27.66004 +#[5,] 26.88851 27.28953 27.68499 + +## Nino3 + drop(data)[ , 2, , 1] +# [,1] [,2] [,3] +#[1,] 26.58774 26.38932 26.80643 +#[2,] 26.58879 26.43760 26.68655 +#[3,] 26.59319 26.41373 26.64150 +#[4,] 26.69607 26.40465 26.69096 +#[5,] 26.59114 26.40454 26.71252 -- GitLab From 2893ef019e6220e2b38e417879376c00f8992b46 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Apr 2022 18:50:33 +0200 Subject: [PATCH 6/9] Correct typo --- R/Start.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index adb87bf..0e0e90b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2605,7 +2605,7 @@ Start <- function(..., # dim = indices/selectors, } if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) { if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) { - stop("Size of selector file dimensions must mach size of requested ", + stop("Size of selector file dimensions must match size of the corresponding ", "variable dimensions.") } } -- GitLab From cae3db01acc5ef1a6bf9139bc2f10dc4f71e5845 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:39:59 +0200 Subject: [PATCH 7/9] Correct version folder name --- tests/testthat/test-Start-calendar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 10cf7bf..53122c8 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, -- GitLab From c4adfb3375cb3a4a09bb874cf1ffcb44d683e13d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:49:38 +0200 Subject: [PATCH 8/9] import method::is --- NAMESPACE | 1 + R/AddStep.R | 1 + R/ByChunks.R | 1 + R/Compute.R | 1 + R/SelectorChecker.R | 1 + R/Start.R | 1 + R/Utils.R | 1 + 7 files changed, 7 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 1434a0f..c6bca72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ import(future) import(multiApply) import(parallel) importFrom(ClimProjDiags,Subset) +importFrom(methods,is) importFrom(s2dv,CDORemap) importFrom(stats,na.omit) importFrom(stats,setNames) diff --git a/R/AddStep.R b/R/AddStep.R index a129f1e..00af3ab 100644 --- a/R/AddStep.R +++ b/R/AddStep.R @@ -41,6 +41,7 @@ #' use_attributes = list(data = "Variables")) #' wf <- AddStep(data, step, pi_val = pi_short) #' +#'@importFrom methods is #'@export AddStep <- function(inputs, step_fun, ...) { # Check step_fun diff --git a/R/ByChunks.R b/R/ByChunks.R index 4782ef2..37a554c 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -80,6 +80,7 @@ #' #ByChunks(step, data) #' #'@import multiApply +#'@importFrom methods is #'@noRd ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_load = 2, threads_compute = 1, diff --git a/R/Compute.R b/R/Compute.R index 778d9ce..0aa9424 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -82,6 +82,7 @@ #' wf <- AddStep(data, step) #' res <- Compute(wf, chunks = list(longitude = 4, sdate = 2)) #' +#'@importFrom methods is #'@export Compute <- function(workflow, chunks = 'auto', threads_load = 1, threads_compute = 1, diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 76899c1..7b69a8b 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -35,6 +35,7 @@ #'sub_array_of_values <- seq(90, -90, length.out = 258)[2:257] #'SelectorChecker(sub_array_of_selectors, sub_array_of_values) #' +#'@importFrom methods is #'@export SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tolerance = NULL) { diff --git a/R/Start.R b/R/Start.R index 0e0e90b..93fb3c1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -802,6 +802,7 @@ #'@importFrom utils str #'@importFrom stats na.omit setNames #'@importFrom ClimProjDiags Subset +#'@importFrom methods is #'@export Start <- function(..., # dim = indices/selectors, # dim_var = 'var', diff --git a/R/Utils.R b/R/Utils.R index 11d239f..9bf23e9 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,4 +1,5 @@ #'@import abind +#'@importFrom methods is #'@importFrom ClimProjDiags Subset .chunk <- function(chunk, n_chunks, selectors) { if (any(chunk > n_chunks)) { -- GitLab From 3fe78e2755a4b510248131c52f80c9a1c31adde2 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Apr 2022 10:51:43 +0200 Subject: [PATCH 9/9] Include methods into DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f761503..9f482a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,8 @@ Imports: easyNCDF, s2dv, ClimProjDiags, - PCICt + PCICt, + methods Suggests: stats, utils, -- GitLab