Commit 8862cc98 authored by aho's avatar aho
Browse files

Merge branch 'master' into 'production'

Merge master into production

See merge request !186
parents 96e3ccd4 42ca5c19
Pipeline #7109 passed with stage
in 61 minutes and 12 seconds
......@@ -6,7 +6,7 @@
^README\.md$
#\..*\.RData$
#^vignettes$
^tests$
#^tests$
^inst/doc$
#^inst/doc/*$
#^inst/doc/figures/$
......
Package: startR
Title: Automatically Retrieve Multidimensional Distributed Data Sets
Version: 2.2.0
Version: 2.2.0-1
Authors@R: c(
person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")),
person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")),
......@@ -29,7 +29,8 @@ Imports:
easyNCDF,
s2dv,
ClimProjDiags,
PCICt
PCICt,
methods
Suggests:
stats,
utils,
......
......@@ -24,6 +24,7 @@ import(future)
import(multiApply)
import(parallel)
importFrom(ClimProjDiags,Subset)
importFrom(methods,is)
importFrom(s2dv,CDORemap)
importFrom(stats,na.omit)
importFrom(stats,setNames)
......
# startR v2.2.0-1 (Release date: 2022-04-19)
- Bugfix for the case that the variable has units like time, e.g., "days".
- Development of metadata reshaping. The metadata should correspond to data if data are reshaped by parameter "merge_across_dims" and "split_multiselected_dims", as well as if data selectors are not continuous indices.
- Development of multiple dependency by array selector. An inner dimension indices can vary with multiple file dimensions.
# startR v2.2.0 (Release date: 2022-02-11)
- License changes to Apache License 2.0
- R version dependency changes to >= 3.6.0
......
......@@ -41,22 +41,22 @@
#' use_attributes = list(data = "Variables"))
#' wf <- AddStep(data, step, pi_val = pi_short)
#'
#'@importFrom methods is
#'@export
AddStep <- function(inputs, step_fun, ...) {
# Check step_fun
if (!('startR_step_fun' %in% class(step_fun))) {
if (!is(step_fun, 'startR_step_fun')) {
stop("Parameter 'step_fun' must be a startR step function as returned by Step.")
}
# Check inputs
if (any(c('startR_cube', 'startR_workflow') %in% class(inputs))) {
if (is(inputs, 'startR_cube') | is(inputs, 'startR_workflow')) {
inputs <- list(inputs)
names(inputs) <- 'input1'
}
else if (is.list(inputs)) {
if (any(!sapply(inputs,
function(x) any(c('startR_cube',
'startR_workflow') %in% class(x))))) {
function(x) is(x, 'startR_cube') | is(x, 'startR_workflow')))) {
stop("Parameter 'inputs' must be one or a list of objects of the class ",
"'startR_cube' or 'startR_workflow'.")
}
......@@ -90,7 +90,7 @@ AddStep <- function(inputs, step_fun, ...) {
stop("The target dimensions required by 'step_fun' for the input ", input,
" are not present in the corresponding provided object in 'inputs'.")
}
if ('startR_workflow' %in% class(inputs[[input]])) {
if (is(inputs[[input]], 'startR_workflow')) {
if (is.null(previous_target_dims)) {
previous_target_dims <- attr(inputs[[input]], 'TargetDims')
} else {
......
......@@ -80,6 +80,7 @@
#' #ByChunks(step, data)
#'
#'@import multiApply
#'@importFrom methods is
#'@noRd
ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
threads_load = 2, threads_compute = 1,
......@@ -109,7 +110,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
MergeArrays <- .MergeArrays
# Check input headers
if ('startR_cube' %in% class(cube_headers)) {
if (is(cube_headers, 'startR_cube')) {
cube_headers <- list(cube_headers)
}
if (!all(sapply(lapply(cube_headers, class),
......@@ -411,7 +412,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
timings[['nchunks']] <- prod(unlist(chunks))
# Check step_fun
if (!('startR_step_fun' %in% class(step_fun))) {
if (!is(step_fun, 'startR_step_fun')) {
stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ",
"by the function Step.")
}
......
......@@ -72,7 +72,7 @@
#'
#'@export
Collect <- function(startr_exec, wait = TRUE, remove = TRUE) {
if (!('startR_exec' %in% class(startr_exec))) {
if (!is(startr_exec, 'startR_exec')) {
stop("Parameter 'startr_exec' must be an object of the class ",
"'startR_exec', as returned by Collect(..., wait = FALSE).")
}
......
......@@ -82,6 +82,7 @@
#' wf <- AddStep(data, step)
#' res <- Compute(wf, chunks = list(longitude = 4, sdate = 2))
#'
#'@importFrom methods is
#'@export
Compute <- function(workflow, chunks = 'auto',
threads_load = 1, threads_compute = 1,
......@@ -89,13 +90,13 @@ Compute <- function(workflow, chunks = 'auto',
ecflow_server = NULL, silent = FALSE, debug = FALSE,
wait = TRUE) {
# Check workflow
if (!any(c('startR_cube', 'startR_workflow') %in% class(workflow))) {
if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) {
stop("Parameter 'workflow' must be an object of class 'startR_cube' as ",
"returned by Start or of class 'startR_workflow' as returned by ",
"AddStep.")
}
if ('startR_cube' %in% class(workflow)) {
if (is(workflow, 'startR_cube')) {
#machine_free_ram <- 1000000000
#max_ram_ratio <- 0.5
#data_size <- prod(c(attr(workflow, 'Dimensions'), 8))
......
......@@ -184,119 +184,130 @@ NcDataReader <- function(file_path = NULL, file_object = NULL,
})
if (length(names(attr(result, 'variables'))) == 1) {
var_name <- names(attr(result, 'variables'))
units <- attr(result, 'variables')[[var_name]][['units']]
# The 1st condition is for implicit time dim (if time length = 1, it is
# allowed to not be defined in Start call. Therefore, it is not in the list
# of synonims);
# the 2nd condition is for the normal case; the 3rd one is that if return_vars
# has a variable that is not 'time'. The only way to know if it should be time
# is to check calendar.
# All these conditions are to prevent the variables with time-like units but
# actually not a time variable, e.g., drought period [days].
if (names(attr(result, 'variables')) == 'time' |
'time' %in% synonims[[names(attr(result, 'variables'))]] |
'calendar' %in% names(attr(result, 'variables')[[1]])) {
var_name <- names(attr(result, 'variables'))
units <- attr(result, 'variables')[[var_name]][['units']]
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
# units <- 'secs'
} else if (units == 'minutes') {
# units <- 'mins'
result <- result * 60 # min to sec
}
result[] <- paste(result[], units)
} else if (grepl(' since ', units)) {
# Find the calendar
calendar <- attr(result, 'variables')[[var_name]]$calendar
if (calendar == 'standard') calendar <- 'gregorian'
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
# units <- 'secs'
} else if (units == 'minutes') {
# units <- 'mins'
result <- result * 60 # min to sec
}
result[] <- paste(result[], units)
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
} else if (grepl(' since ', units)) {
# Find the calendar
calendar <- attr(result, 'variables')[[var_name]]$calendar
if (calendar == 'standard') calendar <- 'gregorian'
if (units %in% c('second', 'seconds')) {
# units <- 'secs'
} else if (units %in% c('minute', 'minutes')) {
# units <- 'mins'
result <- result * 60 # min to sec
} else if (units %in% c('hour', 'hours')) {
result <- result * 60 * 60 # hour to sec
} else if (units %in% c('day', 'days')) {
# units <- 'days'
result <- result * 24 * 60 * 60 # day to sec
} else if (units %in% c('month', 'months')) {
# define day in each month
leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Origin year and month
ori_year <- as.numeric(substr(parts[2], 1, 4))
ori_month <- as.numeric(substr(parts[2], 6, 7))
if (is.na(ori_month)) {
ori_month <- as.numeric(substr(parts[2], 6, 6))
}
if (!is.numeric(ori_year) | !is.numeric(ori_month)) {
stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ",
"Check the file or contact the maintainer."))
}
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
if (calendar == 'gregorian') {
# Find how many years + months
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
# Find the leap years we care
if (ori_month <= 2) {
leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear)))
} else {
leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear)))
if (units %in% c('second', 'seconds')) {
# units <- 'secs'
} else if (units %in% c('minute', 'minutes')) {
# units <- 'mins'
result <- result * 60 # min to sec
} else if (units %in% c('hour', 'hours')) {
result <- result * 60 * 60 # hour to sec
} else if (units %in% c('day', 'days')) {
# units <- 'days'
result <- result * 24 * 60 * 60 # day to sec
} else if (units %in% c('month', 'months')) {
# define day in each month
leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Origin year and month
ori_year <- as.numeric(substr(parts[2], 1, 4))
ori_month <- as.numeric(substr(parts[2], 6, 7))
if (is.na(ori_month)) {
ori_month <- as.numeric(substr(parts[2], 6, 6))
}
if (!is.numeric(ori_year) | !is.numeric(ori_month)) {
stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ",
"Check the file or contact the maintainer."))
}
total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet
if (calendar == 'gregorian') {
# Find how many years + months
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
# Find the leap years we care
if (ori_month <= 2) {
leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear)))
} else {
leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear)))
}
total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet
if (month_left != 0) {
if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr
# Is the last year a leap year?
last_leap <- s2dv::LeapYear(ori_year + yr_num)
if (last_leap) {
total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)])
} else {
total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)])
}
} else { # the last month ends in the next yr
if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16
last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005
if (month_left != 0) {
if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr
# Is the last year a leap year?
last_leap <- s2dv::LeapYear(ori_year + yr_num)
if (last_leap) {
total_days <- total_days + sum(leap_month_day[2:12])
total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)])
} else {
total_days <- total_days + sum(no_leap_month_day[2:12])
total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)])
}
} else { # e.g., 2005-04-16 + 11mth = 2006-03-16
last_leap <- s2dv::LeapYear(ori_year + yr_num + 1)
needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1))
if (last_leap) {
total_days <- total_days + sum(leap_month_day[needed_month])
} else {
total_days <- total_days + sum(no_leap_month_day[needed_month])
} else { # the last month ends in the next yr
if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16
last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005
if (last_leap) {
total_days <- total_days + sum(leap_month_day[2:12])
} else {
total_days <- total_days + sum(no_leap_month_day[2:12])
}
} else { # e.g., 2005-04-16 + 11mth = 2006-03-16
last_leap <- s2dv::LeapYear(ori_year + yr_num + 1)
needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1))
if (last_leap) {
total_days <- total_days + sum(leap_month_day[needed_month])
} else {
total_days <- total_days + sum(no_leap_month_day[needed_month])
}
}
}
}
}
result <- total_days * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('365_day',' 365', 'noleap')) {
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)])
result <- total_days * 24 * 60 * 60 # day to sec
result <- total_days * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('365_day',' 365', 'noleap')) {
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)])
result <- total_days * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('360_day', '360')) {
result <- result * 30 * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('360_day', '360')) {
result <- result * 30 * 24 * 60 * 60 # day to sec
} else { #old code. The calendar is not in any of the above.
result <- result * 30.5
result <- result * 24 * 60 * 60 # day to sec
} else { #old code. The calendar is not in any of the above.
result <- result * 30.5
result <- result * 24 * 60 * 60 # day to sec
}
}
}
new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[]
new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC"))
new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[]
new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC"))
#new_array <- seq(as.POSIXct(parts[2]),
# length = max(result, na.rm = TRUE) + 1,
# by = units)[result[] + 1]
dim(new_array) <- dim(result)
attr(new_array, 'variables') <- attr(result, 'variables')
result <- new_array
#new_array <- seq(as.POSIXct(parts[2]),
# length = max(result, na.rm = TRUE) + 1,
# by = units)[result[] + 1]
dim(new_array) <- dim(result)
attr(new_array, 'variables') <- attr(result, 'variables')
result <- new_array
}
}
}
}
}
if (close) {
......
......@@ -35,6 +35,7 @@
#'sub_array_of_values <- seq(90, -90, length.out = 258)[2:257]
#'SelectorChecker(sub_array_of_selectors, sub_array_of_values)
#'
#'@importFrom methods is
#'@export
SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
tolerance = NULL) {
......@@ -93,7 +94,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
tol <- 0
if (!is.null(tolerance)) {
if (!any(class(tolerance) %in% "numeric")) {
if (!is(tolerance, "numeric")) {
stop("Expected a numeric *_tolerance.")
}
tol <- tolerance
......@@ -148,7 +149,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
val <- selectors[[i]]
tol <- 0
if (!is.null(tolerance)) {
if (!any(class(tolerance) %in% "difftime")) {
if (!is(tolerance, "difftime")) {
stop("Expected a difftime *_tolerance.")
}
tol <- tolerance
......@@ -194,7 +195,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
"nearest values."))
}
if (!is.null(tolerance)) {
if (!any(class(tolerance) %in% 'numeric')) {
if (!is(tolerance, 'numeric')) {
stop("Expected a numeric *_tolerance.")
}
}
......@@ -228,7 +229,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
"nearest values."))
}
if (!is.null(tolerance)) {
if (!any(class(tolerance) %in% 'difftime')) {
if (!is(tolerance, 'difftime')) {
stop("Expected a difftime *_tolerance.")
}
}
......
This diff is collapsed.
#'@import abind
#'@importFrom methods is
#'@importFrom ClimProjDiags Subset
.chunk <- function(chunk, n_chunks, selectors) {
if (any(chunk > n_chunks)) {
......@@ -806,7 +807,7 @@
readRDS(paste0(shared_dir, '/',
chunk_files_original[found_chunk]))
})
if (('try-error' %in% class(array_of_chunks[[i]]))) {
if (is(array_of_chunks[[i]], 'try-error')) {
message("Waiting for an incomplete file transfer...")
Sys.sleep(5)
} else {
......@@ -844,3 +845,17 @@
.KnownLatNames <- function() {
known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat')
}
.ReplaceElementInVector <- function(x, target, new_val) {
# x is a vector with name
# target is a string
# new_val is a vector with name
# E.g., Change [a = 2, b = 3] to [c = 1, d = 2, b = 3], then:
# x = c(a = 2, b = 3), target = 'a', new_val = c(c = 1, d = 2)
new_names <- unlist(lapply(as.list(names(x)), function(x) if (x == target) names(new_val) else x))
new_list <- vector('list', length = length(new_names))
for (i in 1:length(new_list)) {
new_list[[i]] <- c(new_val, x)[which(c(names(new_val), names(x)) == new_names[i])]
}
return(unlist(new_list))
}
......@@ -118,6 +118,9 @@ rebuild_dim_params <- function(dim_params, merge_across_dims,
# Reallocating pairs of across file and inner dimensions if they have
# to be merged. They are put one next to the other to ease merge later.
if (merge_across_dims) {
if (any(!names(inner_dims_across_files) %in% names(dim_params)) |
any(!unlist(inner_dims_across_files) %in% names(dim_params)))
stop("All *_across parameters must have value as a file dimension name.")
for (inner_dim_across in names(inner_dims_across_files)) {
inner_dim_pos <- which(names(dim_params) == inner_dim_across)
file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]])
......@@ -429,7 +432,7 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter
file_dim_as_selector_array_dim) {
# inner_dim is not in return_vars or is NULL
if (is.character(file_dim_as_selector_array_dim)) { #(1)
if (file_dim_as_selector_array_dim %in% found_pattern_dim) {
if (any(file_dim_as_selector_array_dim %in% found_pattern_dim)) {
stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '",
found_pattern_dim,
"', which is not allowed. To assign the dependency on the pattern dim, ",
......@@ -446,10 +449,10 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter
corrected_value <- file_dim_name
}
}
.warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value,
"', but '", inner_dim, "' is not in return_vars list or is NULL. ",
"To provide the correct metadata, the value of ", inner_dim,
" in 'return_vars' is specified as '", corrected_value, "'."))
.warning(paste0("Found '", inner_dim, "' dependency on file dimension '", corrected_value,
"', but '", inner_dim, "' is not in return_vars list or does not include '", corrected_value,
"'. To provide the correct metadata, '", corrected_value, "' is included under '", inner_dim,
"' in 'return_vars."))
return(corrected_value)
}
......@@ -822,6 +825,28 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina
return(list(final_dims_fake, all_split_dims))
}
# Find the final_dims_fake for metadata if it needs to be reshaped
find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims,
picked_common_vars, across_inner_dim, final_dims_fake,
dims_of_merge_dim, all_split_dims) {
if (merge_across_dims) {
if (!split_multiselected_dims) {
final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)]
} else {
final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])]
}
} else if (split_multiselected_dims) {
target_split_dim_ind <- which(names(dim(picked_common_vars)) == names(all_split_dims))
margin_dim_ind <- c(1:length(dim(picked_common_vars)))[-target_split_dim_ind]
if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) {
final_dims_fake_metadata <- all_split_dims[[1]]
} else {
final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars), target = names(all_split_dims), new_val = all_split_dims[[1]])
}
}
return(final_dims_fake_metadata)
}
# Build the work pieces.
build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims,
......@@ -873,7 +898,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims,
x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names")
if (!is.null(x_dim_name)) {
which_chunk <- file_to_load_sub_indices[x_dim_name]
selectors[[x]][['fri']][[which_chunk]]
if (length(which_chunk) > 1) {
tmp_dim <- attr(selectors[[x]][['fri']], "dim")
vec_ind <- which_chunk[1]
for (i_dim in length(tmp_dim):2) {
vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)])
}
selectors[[x]][['fri']][[vec_ind]]
} else { #old code
selectors[[x]][['fri']][[which_chunk]]
}
} else {
selectors[[x]][['fri']][[1]]
}
......@@ -889,7 +923,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims,
x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names")
if (!is.null(x_dim_name)) {
which_chunk <- file_to_load_sub_indices[x_dim_name]
selectors[[x]][['sri']][[which_chunk]]
if (length(which_chunk) > 1) {
tmp_dim <- attr(selectors[[x]][['sri']], "dim")
vec_ind <- which_chunk[1]
for (i_dim in length(tmp_dim):2) {
vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)])
}
selectors[[x]][['sri']][[vec_ind]]
} else { #old code
selectors[[x]][['sri']][[which_chunk]]
}
} else {
selectors[[x]][['sri']][[1]]
}
......@@ -1028,9 +1071,13 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) {
# If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs
# due to unequal inner_dim ('time') length across file_dim ('sdate').
remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim,
length_inner_across_dim, data_array) {
remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadata = NULL,
inner_dims_across_files, final_dims, length_inner_across_dim) {
# data_array is a vector from bigmemory::as.matrix
# merge_dim_metadata is an array
across_file_dim <- names(inner_dims_across_files) #TODO: more than one?
across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one?
# Get the length of these two dimensions in final_dims
length_inner_across_store_dims <- final_dims[across_inner_dim]
length_file_across_store_dims <- final_dims[across_file_dim]
......@@ -1042,37 +1089,74 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims,
for (i in 1:length_file_across_store_dims) { #1:4
logi_array[1:length_inner_across_dim[[i]], i] <- TRUE
}
# First, get the data array with final_dims dimension
data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims)
if (!is.null(data_array)) {
# First, turn the data vector into array with final_dims
data_array_final_dims <- array(data_array, dim = final_dims)