Newer
Older
data_dims <- NULL
aho
committed
# if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) {
file_to_open <- file_path
data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]],
lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1),
synonims)
# file_dim_reader returns dimension names as found in the file.
# Need to translate accoridng to synonims:
names(data_dims) <- replace_with_synonmins(data_dims, synonims)
aho
committed
# }
if (is.numeric(largest_dims_length)) { # largest_dims_length is a named vector
# Check if the names fit the inner dimension names
if (!all(names(largest_dims_length) %in% names(data_dims))) {
#NOTE: stop or warning?
stop("Parameter 'largest_dims_length' has inconsistent names with inner dimensions.")
} else {
match_ind <- match(names(largest_dims_length), names(data_dims))
data_dims[match_ind] <- largest_dims_length
}
}
## largest_dims_length = TRUE, or is a number & merge_across_dims is across this dim
tmp <- find_largest_dims_length(
selectors_total_list[[i]], array_of_files_to_load,
selector_indices_save[[i]], dat[[i]], expected_inner_dims[[i]],
synonims, file_dim_reader)
data_dims <- tmp$largest_data_dims
# 'data_dims_each_file' is used when merge_across_dims = TRUE &
# the files have different length of inner dim.
data_dims_each_file <- tmp$data_dims_all_files
# file_dim_reader returns dimension names as found in the file.
# Need to translate accoridng to synonims:
names(data_dims) <- replace_with_synonmins(data_dims, synonims)
} # end if (largest_dims_length == TRUE)
#//////////////////////////////////////////////////
aho
committed
# Some dimension is defined in Start() call but doesn't exist in data
if (!all(expected_inner_dims[[i]] %in% names(data_dims))) {
tmp <- expected_inner_dims[[i]][which(!expected_inner_dims[[i]] %in% names(data_dims))]
stop("Could not find the dimension '", tmp, "' in the file. Either ",
"change the dimension name in your request, adjust the ",
"parameter 'dim_names_in_files' or fix the dimension name in ",
"the file.\n", file_path)
}
# Not all the inner dims are defined in Start() call
if (!all(names(data_dims) %in% expected_inner_dims[[i]])) {
tmp <- names(data_dims)[which(!names(data_dims) %in% expected_inner_dims[[i]])]
if (data_dims[tmp] != 1) {
stop("The dimension '", tmp, "' is found in the file ", file_path,
" but not defined in the Start call.")
}
}
#///////////////////////////////////////////////////////////////////
# Transform the variables if needed and keep them apart.
if (!is.null(transform) && (length(transform_vars) > 0)) {
if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) {
stop("Could not find all the required variables in 'transform_vars' ",
"for the dataset '", dat[[i]][['name']], "'.")
}
# picked_vars[[i]]
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_vars[[i]], transform_vars, picked_vars_ordered[[i]])
# picked_common_vars
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars, transform_vars, picked_common_vars_ordered)
# Save the crop domain from selectors of transformed vars
# PROB: It doesn't consider aiat. If aiat, the indices are for
# after transformed data; we don't know the corresponding
# values yet.
transform_crop_domain <- vector('list')
for (transform_var in transform_vars) {
transform_crop_domain[[transform_var]] <- dat[[i]][['selectors']][[transform_var]][[1]]
# Turn indices into values
if (attr(transform_crop_domain[[transform_var]], 'indices')) {
if (transform_var %in% names(common_return_vars)) {
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars_ordered[[transform_var]],
transform_var)
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars[[transform_var]],
transform_var)
}
} else { # return_vars
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars_ordered[[i]][[transform_var]],
transform_var)
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars[[i]][[transform_var]],
transform_var)
}
}
} else if (is.atomic(transform_crop_domain[[transform_var]])) {
# if it is values but vector
transform_crop_domain[[transform_var]] <-
c(transform_crop_domain[[transform_var]][1],
tail(transform_crop_domain[[transform_var]], 1))
}
# For CDORemapper (not sure if it's also suitable for other transform functions):
# If lon_reorder is not used + lon selector is from big to small,
# lonmax and lonmin need to be exchanged. The ideal way is to
# exchange in CDORemapper(), but lon_reorder is used or not is not
# known by CDORemapper().
# NOTE: lat's order doesn't matter, big to small and small to big
# both work. Since we shouldn't assume transform_var in Start(),
# e.g., transform_var can be anything transformable in the assigned transform function,
# we exchange whichever parameter here anyway.
if (!transform_var %in% names(dim_reorder_params) &
diff(unlist(transform_crop_domain[[transform_var]])) < 0) {
transform_crop_domain[[transform_var]] <- rev(transform_crop_domain[[transform_var]])
}
}
do.call(transform, c(list(data_array = NULL,
variables = vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]],
crop_domain = transform_crop_domain),
transform_params))
)
transformed_data <- tmp$value
warnings1 <- c(warnings1, tmp$warnings)
# Discard the common transformed variables if already transformed before
if (!is.null(transformed_common_vars)) {
common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(common_ones) > 0) {
transformed_data$variables <- transformed_data$variables[-common_ones]
}
}
transformed_vars[[i]] <- list()
transformed_vars_ordered[[i]] <- list()
transformed_vars_unorder_indices[[i]] <- list()
# Order the transformed variables if needed
# 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above.
for (var_to_read in names(transformed_data$variables)) {
if (var_to_read %in% unlist(var_params)) {
associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
aho
committed
if ((associated_dim_name %in% names(dim_reorder_params))) {
## Is this check really needed?
if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) {
stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
"whose coordinate variable that has more than 1 dimension (after ",
"transform). This is not supported.")
}
ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]])
attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables')
if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
}
# Save the indices to reorder back the ordered variable values.
aho
committed
# This will be used to define the first round indices (if aiat) or second round
# indices (if !aiat).
unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
if (var_to_read %in% names(picked_common_vars)) {
transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x
transformed_common_vars_unorder_indices[[var_to_read]] <- unorder
} else {
transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x
transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder
}
}
}
}
transformed_picked_vars_names <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables))
if (length(transformed_picked_vars_names) > 0) {
transformed_picked_vars_names <- names(picked_vars[[i]])[transformed_picked_vars_names]
transformed_vars[[i]][transformed_picked_vars_names] <- transformed_data$variables[transformed_picked_vars_names]
transformed_picked_common_vars_names <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(transformed_picked_common_vars_names) > 0) {
transformed_picked_common_vars_names <- names(picked_common_vars)[transformed_picked_common_vars_names]
transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars_names]
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
}
}
}
# Once the variables are transformed, we compute the indices to be
# taken for each inner dimension.
# In all cases, indices will have to be computed to know which data
# values to take from the original data for each dimension (if a
# variable is specified for that dimension, it will be used to
# convert the provided selectors into indices). These indices are
# referred to as 'first round of indices'.
# The taken data will then be transformed if needed, together with
# the dimension variable if specified, and, in that case, indices
# will have to be computed again to know which values to take from the
# transformed data. These are the 'second round of indices'. In the
# case there is no transformation, the second round of indices will
# be all the available indices, i.e. from 1 to the number of taken
# values with the first round of indices.
for (inner_dim in expected_inner_dims[[i]]) {
if (debug) {
print("-> DEFINING INDICES FOR INNER DIMENSION:")
print(inner_dim)
}
crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]]
chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]])
names(chunk_amount) <- crossed_file_dim
} else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) &
inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) &
any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) {
# inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4])
crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in%
names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))]
if (length(crossed_file_dim) == 1) {
chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]])
names(chunk_amount) <- crossed_file_dim
} else {
# e.g., region = [memb = 2, sdate = 3, region = 1]
chunk_amount <- prod(
sapply(lapply(
dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length))
names(chunk_amount) <- paste(crossed_file_dim, collapse = '.')
}
} else {
chunk_amount <- 1
}
# In the special case that the selectors for a dimension are 'all', 'first', ...
# and chunking (dividing in more than 1 chunk) is requested, the selectors are
# replaced for equivalent indices.
if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) &&
(chunks[[inner_dim]]['n_chunks'] != 1)) {
replace_character_with_indices(selectors = dat[[i]][['selectors']][[inner_dim]][[1]], data_dims = data_dims[[inner_dim]], chunk_amount)
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
# The selectors for the inner dimension are taken.
selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (debug) {
if (inner_dim %in% dims_to_check) {
print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':"))
print("-> STRUCTURE OF SELECTOR ARRAY:")
print(str(selector_array))
print("-> PICKED VARS:")
print(picked_vars)
print("-> TRANSFORMED VARS:")
print(transformed_vars)
}
}
if (is.null(dim(selector_array))) {
dim(selector_array) <- length(selector_array)
}
if (is.null(names(dim(selector_array)))) {
if (length(dim(selector_array)) == 1) {
names(dim(selector_array)) <- inner_dim
} else {
stop("Provided selector arrays must be provided with dimension ",
"names. Found an array of selectors without dimension names ",
"for the dimension '", inner_dim, "'.")
}
}
selectors_are_indices <- FALSE
if (!is.null(attr(selector_array, 'indices'))) {
if (!is.logical(attr(selector_array, 'indices'))) {
stop("The atribute 'indices' for the selectors for the dimension '",
inner_dim, "' must be TRUE or FALSE.")
}
selectors_are_indices <- attr(selector_array, 'indices')
}
taken_chunks <- rep(FALSE, chunk_amount)
selector_file_dims <- 1
#NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname.
# I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2)
if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])]
}
selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))]
var_with_selectors <- NULL
var_with_selectors_name <- var_params[[inner_dim]]
var_ordered <- NULL
var_unorder_indices <- NULL
with_transform <- FALSE
# If the selectors come with an associated variable
if (!is.null(var_with_selectors_name)) {
if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) {
with_transform <- TRUE
stop("Requested a transformation over the dimension '",
inner_dim, "', wich goes across files. This feature ",
"is not supported. Either do the request without the ",
"transformation or request it over dimensions that do ",
"not go across files.")
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:")
print(var_with_selectors_name)
print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:")
print(transform_vars)
print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:")
print(str(transform))
}
}
aho
committed
# For fri
if (var_with_selectors_name %in% names(picked_vars[[i]])) {
var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]]
var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]]
} else if (var_with_selectors_name %in% names(picked_common_vars)) {
var_with_selectors <- picked_common_vars[[var_with_selectors_name]]
var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]]
}
n <- prod(dim(var_with_selectors))
# if no _reorder, var_unorder_indices is NULL
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:n
}
aho
committed
# For sri
aho
committed
## var in 'dat'
if (var_with_selectors_name %in% names(transformed_vars[[i]])) {
m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_vars[[i]][transformed_picked_vars_names][[var_with_selectors_name]][transformed_var_unordered_indices]
aho
committed
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
## var in common
} else if (var_with_selectors_name %in% names(transformed_common_vars)) {
m <- prod(dim(transformed_common_vars[[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]][transformed_var_unordered_indices]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
}
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:m
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SIZE OF ORIGINAL VARIABLE:")
print(n)
print("-> SIZE OF TRANSFORMED VARIABLE:")
if (with_transform) print(m)
print("-> STRUCTURE OF ORDERED VAR:")
print(str(var_ordered))
print("-> UNORDER INDICES:")
print(var_unorder_indices)
}
}
# If this inner dim's selector (var_with_selectors) is an array
# that has file dim as dimension (e.g., across or depend relationship)
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
if (any(names(var_dims) %in% found_file_dims[[i]])) {
if (with_transform) {
stop("Requested transformation for inner dimension '",
inner_dim, "' but provided selectors for such dimension ",
"over one or more file dimensions. This is not ",
"supported. Either request no transformation for the ",
"dimension '", inner_dim, "' or specify the ",
"selectors for this dimension without the file dimensions.")
}
var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])]
var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])]
}
## # Keep the selectors if they correspond to a variable that will be transformed.
## if (with_transform) {
## if (var_with_selectors_name %in% names(picked_vars[[i]])) {
## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
## } else if (var_with_selectors_name %in% names(picked_common_vars)) {
## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
## }
## transformed_var_dims <- dim(transformed_var_with_selectors)
## transformed_var_file_dims <- 1
## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) {
## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## }
##if (inner_dim %in% dims_to_check) {
##print("111m")
##print(str(transformed_var_dims))
##}
##
## m <- prod(transformed_var_dims)
## }
# Work out var file dims and inner dims.
if (inner_dim %in% unlist(inner_dims_across_files)) {
#TODO: if (chunk_amount != number of chunks in selector_file_dims), crash
if (length(var_dims) > 1) {
stop("Specified a '", inner_dim, "_var' for the dimension '",
inner_dim, "', which goes across files (across '", crossed_file_dim,
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
"'). The specified variable, '", var_with_selectors_name, "', has more ",
"than one dimension and can not be used as selector variable. ",
"Select another variable or fix it in the files.")
}
}
## TODO HERE::
#- indices_of_first_files_with_data may change, because array is now extended
var_full_dims <- dim(var_with_selectors)
} else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) ||
(is.character(selector_array) && (length(selector_array) == 1) &&
(selector_array %in% c('all', 'first', 'last')) &&
!is.null(file_dim_reader))) {
#### TODO HERE::
###- indices_of_first_files_with_data may change, because array is now extended
# Lines moved above for better performance.
##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]],
## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1))
} else {
stop(paste0("Can not translate the provided selectors for '", inner_dim,
"' to numeric indices. Provide numeric indices and a ",
"'file_dim_reader' function, or a '", inner_dim,
"_var' in order to calculate the indices."))
}
# At this point, if no selector variable was provided, the variable
# data_dims has been populated. If a selector variable was provided,
# the variables var_dims, var_file_dims and var_full_dims have been
# populated instead.
#////////////////////////////////////////////////////////////////////
# If the inner dim lengths differ among files,
# need to know each length to create the indices for each file later.
inner_dim_lengths <- NULL
# inner_dim_lengths here includes all the files, but we only want
# the files of fyear for certain "sdate". We will categorize it later.
inner_dim_lengths <- tryCatch({
sapply(data_dims_each_file, '[[', inner_dim)
}, error = function(x) {
sapply(data_dims_each_file, '[[',
synonims[[inner_dim]][which(synonims[[inner_dim]] != inner_dim)])
})
# Use other file dims as the factors to categorize.
other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)]
other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev)
other_file_dims_factor <- expand.grid(other_file_dims)
selector_indices_save_subset <-
lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim))
# Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same)
inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1])
for (i_factor in 1:length(inner_dim_lengths_cat)) {
inner_dim_lengths_cat[[i_factor]] <-
inner_dim_lengths[which(sapply(lapply(
selector_indices_save_subset, '==',
other_file_dims_factor[i_factor, ]), all))]
}
# Find the largest length of each time step
inner_dim_lengths <- do.call(pmax, inner_dim_lengths_cat)
}
fri <- first_round_indices <- NULL
sri <- second_round_indices <- NULL
# This variable will keep the indices needed to crop the transformed
# variable (the one that has been transformed without being subset
# with the first round indices).
tvi <- tranaformed_variable_indices <- NULL
ordered_fri <- NULL
ordered_sri <- NULL
if ((length(selector_array) == 1) && is.character(selector_array) &&
(selector_array %in% c('all', 'first', 'last')) &&
(chunks[[inner_dim]]['n_chunks'] == 1)) {
if (is.null(var_with_selectors_name)) {
fri <- vector('list', length = chunk_amount)
dim(fri) <- c(chunk_amount)
sri <- vector('list', length = chunk_amount)
dim(sri) <- c(chunk_amount)
if (selector_array == 'all') {
if (is.null(inner_dim_lengths) | length(unique(inner_dim_lengths)) <= 1) { #old code
fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
} else { # files have different inner dim length
for (i_chunk in 1:length(fri)) {
fri[[i_chunk]] <- 1:inner_dim_lengths[i_chunk]
}
}
taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else if (selector_array == 'first') {
fri[[1]] <- 1
taken_chunks[1] <- TRUE
#sri <- NULL
} else if (selector_array == 'last') {
fri[[chunk_amount]] <- data_dims[inner_dim]
taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
}
} else {
if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) {
stop("The variable '", var_with_selectors_name, "' must also be ",
"requested for the file dimension '", crossed_file_dim, "' in ",
"this configuration.")
}
fri <- vector('list', length = prod(var_file_dims))
dim(fri) <- var_file_dims
ordered_fri <- fri
sri <- vector('list', length = prod(var_file_dims))
dim(sri) <- var_file_dims
ordered_sri <- sri
if (selector_array == 'all') {
# TODO: Populate ordered_fri
ordered_fri[] <- replicate(prod(var_file_dims), list(1:n))
fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n]))
taken_chunks <- rep(TRUE, chunk_amount)
if (!with_transform) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else {
ordered_sri[] <- replicate(prod(var_file_dims), list(1:m))
aho
committed
if (inner_dim %in% names(dim_reorder_params)) {
sri[] <- replicate(prod(var_file_dims), list(transformed_var_unordered_indices[1:m]))
} else {
sri[] <- replicate(prod(var_file_dims), list(1:m))
}
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
## var_file_dims instead??
#if (!aiat) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#} else {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#}
tvi <- 1:m
}
} else if (selector_array == 'first') {
taken_chunks[1] <- TRUE
if (!with_transform) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
#taken_chunks[1] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
# TODO: TO BE IMPROVED
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1:ceiling(m / n)
sri[[1]] <- 1:ceiling(m / n)
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[1]] <- 1:ceiling(m / n)
fri[[1]] <- var_unorder_indices[1:ceiling(m / n)]
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1
sri[[1]] <- 1
tvi <- 1
}
}
} else if (selector_array == 'last') {
taken_chunks[length(taken_chunks)] <- TRUE
if (!with_transform) {
ordered_fri[[prod(var_file_dims)]] <- n
fri[[prod(var_file_dims)]] <- var_unorder_indices[n]
#taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[prod(var_file_dims)]] <- prod(var_dims)
fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING.
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n
fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1
sri[[prod(var_file_dims)]] <- 1
tvi <- 1
}
}
}
}
# If the selectors are not 'all', 'first', 'last', ...
} else {
if (!is.null(var_with_selectors_name)) {
unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims)))
if ((length(unmatching_file_dims) > 0)) {
raise_error <- FALSE
if (!(length(unmatching_file_dims) == 1 &
names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim &
inner_dim %in% names(selector_inner_dims))) {
raise_error <- TRUE
}
}
if (raise_error) {
stop("Provided selectors for the dimension '", inner_dim, "' must have as many ",
"file dimensions as the variable the dimension is defined along, '",
var_with_selectors_name, "', with the exceptions of the file pattern dimension ('",
found_pattern_dim, "') and any depended file dimension (if specified as ",
"depended dimension in parameter 'inner_dims_across_files' and the ",
"depending file dimension is present in the provided selector array).")
}
}
if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) {
if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) {
stop("Size of selector file dimensions must match size of the corresponding ",
"variable dimensions.")
}
}
}
## TODO: If var dimensions are not in the same order as selector dimensions, reorder
if (is.null(names(selector_file_dims))) {
if (!is.null(crossed_file_dim)) {
fri_dim_names <- c(fri_dim_names, crossed_file_dim)
}
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)]
fri_dims <- rep(NA, length(fri_dim_names))
names(fri_dims) <- fri_dim_names
fri_dims[names(selector_file_dims)] <- selector_file_dims
#NOTE: Not sure how it works here, but "chunk_amount" is the same as
# "selector_file_dims" above in the cases we've seen so far,
# and it causes problem when crossed_file_dim is more than one.
# if (!is.null(crossed_file_dim)) {
# fri_dims[crossed_file_dim] <- chunk_amount
# }
}
fri <- vector('list', length = prod(fri_dims))
dim(fri) <- fri_dims
sri <- vector('list', length = prod(fri_dims))
dim(sri) <- fri_dims
selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims)
selector_store_position <- fri_dims
for (j in 1:prod(dim(selector_file_dim_array))) {
selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ]
names(selector_indices_to_take) <- names(selector_file_dims)
selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
# "selector_indices_to_take" is an array if "selector_file_dims" is not 1 (if
# selector is an array with a file_dim dimname, i.e., time = [sdate = 2, time = 4].
if (!is.null(names(selector_indices_to_take))) {
sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
as.list(selector_indices_to_take), drop = 'selected')
} else {
sub_array_of_selectors <- selector_array
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
print("-> STRUCTURE OF A SUB ARRAY:")
print(str(sub_array_of_selectors))
print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:")
print(str(var_with_selectors))
print(dim(var_with_selectors))
}
}
if (selectors_are_indices) {
sub_array_of_values <- NULL
#} else if (!is.null(var_ordered)) {
# sub_array_of_values <- var_ordered
} else {
var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
if (!is.null(names(var_indices_to_take))) {
sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
as.list(var_indices_to_take), drop = 'selected')
} else {
# time across some file dim (e.g., "file_date") but doesn't have
# this file dim as dimension (e.g., time: [sdate, time])
} else {
sub_array_of_values <- var_with_selectors
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS")
print(str(sub_array_of_values))
print(dim(sub_array_of_values))
print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:")
# The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4],
# or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate')
if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) {
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
if (length(sub_array_of_selectors) > 0) {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.")
}
}
if (selectors_are_indices) {
if (!is.null(var_with_selectors_name)) {
max_allowed <- ifelse(aiat, m, n)
} else {
max_allowed <- data_dims[inner_dim]
}
if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) ||
any(na.omit(unlist(sub_array_of_selectors)) < 1)) {
stop("Provided indices out of range for dimension '", inner_dim, "' ",
"for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ",
max_allowed, ").")
}
}
# The selector_checker will return either a vector of indices or a list
# with the first and last desired indices.
#NOTE: goes_across_prime_meridian may be TRUE only if the selector is list of values
# If selectors are indices and _reorder = CircularSort() is used, change
# is_circular_dim to TRUE.
if (!is.null(var_ordered) & selectors_are_indices &
!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
if (is_circular_dim & is.list(sub_array_of_selectors)) {
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
}
# If selectors are values and _reorder is defined.
if (!is.null(var_ordered) && !selectors_are_indices) {
if (!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
}
if (is.list(sub_array_of_selectors)) {
## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here.
if (is_circular_dim) {
# NOTE: Use CircularSort() to put the values in the assigned range, and get the order.
# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1].
# 'goes_across_prime_meridian' means the selector range across the border. For example,
# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE.
# dim_reorder_params is a list of Reorder function, i.e.,
# Sort() or CircularSort().
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
#NOTE: HERE change to the same code as below (under 'else'). Not sure why originally
# it uses additional lines, which make reorder not work.
# If "_reorder" is used, here 'sub_array_of_selectors' is adjusted to
# follow the reorder rule. E.g., if lat = values(list(-90, 90)) and
# lat_reorder = Sort(decreasing = T), 'sub_array_of_selectors' changes
# from list(-90, 90) to list(90, -90).
sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x)
#sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))
#sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix
#sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder])
# Add warning if the boundary is out of range
if (min(unlist(sub_array_of_selectors)) < range(var_ordered)[1]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > range(var_ordered)[2]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered), bound = 'upper')
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
}
} else {
sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x
}
}
# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case
# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of
#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor().
sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) {
if (!(sub_array_of_selectors[[1]] %in% var_ordered)){
sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1
}
if (!(sub_array_of_selectors[[2]] %in% var_ordered)){
sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1
}
}
#NOTE: the possible case?
if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) {
stop("The case is goes_across_prime_meridian but no adjustion for the indices!")
}
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(var_ordered),
", ", max(var_ordered), "]."))
}
} else {
# Add warning if the boundary is out of range
if (is.list(sub_array_of_selectors) & !selectors_are_indices) {
if (min(unlist(sub_array_of_selectors)) < min(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > max(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'upper')
# sub_array_of_values here is NULL if selectors are indices, and
# 'sub_array_of_indices' will be sub_array_of_selectors, i.e., the indices
# assigned (but rounded).
sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(sub_array_of_values),
", ", max(sub_array_of_values), "]."))
}
}
#////////////////////////////////////////////////////////////
# If chunking along this inner dim, this part creates the indices for each chunk.
# For example, if 'sub_array_of_indices' is c(5:10) and chunked into 2,
# 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) for chunk = 2.
# If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes
# list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2.
#TODO: The list can be turned into vector here? So afterward no need to judge if
# it is list or vector.
#NOTE: chunking cannot be done if goes_across_prime_meridian = TRUE.
#TODO: Change the algorithm to make chunking works for goes_across_prime_meridian = TRUE.
# If goes_across_prime_meridian = TRUE, "sub_array_of_indices" are not
# continuous numbers. For example, list(37, 1243) means sub_array_of_fri
# that will be generated based on sub_array_of_indices later is c(1:37, 1243:1296).
# the longitude are separated into 2 parts, therefore, cannot be chunked here.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
if (goes_across_prime_meridian) {
stop(paste0("Chunking over ", inner_dim, " that goes across the circular border assigned by '", inner_dim, "_reorder' is not supported by startR now. Adjust the ", inner_dim, " selector to be within the border or change the borders." ))
}
if (!is.list(sub_array_of_indices)) {
sub_array_of_indices <-
sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices),
chunks[[inner_dim]]["chunk"],
chunks[[inner_dim]]["n_chunks"],
inner_dim)]
get_chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]),
chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"],
inner_dim)
vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
sub_array_of_indices[[1]] <- vect[tmp[1]]
sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]]
}
# The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk.
#----------------------------------------------------------
# 'sub_sub_array_of_values' is for sri chunking. If this inner dim is chunked,
# the sri has to follow the chunking of fri. Therefore, we save the original
# value of this chunk here for later use. We'll find the corresponding
# transformed value within 'sub_sub_array_of_values' and chunk sri.
if (with_transform & chunks[[inner_dim]]["n_chunks"] > 1) {
input_array_of_values <- var_ordered
} else {
if (is.null(sub_array_of_values)) { # selectors are indices
#NOTE: Not sure if 'vars_to_transform' is the correct one to use.
input_array_of_values <- vars_to_transform[[var_with_selectors_name]]
} else {
input_array_of_values <- sub_array_of_values
tmp <- generate_sub_sub_array_of_values(
input_array_of_values, sub_array_of_indices,
number_of_chunk = chunks[[inner_dim]]["chunk"])
sub_sub_array_of_values <- tmp$sub_sub_array_of_values
previous_sub_sub_array_of_values <- tmp$previous_sub_sub_array_of_values
}
#----------------------------------------------------------
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> TRANSFORMATION REQUESTED?")
print(with_transform)
print("-> BETA:")
print(beta)
}
}
if (with_transform) {
# If there is a transformation and selector values are provided, these
# selectors will be processed in the same way either if aiat = TRUE or
# aiat = FALSE.
## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
## otherwise, do what's coded.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
}
}
# Generate sub_array_of_fri
sub_array_of_fri <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim)
# May be useful for crop = T. 'subset_vars_to_transform' may not need
# to include extra cells, but currently it shows mistake if not include.
sub_array_of_fri_no_beta <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim, add_beta = FALSE)
subset_vars_to_transform <- vars_to_transform
if (!is.null(var_ordered)) {
#NOTE: If var_ordered is common_vars, it doesn't have attributes and it is a vector.
# Turn it into array and add dimension name.