Newer
Older
synonims, file_dim_reader)
data_dims <- tmp$largest_data_dims
# 'data_dims_each_file' is used when merge_across_dims = TRUE &
# the files have different length of inner dim.
data_dims_each_file <- tmp$data_dims_all_files
# file_dim_reader returns dimension names as found in the file.
# Need to translate accoridng to synonims:
names(data_dims) <- replace_with_synonmins(data_dims, synonims)
} # end if (largest_dims_length == TRUE)
#//////////////////////////////////////////////////
aho
committed
# Some dimension is defined in Start() call but doesn't exist in data
if (!all(expected_inner_dims[[i]] %in% names(data_dims))) {
tmp <- expected_inner_dims[[i]][which(!expected_inner_dims[[i]] %in% names(data_dims))]
stop("Could not find the dimension '", tmp, "' in the file. Either ",
"change the dimension name in your request, adjust the ",
"parameter 'dim_names_in_files' or fix the dimension name in ",
"the file.\n", file_path)
}
# Not all the inner dims are defined in Start() call
if (!all(names(data_dims) %in% expected_inner_dims[[i]])) {
tmp <- names(data_dims)[which(!names(data_dims) %in% expected_inner_dims[[i]])]
if (data_dims[tmp] != 1) {
stop("The dimension '", tmp, "' is found in the file ", file_path,
" but not defined in the Start call.")
}
}
#///////////////////////////////////////////////////////////////////
# Transform the variables if needed and keep them apart.
if (!is.null(transform) && (length(transform_vars) > 0)) {
if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) {
stop("Could not find all the required variables in 'transform_vars' ",
"for the dataset '", dat[[i]][['name']], "'.")
}
# picked_vars[[i]]
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_vars[[i]], transform_vars, picked_vars_ordered[[i]])
# picked_common_vars
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars, transform_vars, picked_common_vars_ordered)
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
# Save the crop domain from selectors of transformed vars
# PROB: It doesn't consider aiat. If aiat, the indices are for
# after transformed data; we don't know the corresponding
# values yet.
transform_crop_domain <- vector('list')
for (transform_var in transform_vars) {
transform_crop_domain[[transform_var]] <- dat[[i]][['selectors']][[transform_var]][[1]]
# Turn indices into values
if (attr(transform_crop_domain[[transform_var]], 'indices')) {
if (transform_var %in% names(common_return_vars)) {
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars_ordered[[transform_var]])
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars[[transform_var]])
}
} else { # return_vars
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars_ordered[[i]][[transform_var]])
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars[[i]][[transform_var]])
}
}
} else if (is.atomic(transform_crop_domain[[transform_var]])) {
# if it is values but vector
transform_crop_domain[[transform_var]] <-
c(transform_crop_domain[[transform_var]][1],
tail(transform_crop_domain[[transform_var]], 1))
}
# For CDORemapper (not sure if it's also suitable for other transform functions):
# If lon_reorder is not used + lon selector is from big to small,
# lonmax and lonmin need to be exchanged. The ideal way is to
# exchange in CDORemapper(), but lon_reorder is used or not is not
# known by CDORemapper().
# NOTE: lat's order doesn't matter, big to small and small to big
# both work. Since we shouldn't assume transform_var in Start(),
# e.g., transform_var can be anything transformable in the assigned transform function,
# we exchange whichever parameter here anyway.
if (!transform_var %in% names(dim_reorder_params) &
diff(unlist(transform_crop_domain[[transform_var]])) < 0) {
transform_crop_domain[[transform_var]] <- rev(transform_crop_domain[[transform_var]])
}
}
# Transform the variables
transformed_data <- do.call(transform, c(list(data_array = NULL,
variables = vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]],
crop_domain = transform_crop_domain),
transform_params))
# Discard the common transformed variables if already transformed before
if (!is.null(transformed_common_vars)) {
common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(common_ones) > 0) {
transformed_data$variables <- transformed_data$variables[-common_ones]
}
}
transformed_vars[[i]] <- list()
transformed_vars_ordered[[i]] <- list()
transformed_vars_unorder_indices[[i]] <- list()
# Order the transformed variables if needed
# 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above.
for (var_to_read in names(transformed_data$variables)) {
if (var_to_read %in% unlist(var_params)) {
associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
aho
committed
if ((associated_dim_name %in% names(dim_reorder_params))) {
## Is this check really needed?
if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) {
stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
"whose coordinate variable that has more than 1 dimension (after ",
"transform). This is not supported.")
}
ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]])
attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables')
if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
}
# Save the indices to reorder back the ordered variable values.
aho
committed
# This will be used to define the first round indices (if aiat) or second round
# indices (if !aiat).
unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
if (var_to_read %in% names(picked_common_vars)) {
transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x
transformed_common_vars_unorder_indices[[var_to_read]] <- unorder
} else {
transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x
transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder
}
}
}
}
transformed_picked_vars_names <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables))
if (length(transformed_picked_vars_names) > 0) {
transformed_picked_vars_names <- names(picked_vars[[i]])[transformed_picked_vars_names]
transformed_vars[[i]][transformed_picked_vars_names] <- transformed_data$variables[transformed_picked_vars_names]
transformed_picked_common_vars_names <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(transformed_picked_common_vars_names) > 0) {
transformed_picked_common_vars_names <- names(picked_common_vars)[transformed_picked_common_vars_names]
transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars_names]
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
}
}
}
# Once the variables are transformed, we compute the indices to be
# taken for each inner dimension.
# In all cases, indices will have to be computed to know which data
# values to take from the original data for each dimension (if a
# variable is specified for that dimension, it will be used to
# convert the provided selectors into indices). These indices are
# referred to as 'first round of indices'.
# The taken data will then be transformed if needed, together with
# the dimension variable if specified, and, in that case, indices
# will have to be computed again to know which values to take from the
# transformed data. These are the 'second round of indices'. In the
# case there is no transformation, the second round of indices will
# be all the available indices, i.e. from 1 to the number of taken
# values with the first round of indices.
for (inner_dim in expected_inner_dims[[i]]) {
if (debug) {
print("-> DEFINING INDICES FOR INNER DIMENSION:")
print(inner_dim)
}
crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]]
chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]])
names(chunk_amount) <- crossed_file_dim
} else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) &
inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) &
any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) {
# inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4])
crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in%
names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))]
if (length(crossed_file_dim) == 1) {
chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]])
names(chunk_amount) <- crossed_file_dim
} else {
# e.g., region = [memb = 2, sdate = 3, region = 1]
chunk_amount <- prod(
sapply(lapply(
dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length))
names(chunk_amount) <- paste(crossed_file_dim, collapse = '.')
}
} else {
chunk_amount <- 1
}
# In the special case that the selectors for a dimension are 'all', 'first', ...
# and chunking (dividing in more than 1 chunk) is requested, the selectors are
# replaced for equivalent indices.
if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) &&
(chunks[[inner_dim]]['n_chunks'] != 1)) {
replace_character_with_indices(selectors = dat[[i]][['selectors']][[inner_dim]][[1]], data_dims = data_dims[[inner_dim]], chunk_amount)
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
# The selectors for the inner dimension are taken.
selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (debug) {
if (inner_dim %in% dims_to_check) {
print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':"))
print("-> STRUCTURE OF SELECTOR ARRAY:")
print(str(selector_array))
print("-> PICKED VARS:")
print(picked_vars)
print("-> TRANSFORMED VARS:")
print(transformed_vars)
}
}
if (is.null(dim(selector_array))) {
dim(selector_array) <- length(selector_array)
}
if (is.null(names(dim(selector_array)))) {
if (length(dim(selector_array)) == 1) {
names(dim(selector_array)) <- inner_dim
} else {
stop("Provided selector arrays must be provided with dimension ",
"names. Found an array of selectors without dimension names ",
"for the dimension '", inner_dim, "'.")
}
}
selectors_are_indices <- FALSE
if (!is.null(attr(selector_array, 'indices'))) {
if (!is.logical(attr(selector_array, 'indices'))) {
stop("The atribute 'indices' for the selectors for the dimension '",
inner_dim, "' must be TRUE or FALSE.")
}
selectors_are_indices <- attr(selector_array, 'indices')
}
taken_chunks <- rep(FALSE, chunk_amount)
selector_file_dims <- 1
#NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname.
# I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2)
if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])]
}
selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))]
var_with_selectors <- NULL
var_with_selectors_name <- var_params[[inner_dim]]
var_ordered <- NULL
var_unorder_indices <- NULL
with_transform <- FALSE
# If the selectors come with an associated variable
if (!is.null(var_with_selectors_name)) {
if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) {
with_transform <- TRUE
stop("Requested a transformation over the dimension '",
inner_dim, "', wich goes across files. This feature ",
"is not supported. Either do the request without the ",
"transformation or request it over dimensions that do ",
"not go across files.")
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:")
print(var_with_selectors_name)
print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:")
print(transform_vars)
print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:")
print(str(transform))
}
}
aho
committed
# For fri
if (var_with_selectors_name %in% names(picked_vars[[i]])) {
var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]]
var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]]
} else if (var_with_selectors_name %in% names(picked_common_vars)) {
var_with_selectors <- picked_common_vars[[var_with_selectors_name]]
var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]]
}
n <- prod(dim(var_with_selectors))
# if no _reorder, var_unorder_indices is NULL
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:n
}
aho
committed
# For sri
aho
committed
## var in 'dat'
if (var_with_selectors_name %in% names(transformed_vars[[i]])) {
m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_vars[[i]][transformed_picked_vars_names][[var_with_selectors_name]][transformed_var_unordered_indices]
aho
committed
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
## var in common
} else if (var_with_selectors_name %in% names(transformed_common_vars)) {
m <- prod(dim(transformed_common_vars[[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]][transformed_var_unordered_indices]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
}
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:m
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SIZE OF ORIGINAL VARIABLE:")
print(n)
print("-> SIZE OF TRANSFORMED VARIABLE:")
if (with_transform) print(m)
print("-> STRUCTURE OF ORDERED VAR:")
print(str(var_ordered))
print("-> UNORDER INDICES:")
print(var_unorder_indices)
}
}
# If this inner dim's selector (var_with_selectors) is an array
# that has file dim as dimension (e.g., across or depend relationship)
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
if (any(names(var_dims) %in% found_file_dims[[i]])) {
if (with_transform) {
stop("Requested transformation for inner dimension '",
inner_dim, "' but provided selectors for such dimension ",
"over one or more file dimensions. This is not ",
"supported. Either request no transformation for the ",
"dimension '", inner_dim, "' or specify the ",
"selectors for this dimension without the file dimensions.")
}
var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])]
var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])]
}
## # Keep the selectors if they correspond to a variable that will be transformed.
## if (with_transform) {
## if (var_with_selectors_name %in% names(picked_vars[[i]])) {
## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
## } else if (var_with_selectors_name %in% names(picked_common_vars)) {
## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
## }
## transformed_var_dims <- dim(transformed_var_with_selectors)
## transformed_var_file_dims <- 1
## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) {
## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## }
##if (inner_dim %in% dims_to_check) {
##print("111m")
##print(str(transformed_var_dims))
##}
##
## m <- prod(transformed_var_dims)
## }
# Work out var file dims and inner dims.
if (inner_dim %in% unlist(inner_dims_across_files)) {
#TODO: if (chunk_amount != number of chunks in selector_file_dims), crash
if (length(var_dims) > 1) {
stop("Specified a '", inner_dim, "_var' for the dimension '",
inner_dim, "', which goes across files (across '", crossed_file_dim,
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
"'). The specified variable, '", var_with_selectors_name, "', has more ",
"than one dimension and can not be used as selector variable. ",
"Select another variable or fix it in the files.")
}
}
## TODO HERE::
#- indices_of_first_files_with_data may change, because array is now extended
var_full_dims <- dim(var_with_selectors)
} else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) ||
(is.character(selector_array) && (length(selector_array) == 1) &&
(selector_array %in% c('all', 'first', 'last')) &&
!is.null(file_dim_reader))) {
#### TODO HERE::
###- indices_of_first_files_with_data may change, because array is now extended
# Lines moved above for better performance.
##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]],
## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1))
} else {
stop(paste0("Can not translate the provided selectors for '", inner_dim,
"' to numeric indices. Provide numeric indices and a ",
"'file_dim_reader' function, or a '", inner_dim,
"_var' in order to calculate the indices."))
}
# At this point, if no selector variable was provided, the variable
# data_dims has been populated. If a selector variable was provided,
# the variables var_dims, var_file_dims and var_full_dims have been
# populated instead.
#////////////////////////////////////////////////////////////////////
# If the inner dim lengths differ among files,
# need to know each length to create the indices for each file later.
inner_dim_lengths <- NULL
# inner_dim_lengths here includes all the files, but we only want
# the files of fyear for certain "sdate". We will categorize it later.
inner_dim_lengths <- tryCatch({
sapply(data_dims_each_file, '[[', inner_dim)
}, error = function(x) {
sapply(data_dims_each_file, '[[',
synonims[[inner_dim]][which(synonims[[inner_dim]] != inner_dim)])
})
# Use other file dims as the factors to categorize.
other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)]
other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev)
other_file_dims_factor <- expand.grid(other_file_dims)
selector_indices_save_subset <-
lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim))
# Put the fyear with the same other file dims (sdate, etc.) together, and find the largest length (in theory all of them should be the same)
inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1])
for (i_factor in 1:length(inner_dim_lengths_cat)) {
inner_dim_lengths_cat[[i_factor]] <-
inner_dim_lengths[which(sapply(lapply(
selector_indices_save_subset, '==',
other_file_dims_factor[i_factor, ]), all))]
}
# Find the largest length of each time step
inner_dim_lengths <- do.call(pmax, inner_dim_lengths_cat)
}
fri <- first_round_indices <- NULL
sri <- second_round_indices <- NULL
# This variable will keep the indices needed to crop the transformed
# variable (the one that has been transformed without being subset
# with the first round indices).
tvi <- tranaformed_variable_indices <- NULL
ordered_fri <- NULL
ordered_sri <- NULL
if ((length(selector_array) == 1) && is.character(selector_array) &&
(selector_array %in% c('all', 'first', 'last')) &&
(chunks[[inner_dim]]['n_chunks'] == 1)) {
if (is.null(var_with_selectors_name)) {
fri <- vector('list', length = chunk_amount)
dim(fri) <- c(chunk_amount)
sri <- vector('list', length = chunk_amount)
dim(sri) <- c(chunk_amount)
if (selector_array == 'all') {
if (is.null(inner_dim_lengths) | length(unique(inner_dim_lengths)) <= 1) { #old code
fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
} else { # files have different inner dim length
for (i_chunk in 1:length(fri)) {
fri[[i_chunk]] <- 1:inner_dim_lengths[i_chunk]
}
}
taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else if (selector_array == 'first') {
fri[[1]] <- 1
taken_chunks[1] <- TRUE
#sri <- NULL
} else if (selector_array == 'last') {
fri[[chunk_amount]] <- data_dims[inner_dim]
taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
}
} else {
if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) {
stop("The variable '", var_with_selectors_name, "' must also be ",
"requested for the file dimension '", crossed_file_dim, "' in ",
"this configuration.")
}
fri <- vector('list', length = prod(var_file_dims))
dim(fri) <- var_file_dims
ordered_fri <- fri
sri <- vector('list', length = prod(var_file_dims))
dim(sri) <- var_file_dims
ordered_sri <- sri
if (selector_array == 'all') {
# TODO: Populate ordered_fri
ordered_fri[] <- replicate(prod(var_file_dims), list(1:n))
fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n]))
taken_chunks <- rep(TRUE, chunk_amount)
if (!with_transform) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else {
ordered_sri[] <- replicate(prod(var_file_dims), list(1:m))
aho
committed
if (inner_dim %in% names(dim_reorder_params)) {
sri[] <- replicate(prod(var_file_dims), list(transformed_var_unordered_indices[1:m]))
} else {
sri[] <- replicate(prod(var_file_dims), list(1:m))
}
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
## var_file_dims instead??
#if (!aiat) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#} else {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#}
tvi <- 1:m
}
} else if (selector_array == 'first') {
taken_chunks[1] <- TRUE
if (!with_transform) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
#taken_chunks[1] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
# TODO: TO BE IMPROVED
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1:ceiling(m / n)
sri[[1]] <- 1:ceiling(m / n)
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[1]] <- 1:ceiling(m / n)
fri[[1]] <- var_unorder_indices[1:ceiling(m / n)]
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1
sri[[1]] <- 1
tvi <- 1
}
}
} else if (selector_array == 'last') {
taken_chunks[length(taken_chunks)] <- TRUE
if (!with_transform) {
ordered_fri[[prod(var_file_dims)]] <- n
fri[[prod(var_file_dims)]] <- var_unorder_indices[n]
#taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[prod(var_file_dims)]] <- prod(var_dims)
fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING.
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n
fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1
sri[[prod(var_file_dims)]] <- 1
tvi <- 1
}
}
}
}
# If the selectors are not 'all', 'first', 'last', ...
} else {
if (!is.null(var_with_selectors_name)) {
unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims)))
if ((length(unmatching_file_dims) > 0)) {
raise_error <- FALSE
if (!(length(unmatching_file_dims) == 1 &
names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim &
inner_dim %in% names(selector_inner_dims))) {
raise_error <- TRUE
}
}
if (raise_error) {
stop("Provided selectors for the dimension '", inner_dim, "' must have as many ",
"file dimensions as the variable the dimension is defined along, '",
var_with_selectors_name, "', with the exceptions of the file pattern dimension ('",
found_pattern_dim, "') and any depended file dimension (if specified as ",
"depended dimension in parameter 'inner_dims_across_files' and the ",
"depending file dimension is present in the provided selector array).")
}
}
if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) {
if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) {
stop("Size of selector file dimensions must match size of the corresponding ",
"variable dimensions.")
}
}
}
## TODO: If var dimensions are not in the same order as selector dimensions, reorder
if (is.null(names(selector_file_dims))) {
if (!is.null(crossed_file_dim)) {
fri_dim_names <- c(fri_dim_names, crossed_file_dim)
}
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)]
fri_dims <- rep(NA, length(fri_dim_names))
names(fri_dims) <- fri_dim_names
fri_dims[names(selector_file_dims)] <- selector_file_dims
#NOTE: Not sure how it works here, but "chunk_amount" is the same as
# "selector_file_dims" above in the cases we've seen so far,
# and it causes problem when crossed_file_dim is more than one.
# if (!is.null(crossed_file_dim)) {
# fri_dims[crossed_file_dim] <- chunk_amount
# }
}
fri <- vector('list', length = prod(fri_dims))
dim(fri) <- fri_dims
sri <- vector('list', length = prod(fri_dims))
dim(sri) <- fri_dims
selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims)
selector_store_position <- fri_dims
for (j in 1:prod(dim(selector_file_dim_array))) {
selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ]
names(selector_indices_to_take) <- names(selector_file_dims)
selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
#TODO: Check if the non-subset part makes sense. "selector_indices_to_take" is an array if
# "selector_file_dims" is not 1 (if selector is an array with a file_dim dimname, ie.,
# time = [sdate = 2, time = 4].
if (!is.null(names(selector_indices_to_take))) {
sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
as.list(selector_indices_to_take), drop = 'selected')
} else {
sub_array_of_selectors <- selector_array
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
print("-> STRUCTURE OF A SUB ARRAY:")
print(str(sub_array_of_selectors))
print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:")
print(str(var_with_selectors))
print(dim(var_with_selectors))
}
}
if (selectors_are_indices) {
sub_array_of_values <- NULL
#} else if (!is.null(var_ordered)) {
# sub_array_of_values <- var_ordered
} else {
var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
as.list(var_indices_to_take), drop = 'selected')
} else {
sub_array_of_values <- var_with_selectors
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS")
print(str(sub_array_of_values))
print(dim(sub_array_of_values))
print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:")
# The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4],
# or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate')
if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) {
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
if (length(sub_array_of_selectors) > 0) {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.")
}
}
if (selectors_are_indices) {
if (!is.null(var_with_selectors_name)) {
max_allowed <- ifelse(aiat, m, n)
} else {
max_allowed <- data_dims[inner_dim]
}
if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) ||
any(na.omit(unlist(sub_array_of_selectors)) < 1)) {
stop("Provided indices out of range for dimension '", inner_dim, "' ",
"for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ",
max_allowed, ").")
}
}
# The selector_checker will return either a vector of indices or a list
# with the first and last desired indices.
#NOTE: goes_across_prime_meridian may be TRUE only if the selector is list of values
# If selectors are indices and _reorder = CircularSort() is used, change
# is_circular_dim to TRUE.
if (!is.null(var_ordered) & selectors_are_indices &
!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
if (is_circular_dim & is.list(sub_array_of_selectors)) {
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
}
# If selectors are values and _reorder is defined.
if (!is.null(var_ordered) && !selectors_are_indices) {
if (!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
}
if (is.list(sub_array_of_selectors)) {
## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here.
if (is_circular_dim) {
# NOTE: Use CircularSort() to put the values in the assigned range, and get the order.
# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1].
# 'goes_across_prime_meridian' means the selector range across the border. For example,
# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE.
# dim_reorder_params is a list of Reorder function, i.e.,
# Sort() or CircularSort().
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
#NOTE: HERE change to the same code as below (under 'else'). Not sure why originally
# it uses additional lines, which make reorder not work.
# If "_reorder" is used, here 'sub_array_of_selectors' is adjusted to
# follow the reorder rule. E.g., if lat = values(list(-90, 90)) and
# lat_reorder = Sort(decreasing = T), 'sub_array_of_selectors' changes
# from list(-90, 90) to list(90, -90).
sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x)
#sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))
#sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix
#sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder])
# Add warning if the boundary is out of range
if (min(unlist(sub_array_of_selectors)) < range(var_ordered)[1]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > range(var_ordered)[2]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered), bound = 'upper')
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
}
} else {
sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x
}
}
# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case
# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of
#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor().
sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) {
if (!(sub_array_of_selectors[[1]] %in% var_ordered)){
sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1
}
if (!(sub_array_of_selectors[[2]] %in% var_ordered)){
sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1
}
}
#NOTE: the possible case?
if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) {
stop("The case is goes_across_prime_meridian but no adjustion for the indices!")
}
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(var_ordered),
", ", max(var_ordered), "]."))
}
} else {
# Add warning if the boundary is out of range
if (is.list(sub_array_of_selectors) & !selectors_are_indices) {
if (min(unlist(sub_array_of_selectors)) < min(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > max(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'upper')
# sub_array_of_values here is NULL if selectors are indices, and
# 'sub_array_of_indices' will be sub_array_of_selectors, i.e., the indices
# assigned (but rounded).
sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(sub_array_of_values),
", ", max(sub_array_of_values), "]."))
}
}
#////////////////////////////////////////////////////////////
# If chunking along this inner dim, this part creates the indices for each chunk.
# For example, if 'sub_array_of_indices' is c(5:10) and chunked into 2,
# 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) for chunk = 2.
# If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes
# list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2.
#TODO: The list can be turned into vector here? So afterward no need to judge if
# it is list or vector.
#NOTE: chunking cannot be done if goes_across_prime_meridian = TRUE.
#TODO: Change the algorithm to make chunking works for goes_across_prime_meridian = TRUE.
# If goes_across_prime_meridian = TRUE, "sub_array_of_indices" are not
# continuous numbers. For example, list(37, 1243) means sub_array_of_fri
# that will be generated based on sub_array_of_indices later is c(1:37, 1243:1296).
# the longitude are separated into 2 parts, therefore, cannot be chunked here.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
if (goes_across_prime_meridian) {
stop(paste0("Chunking over ", inner_dim, " that goes across the circular border assigned by '", inner_dim, "_reorder' is not supported by startR now. Adjust the ", inner_dim, " selector to be within the border or change the borders." ))
}
if (!is.list(sub_array_of_indices)) {
sub_array_of_indices <-
sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices),
chunks[[inner_dim]]["chunk"],
chunks[[inner_dim]]["n_chunks"],
inner_dim)]
get_chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]),
chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"],
inner_dim)
vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
sub_array_of_indices[[1]] <- vect[tmp[1]]
sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]]
}
}
# The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk.
#----------------------------------------------------------
# 'sub_sub_array_of_values' is for sri chunking. If this inner dim is chunked,
# the sri has to follow the chunking of fri. Therefore, we save the original
# value of this chunk here for later use. We'll find the corresponding
# transformed value within 'sub_sub_array_of_values' and chunk sri.
if (with_transform & chunks[[inner_dim]]["n_chunks"] > 1) {
input_array_of_values <- var_ordered
} else {
if (is.null(sub_array_of_values)) { # selectors are indices
#NOTE: Not sure if 'vars_to_transform' is the correct one to use.
input_array_of_values <- vars_to_transform[[var_with_selectors_name]]
} else {
input_array_of_values <- sub_array_of_values
tmp <- generate_sub_sub_array_of_values(
input_array_of_values, sub_array_of_indices,
number_of_chunk = chunks[[inner_dim]]["chunk"])
sub_sub_array_of_values <- tmp$sub_sub_array_of_values
previous_sub_sub_array_of_values <- tmp$previous_sub_sub_array_of_values
}
#----------------------------------------------------------
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> TRANSFORMATION REQUESTED?")
print(with_transform)
print("-> BETA:")
print(beta)
}
}
if (with_transform) {
# If there is a transformation and selector values are provided, these
# selectors will be processed in the same way either if aiat = TRUE or
# aiat = FALSE.
## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
## otherwise, do what's coded.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
}
}
# Generate sub_array_of_fri
sub_array_of_fri <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim)
# May be useful for crop = T. 'subset_vars_to_transform' may not need
# to include extra cells, but currently it shows mistake if not include.
sub_array_of_fri_no_beta <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim, add_beta = FALSE)
subset_vars_to_transform <- vars_to_transform
if (!is.null(var_ordered)) {
#NOTE: If var_ordered is common_vars, it doesn't have attributes and it is a vector.
# Turn it into array and add dimension name.
if (!is.array(var_ordered)) {
var_ordered <- as.array(var_ordered)
names(dim(var_ordered)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
} else {
if (!selectors_are_indices) { # selectors are values
#NOTE: It should be redundant because without reordering the var should remain array
## But just stay same with above...
if (!is.array(sub_array_of_values)) {
sub_array_of_values <- as.array(sub_array_of_values)
names(dim(sub_array_of_values)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
} else { # selectors are indices
subset_vars_to_transform[[var_with_selectors_name]] <-
Subset(subset_vars_to_transform[[var_with_selectors_name]],
inner_dim, sub_array_of_fri)
}
transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
variables = subset_vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]],
crop_domain = transform_crop_domain),
transform_params))$variables[[var_with_selectors_name]]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[inner_dim]])) {
transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
transformed_subset_var <- transformed_subset_var_reorder$x
#NOTE: The fix here solves the mis-ordered lon when across_meridian.
transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
# transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
} else {
transformed_subset_var_unorder <- 1:length(transformed_subset_var)
}
if (!selectors_are_indices) { # selectors are values
sub_array_of_sri <- selector_checker(
sub_array_of_selectors, transformed_subset_var,
tolerance = if (aiat) {
tolerance_params[[inner_dim]]
} else {
NULL
})