Newer
Older
# picked_vars[[i]]
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_vars[[i]], transform_vars, picked_vars_ordered[[i]])
# picked_common_vars
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars, transform_vars, picked_common_vars_ordered)
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
# Save the crop domain from selectors of transformed vars
# PROB: It doesn't consider aiat. If aiat, the indices are for
# after transformed data; we don't know the corresponding
# values yet.
transform_crop_domain <- vector('list')
for (transform_var in transform_vars) {
transform_crop_domain[[transform_var]] <- dat[[i]][['selectors']][[transform_var]][[1]]
# Turn indices into values
if (attr(transform_crop_domain[[transform_var]], 'indices')) {
if (transform_var %in% names(common_return_vars)) {
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars_ordered[[transform_var]])
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_common_vars[[transform_var]])
}
} else { # return_vars
if (transform_var %in% names(dim_reorder_params)) {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars_ordered[[i]][[transform_var]])
} else {
transform_crop_domain[[transform_var]] <-
generate_transform_crop_domain_values(
transform_crop_domain[[transform_var]],
picked_vars = picked_vars[[i]][[transform_var]])
}
}
} else if (is.atomic(transform_crop_domain[[transform_var]])) {
# if it is values but vector
transform_crop_domain[[transform_var]] <-
c(transform_crop_domain[[transform_var]][1],
tail(transform_crop_domain[[transform_var]], 1))
}
# For CDORemapper (not sure if it's also suitable for other transform functions):
# If lon_reorder is not used + lon selector is from big to small,
# lonmax and lonmin need to be exchanged. The ideal way is to
# exchange in CDORemapper(), but lon_reorder is used or not is not
# known by CDORemapper().
# NOTE: lat's order doesn't matter, big to small and small to big
# both work. Since we shouldn't assume transform_var in Start(),
# e.g., transform_var can be anything transformable in the assigned transform function,
# we exchange whichever parameter here anyway.
if (!transform_var %in% names(dim_reorder_params) &
diff(unlist(transform_crop_domain[[transform_var]])) < 0) {
transform_crop_domain[[transform_var]] <- rev(transform_crop_domain[[transform_var]])
}
}
# Transform the variables
transformed_data <- do.call(transform, c(list(data_array = NULL,
variables = vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]],
crop_domain = transform_crop_domain),
transform_params))
# Discard the common transformed variables if already transformed before
if (!is.null(transformed_common_vars)) {
common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(common_ones) > 0) {
transformed_data$variables <- transformed_data$variables[-common_ones]
}
}
transformed_vars[[i]] <- list()
transformed_vars_ordered[[i]] <- list()
transformed_vars_unorder_indices[[i]] <- list()
# Order the transformed variables if needed
# 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above.
for (var_to_read in names(transformed_data$variables)) {
if (var_to_read %in% unlist(var_params)) {
associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
aho
committed
if ((associated_dim_name %in% names(dim_reorder_params))) {
## Is this check really needed?
if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) {
stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
"whose coordinate variable that has more than 1 dimension (after ",
"transform). This is not supported.")
}
ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]])
attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables')
if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
}
# Save the indices to reorder back the ordered variable values.
aho
committed
# This will be used to define the first round indices (if aiat) or second round
# indices (if !aiat).
unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
if (var_to_read %in% names(picked_common_vars)) {
transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x
transformed_common_vars_unorder_indices[[var_to_read]] <- unorder
} else {
transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x
transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder
}
}
}
}
transformed_picked_vars_names <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables))
if (length(transformed_picked_vars_names) > 0) {
transformed_picked_vars_names <- names(picked_vars[[i]])[transformed_picked_vars_names]
transformed_vars[[i]][transformed_picked_vars_names] <- transformed_data$variables[transformed_picked_vars_names]
transformed_picked_common_vars_names <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(transformed_picked_common_vars_names) > 0) {
transformed_picked_common_vars_names <- names(picked_common_vars)[transformed_picked_common_vars_names]
transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars_names]
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
}
}
}
# Once the variables are transformed, we compute the indices to be
# taken for each inner dimension.
# In all cases, indices will have to be computed to know which data
# values to take from the original data for each dimension (if a
# variable is specified for that dimension, it will be used to
# convert the provided selectors into indices). These indices are
# referred to as 'first round of indices'.
# The taken data will then be transformed if needed, together with
# the dimension variable if specified, and, in that case, indices
# will have to be computed again to know which values to take from the
# transformed data. These are the 'second round of indices'. In the
# case there is no transformation, the second round of indices will
# be all the available indices, i.e. from 1 to the number of taken
# values with the first round of indices.
for (inner_dim in expected_inner_dims[[i]]) {
if (debug) {
print("-> DEFINING INDICES FOR INNER DIMENSION:")
print(inner_dim)
}
file_dim <- NULL
if (inner_dim %in% unlist(inner_dims_across_files)) {
file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]]
chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]])
names(chunk_amount) <- file_dim
} else {
chunk_amount <- 1
}
# In the special case that the selectors for a dimension are 'all', 'first', ...
# and chunking (dividing in more than 1 chunk) is requested, the selectors are
# replaced for equivalent indices.
if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) &&
(chunks[[inner_dim]]['n_chunks'] != 1)) {
replace_character_with_indices(selectors = dat[[i]][['selectors']][[inner_dim]][[1]], data_dims = data_dims[[inner_dim]], chunk_amount)
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
# The selectors for the inner dimension are taken.
selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (debug) {
if (inner_dim %in% dims_to_check) {
print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':"))
print("-> STRUCTURE OF SELECTOR ARRAY:")
print(str(selector_array))
print("-> PICKED VARS:")
print(picked_vars)
print("-> TRANSFORMED VARS:")
print(transformed_vars)
}
}
if (is.null(dim(selector_array))) {
dim(selector_array) <- length(selector_array)
}
if (is.null(names(dim(selector_array)))) {
if (length(dim(selector_array)) == 1) {
names(dim(selector_array)) <- inner_dim
} else {
stop("Provided selector arrays must be provided with dimension ",
"names. Found an array of selectors without dimension names ",
"for the dimension '", inner_dim, "'.")
}
}
selectors_are_indices <- FALSE
if (!is.null(attr(selector_array, 'indices'))) {
if (!is.logical(attr(selector_array, 'indices'))) {
stop("The atribute 'indices' for the selectors for the dimension '",
inner_dim, "' must be TRUE or FALSE.")
}
selectors_are_indices <- attr(selector_array, 'indices')
}
taken_chunks <- rep(FALSE, chunk_amount)
selector_file_dims <- 1
#NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname.
# I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2)
if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])]
}
selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))]
var_with_selectors <- NULL
var_with_selectors_name <- var_params[[inner_dim]]
var_ordered <- NULL
var_unorder_indices <- NULL
with_transform <- FALSE
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
# If the selectors come with an associated variable
if (!is.null(var_with_selectors_name)) {
if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) {
with_transform <- TRUE
if (!is.null(file_dim)) {
stop("Requested a transformation over the dimension '",
inner_dim, "', wich goes across files. This feature ",
"is not supported. Either do the request without the ",
"transformation or request it over dimensions that do ",
"not go across files.")
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:")
print(var_with_selectors_name)
print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:")
print(transform_vars)
print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:")
print(str(transform))
}
}
aho
committed
# For fri
if (var_with_selectors_name %in% names(picked_vars[[i]])) {
var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]]
var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]]
} else if (var_with_selectors_name %in% names(picked_common_vars)) {
var_with_selectors <- picked_common_vars[[var_with_selectors_name]]
var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]]
}
n <- prod(dim(var_with_selectors))
# if no _reorder, var_unorder_indices is NULL
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:n
}
aho
committed
# For sri
aho
committed
## var in 'dat'
if (var_with_selectors_name %in% names(transformed_vars[[i]])) {
m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_vars[[i]][transformed_picked_vars_names][[var_with_selectors_name]][transformed_var_unordered_indices]
aho
committed
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
## var in common
} else if (var_with_selectors_name %in% names(transformed_common_vars)) {
m <- prod(dim(transformed_common_vars[[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
}
aho
committed
# For making sri ordered later
transformed_var_unordered_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
if (is.null(transformed_var_unordered_indices)) {
transformed_var_unordered_indices <- 1:m
}
transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]][transformed_var_unordered_indices]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[var_with_selectors_name]])) {
transformed_var_with_selectors_reorder <- dim_reorder_params[[var_with_selectors_name]](transformed_var_with_selectors)
transformed_var_with_selectors <- transformed_var_with_selectors_reorder$x
transformed_var_with_selectors_unorder <- transformed_var_with_selectors_reorder$ix
} else {
transformed_var_with_selectors_unorder <- 1:length(transformed_var_with_selectors)
}
}
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:m
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SIZE OF ORIGINAL VARIABLE:")
print(n)
print("-> SIZE OF TRANSFORMED VARIABLE:")
if (with_transform) print(m)
print("-> STRUCTURE OF ORDERED VAR:")
print(str(var_ordered))
print("-> UNORDER INDICES:")
print(var_unorder_indices)
}
}
# If this inner dim's selector (var_with_selectors) is an array
# that has file dim as dimension (e.g., across or depend relationship)
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
if (any(names(var_dims) %in% found_file_dims[[i]])) {
if (with_transform) {
stop("Requested transformation for inner dimension '",
inner_dim, "' but provided selectors for such dimension ",
"over one or more file dimensions. This is not ",
"supported. Either request no transformation for the ",
"dimension '", inner_dim, "' or specify the ",
"selectors for this dimension without the file dimensions.")
}
var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])]
var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])]
}
## # Keep the selectors if they correspond to a variable that will be transformed.
## if (with_transform) {
## if (var_with_selectors_name %in% names(picked_vars[[i]])) {
## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
## } else if (var_with_selectors_name %in% names(picked_common_vars)) {
## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
## }
## transformed_var_dims <- dim(transformed_var_with_selectors)
## transformed_var_file_dims <- 1
## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) {
## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## }
##if (inner_dim %in% dims_to_check) {
##print("111m")
##print(str(transformed_var_dims))
##}
##
## m <- prod(transformed_var_dims)
## }
# Work out var file dims and inner dims.
if (inner_dim %in% unlist(inner_dims_across_files)) {
#TODO: if (chunk_amount != number of chunks in selector_file_dims), crash
if (length(var_dims) > 1) {
stop("Specified a '", inner_dim, "_var' for the dimension '",
inner_dim, "', which goes across files (across '", file_dim,
"'). The specified variable, '", var_with_selectors_name, "', has more ",
"than one dimension and can not be used as selector variable. ",
"Select another variable or fix it in the files.")
}
}
## TODO HERE::
#- indices_of_first_files_with_data may change, because array is now extended
var_full_dims <- dim(var_with_selectors)
} else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) ||
(is.character(selector_array) && (length(selector_array) == 1) &&
(selector_array %in% c('all', 'first', 'last')) &&
!is.null(file_dim_reader))) {
#### TODO HERE::
###- indices_of_first_files_with_data may change, because array is now extended
# Lines moved above for better performance.
##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]],
## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1))
} else {
stop(paste0("Can not translate the provided selectors for '", inner_dim,
"' to numeric indices. Provide numeric indices and a ",
"'file_dim_reader' function, or a '", inner_dim,
"_var' in order to calculate the indices."))
}
# At this point, if no selector variable was provided, the variable
# data_dims has been populated. If a selector variable was provided,
# the variables var_dims, var_file_dims and var_full_dims have been
# populated instead.
#////////////////////////////////////////////////////////////////////
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
fri <- first_round_indices <- NULL
sri <- second_round_indices <- NULL
# This variable will keep the indices needed to crop the transformed
# variable (the one that has been transformed without being subset
# with the first round indices).
tvi <- tranaformed_variable_indices <- NULL
ordered_fri <- NULL
ordered_sri <- NULL
if ((length(selector_array) == 1) && is.character(selector_array) &&
(selector_array %in% c('all', 'first', 'last')) &&
(chunks[[inner_dim]]['n_chunks'] == 1)) {
if (is.null(var_with_selectors_name)) {
fri <- vector('list', length = chunk_amount)
dim(fri) <- c(chunk_amount)
sri <- vector('list', length = chunk_amount)
dim(sri) <- c(chunk_amount)
if (selector_array == 'all') {
fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else if (selector_array == 'first') {
fri[[1]] <- 1
taken_chunks[1] <- TRUE
#sri <- NULL
} else if (selector_array == 'last') {
fri[[chunk_amount]] <- data_dims[inner_dim]
taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
}
} else {
if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) {
stop("The variable '", var_with_selectors_name, "' must also be ",
"requested for the file dimension '", file_dim, "' in ",
"this configuration.")
}
fri <- vector('list', length = prod(var_file_dims))
dim(fri) <- var_file_dims
ordered_fri <- fri
sri <- vector('list', length = prod(var_file_dims))
dim(sri) <- var_file_dims
ordered_sri <- sri
if (selector_array == 'all') {
# TODO: Populate ordered_fri
ordered_fri[] <- replicate(prod(var_file_dims), list(1:n))
fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n]))
taken_chunks <- rep(TRUE, chunk_amount)
if (!with_transform) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else {
ordered_sri[] <- replicate(prod(var_file_dims), list(1:m))
aho
committed
if (inner_dim %in% names(dim_reorder_params)) {
sri[] <- replicate(prod(var_file_dims), list(transformed_var_unordered_indices[1:m]))
} else {
sri[] <- replicate(prod(var_file_dims), list(1:m))
}
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
## var_file_dims instead??
#if (!aiat) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#} else {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#}
tvi <- 1:m
}
} else if (selector_array == 'first') {
taken_chunks[1] <- TRUE
if (!with_transform) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
#taken_chunks[1] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
# TODO: TO BE IMPROVED
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1:ceiling(m / n)
sri[[1]] <- 1:ceiling(m / n)
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[1]] <- 1:ceiling(m / n)
fri[[1]] <- var_unorder_indices[1:ceiling(m / n)]
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1
sri[[1]] <- 1
tvi <- 1
}
}
} else if (selector_array == 'last') {
taken_chunks[length(taken_chunks)] <- TRUE
if (!with_transform) {
ordered_fri[[prod(var_file_dims)]] <- n
fri[[prod(var_file_dims)]] <- var_unorder_indices[n]
#taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[prod(var_file_dims)]] <- prod(var_dims)
fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING.
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n
fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1
sri[[prod(var_file_dims)]] <- 1
tvi <- 1
}
}
}
}
# If the selectors are not 'all', 'first', 'last', ...
} else {
if (!is.null(var_with_selectors_name)) {
unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims)))
if ((length(unmatching_file_dims) > 0)) {
raise_error <- FALSE
if (is.null(file_dim)) {
raise_error <- TRUE
} else {
if (!((length(unmatching_file_dims) == 1) &&
(names(var_file_dims)[unmatching_file_dims] == file_dim) &&
(inner_dim %in% names(selector_inner_dims)))) {
raise_error <- TRUE
}
}
if (raise_error) {
stop("Provided selectors for the dimension '", inner_dim, "' must have as many ",
"file dimensions as the variable the dimension is defined along, '",
var_with_selectors_name, "', with the exceptions of the file pattern dimension ('",
found_pattern_dim, "') and any depended file dimension (if specified as ",
"depended dimension in parameter 'inner_dims_across_files' and the ",
"depending file dimension is present in the provided selector array).")
}
}
if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) {
if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) {
stop("Size of selector file dimensions must mach size of requested ",
"variable dimensions.")
}
}
}
## TODO: If var dimensions are not in the same order as selector dimensions, reorder
if (is.null(names(selector_file_dims))) {
if (is.null(file_dim)) {
fri_dims <- 1
} else {
fri_dims <- chunk_amount
names(fri_dims) <- file_dim
}
} else {
fri_dim_names <- names(selector_file_dims)
if (!is.null(file_dim)) {
fri_dim_names <- c(fri_dim_names, file_dim)
}
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)]
fri_dims <- rep(NA, length(fri_dim_names))
names(fri_dims) <- fri_dim_names
fri_dims[names(selector_file_dims)] <- selector_file_dims
if (!is.null(file_dim)) {
fri_dims[file_dim] <- chunk_amount
}
}
fri <- vector('list', length = prod(fri_dims))
dim(fri) <- fri_dims
sri <- vector('list', length = prod(fri_dims))
dim(sri) <- fri_dims
selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims)
selector_store_position <- fri_dims
for (j in 1:prod(dim(selector_file_dim_array))) {
selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ]
names(selector_indices_to_take) <- names(selector_file_dims)
selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
as.list(selector_indices_to_take), drop = 'selected')
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
print("-> STRUCTURE OF A SUB ARRAY:")
print(str(sub_array_of_selectors))
print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:")
print(str(var_with_selectors))
print(dim(var_with_selectors))
}
}
if (selectors_are_indices) {
sub_array_of_values <- NULL
#} else if (!is.null(var_ordered)) {
# sub_array_of_values <- var_ordered
} else {
var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
as.list(var_indices_to_take), drop = 'selected')
} else {
sub_array_of_values <- var_with_selectors
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS")
print(str(sub_array_of_values))
print(dim(sub_array_of_values))
print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:")
print(file_dim)
}
}
# The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4],
# or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate')
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) {
if (length(sub_array_of_selectors) > 0) {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.")
}
}
if (selectors_are_indices) {
if (!is.null(var_with_selectors_name)) {
max_allowed <- ifelse(aiat, m, n)
} else {
max_allowed <- data_dims[inner_dim]
}
if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) ||
any(na.omit(unlist(sub_array_of_selectors)) < 1)) {
stop("Provided indices out of range for dimension '", inner_dim, "' ",
"for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ",
max_allowed, ").")
}
}
# The selector_checker will return either a vector of indices or a list
# with the first and last desired indices.
#NOTE: goes_across_prime_meridian may be TRUE only if the selector is list of values
# If selectors are indices and _reorder = CircularSort() is used, change
# is_circular_dim to TRUE.
if (!is.null(var_ordered) & selectors_are_indices &
!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
if (is_circular_dim & is.list(sub_array_of_selectors)) {
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
}
# If selectors are values and _reorder is defined.
if (!is.null(var_ordered) && !selectors_are_indices) {
if (!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
}
if (is.list(sub_array_of_selectors)) {
## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here.
if (is_circular_dim) {
# NOTE: Use CircularSort() to put the values in the assigned range, and get the order.
# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1].
# 'goes_across_prime_meridian' means the selector range across the border. For example,
# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE.
# dim_reorder_params is a list of Reorder function, i.e.,
# Sort() or CircularSort().
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
#NOTE: HERE change to the same code as below (under 'else'). Not sure why originally
# it uses additional lines, which make reorder not work.
# If "_reorder" is used, here 'sub_array_of_selectors' is adjusted to
# follow the reorder rule. E.g., if lat = values(list(-90, 90)) and
# lat_reorder = Sort(decreasing = T), 'sub_array_of_selectors' changes
# from list(-90, 90) to list(90, -90).
sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x)
#sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))
#sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix
#sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder])
# Add warning if the boundary is out of range
if (min(unlist(sub_array_of_selectors)) < range(var_ordered)[1]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > range(var_ordered)[2]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered), bound = 'upper')
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
}
} else {
sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x
}
}
# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case
# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of
#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor().
sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) {
if (!(sub_array_of_selectors[[1]] %in% var_ordered)){
sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1
}
if (!(sub_array_of_selectors[[2]] %in% var_ordered)){
sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1
}
}
#NOTE: the possible case?
if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) {
stop("The case is goes_across_prime_meridian but no adjustion for the indices!")
}
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(var_ordered),
", ", max(var_ordered), "]."))
}
} else {
# Add warning if the boundary is out of range
if (is.list(sub_array_of_selectors) & !selectors_are_indices) {
if (min(unlist(sub_array_of_selectors)) < min(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > max(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'upper')
# sub_array_of_values here is NULL if selectors are indices, and
# 'sub_array_of_indices' will be sub_array_of_selectors, i.e., the indices
# assigned (but rounded).
sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(sub_array_of_values),
", ", max(sub_array_of_values), "]."))
}
}
#////////////////////////////////////////////////////////////
# If chunking along this inner dim, this part creates the indices for each chunk.
# For example, if 'sub_array_of_indices' is c(5:10) and chunked into 2,
# 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) for chunk = 2.
# If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes
# list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2.
#TODO: The list can be turned into vector here? So afterward no need to judge if
# it is list or vector.
#NOTE: chunking cannot be done if goes_across_prime_meridian = TRUE.
#TODO: Change the algorithm to make chunking works for goes_across_prime_meridian = TRUE.
# If goes_across_prime_meridian = TRUE, "sub_array_of_indices" are not
# continuous numbers. For example, list(37, 1243) means sub_array_of_fri
# that will be generated based on sub_array_of_indices later is c(1:37, 1243:1296).
# the longitude are separated into 2 parts, therefore, cannot be chunked here.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
if (goes_across_prime_meridian) {
stop(paste0("Chunking over ", inner_dim, " that goes across the circular border assigned by '", inner_dim, "_reorder' is not supported by startR now. Adjust the ", inner_dim, " selector to be within the border or change the borders." ))
}
if (!is.list(sub_array_of_indices)) {
sub_array_of_indices <-
sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
chunks[[inner_dim]]["chunk"],
chunks[[inner_dim]]["n_chunks"],
inner_dim)]
} else {
tmp <-
chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]),
chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"],
inner_dim)
vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
sub_array_of_indices[[1]] <- vect[tmp[1]]
sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]]
}
}
# The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk.
#----------------------------------------------------------
# 'sub_sub_array_of_values' is for sri chunking. If this inner dim is chunked,
# the sri has to follow the chunking of fri. Therefore, we save the original
# value of this chunk here for later use. We'll find the corresponding
# transformed value within 'sub_sub_array_of_values' and chunk sri.
if (with_transform & chunks[[inner_dim]]["n_chunks"] > 1) {
input_array_of_values <- var_ordered
} else {
if (is.null(sub_array_of_values)) { # selectors are indices
#NOTE: Not sure if 'vars_to_transform' is the correct one to use.
input_array_of_values <- vars_to_transform[[var_with_selectors_name]]
} else {
input_array_of_values <- sub_array_of_values
tmp <- generate_sub_sub_array_of_values(
input_array_of_values, sub_array_of_indices,
number_of_chunk = chunks[[inner_dim]]["chunk"])
sub_sub_array_of_values <- tmp$sub_sub_array_of_values
previous_sub_sub_array_of_values <- tmp$previous_sub_sub_array_of_values
}
#----------------------------------------------------------
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> TRANSFORMATION REQUESTED?")
print(with_transform)
print("-> BETA:")
print(beta)
}
}
if (with_transform) {
# If there is a transformation and selector values are provided, these
# selectors will be processed in the same way either if aiat = TRUE or
# aiat = FALSE.
## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
## otherwise, do what's coded.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
}
}
# Generate sub_array_of_fri
sub_array_of_fri <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim)
# May be useful for crop = T. 'subset_vars_to_transform' may not need
# to include extra cells, but currently it shows mistake if not include.
sub_array_of_fri_no_beta <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim, add_beta = FALSE)
subset_vars_to_transform <- vars_to_transform
if (!is.null(var_ordered)) {
#NOTE: If var_ordered is common_vars, it doesn't have attributes and it is a vector.
# Turn it into array and add dimension name.
if (!is.array(var_ordered)) {
var_ordered <- as.array(var_ordered)
names(dim(var_ordered)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
} else {
if (!selectors_are_indices) { # selectors are values
#NOTE: It should be redundant because without reordering the var should remain array
## But just stay same with above...
if (!is.array(sub_array_of_values)) {
sub_array_of_values <- as.array(sub_array_of_values)
names(dim(sub_array_of_values)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
} else { # selectors are indices
subset_vars_to_transform[[var_with_selectors_name]] <-
Subset(subset_vars_to_transform[[var_with_selectors_name]],
inner_dim, sub_array_of_fri)
}
transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
variables = subset_vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]],
crop_domain = transform_crop_domain),
transform_params))$variables[[var_with_selectors_name]]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[inner_dim]])) {
transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
transformed_subset_var <- transformed_subset_var_reorder$x
#NOTE: The fix here solves the mis-ordered lon when across_meridian.
transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
# transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
} else {
transformed_subset_var_unorder <- 1:length(transformed_subset_var)
}
if (!selectors_are_indices) { # selectors are values
sub_array_of_sri <- selector_checker(
sub_array_of_selectors, transformed_subset_var,
tolerance = if (aiat) {
tolerance_params[[inner_dim]]
} else {
NULL
})
sub_array_of_sri <- unique(sub_array_of_sri)
}
} else { # selectors are indices
# Need to transfer to values first, then use the values to get the new
# indices in transformed_subset_var.
if (is.list(sub_array_of_selectors)) {
ori_values <- vars_to_transform[[var_with_selectors_name]][sub_array_of_selectors[[1]]:sub_array_of_selectors[[2]]]
} else {
ori_values <- vars_to_transform[[var_with_selectors_name]][sub_array_of_selectors]
}
sub_array_of_sri <- selector_checker(
ori_values, transformed_subset_var,
tolerance = if (aiat) {
tolerance_params[[inner_dim]]
} else {
NULL
})
# Here may need to further modify considering aiat. If aiat = FALSE,
# (i.e., indices are taken before transform), unique() is needed.
sub_array_of_sri <- unique(sub_array_of_sri)
}
# Check if selectors fall out of the range of the transform grid
# It may happen when original lon is [-180, 180] while want to regrid to
# [0, 360], and lon selector = [-20, -10].
if (any(is.na(sub_array_of_sri))) {
stop(paste0("The selectors of ",
inner_dim, " are out of range of transform grid '",
transform_params$grid, "'. Use parameter '",
inner_dim, "_reorder' or change ", inner_dim,
" selectors."))
}
if (goes_across_prime_meridian) {
if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
# global longitude
sub_array_of_sri <- c(1:length(transformed_subset_var))
} else {
# the common case, i.e., non-global
# # NOTE: Because sub_array_of_sri order is exchanged due to
# # previous development, here [[1]] and [[2]] should exchange
# sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
# sub_array_of_sri[[2]]:length(transformed_subset_var))
#NOTE: the old code above is not suitable for all the possible cases.
# If sub_array_of_selectors is not exactly the value in transformed_subset_var, sub_array_of_sri[[1]] will be larger than sub_array_of_sri[[2]].
# Though here is not global case, we already have transformed_subset_var cropped as the desired region, so it is okay to use the whole length. Not sure if it will cause other problems...
sub_array_of_sri <- 1:length(transformed_subset_var)
}
} else if (is.list(sub_array_of_sri)) {
sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
}
# Chunk sub_array_of_sri if this inner_dim needs to be chunked
#TODO: Potential problem: the transformed_subset_var value falls between
# the end of sub_sub_array_of_values of the 1st chunk and the beginning
# of sub_sub_array_of_values of the 2nd chunk. Then, one sub_array_of_sri
# will miss. 'previous_sri' is checked and will be included if this
# situation happens, but don't know if the transformed result is
# correct or not.
#-------------------NEW--------------------------
# TODO: The chunking criteria may not be 100% correct. The current way
# is to pick the sri that larger than the minimal sub_sub_array_of_values
# and smaller than the maximal sub_sub_array_of_values; if it's
# the first chunk, make sure the 1st sri is included; if it's the
# last chunk, make sure the last sri is included.
#-------------------NEW_END------------------------
if (chunks[[inner_dim]]["n_chunks"] > 1) {
#-------------------NEW--------------------------
sub_array_of_sri_complete <- sub_array_of_sri
#-------------------NEW_END------------------------
if (is.list(sub_sub_array_of_values)) { # list
sub_array_of_sri <-
which(transformed_subset_var >= min(unlist(sub_sub_array_of_values)) &
transformed_subset_var <= max(unlist(sub_sub_array_of_values)))
#-------------------NEW--------------------------
# if it's 1st chunk & the first sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == 1 &
!(sub_array_of_sri_complete[1] %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri_complete[1], sub_array_of_sri)
}
# if it's last chunk & the last sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == chunks[[inner_dim]]["n_chunks"] &
!(tail(sub_array_of_sri_complete, 1) %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri, tail(sub_array_of_sri_complete, 1))
}
#-------------------NEW_END------------------------
# Check if sub_array_of_sri perfectly connects to the previous sri.
# If not, inlclude the previous sri.
#NOTE 1: don't know if the transform for the previous sri is
# correct or not.
#NOTE 2: If crop = T, sub_array_of_sri always starts from 1.
# Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) {
#------------------NEW--------------------
if (!is.null(previous_sub_sub_array_of_values)) {
# if decreasing = F
if (transformed_subset_var[1] < transformed_subset_var[2]) {
previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
if (previous_sri + 1 != sub_array_of_sri[1]) {
sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
}