Newer
Older
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
}
}
}
# Once we have the variable values, we can work out the indices
# for the implicitly defined selectors.
#
# Trnasforms a vector of indices v expressed in a world of
# length N from 1 to N, into a world of length M, from
# 1 to M. Repeated adjacent indices are collapsed.
transform_indices <- function(v, n, m) {
#unique2 turns e.g. 1 1 2 2 2 3 3 1 1 1 into 1 2 3 1
unique2 <- function(v) {
if (length(v) < 2) {
v
} else {
v[c(1, v[2:length(v)] - v[1:(length(v) - 1)]) != 0]
}
}
unique2(round(((v - 1) / (n - 1)) * (m - 1))) + 1 # this rounding may generate 0s. what then?
}
beta <- transform_extra_cells
dims_to_crop <- vector('list')
transformed_vars <- vector('list', length = length(dat))
names(transformed_vars) <- dat_names
transformed_vars_ordered <- transformed_vars
transformed_vars_unorder_indices <- transformed_vars
transformed_common_vars <- NULL
transformed_common_vars_ordered <- NULL
transformed_common_vars_unorder_indices <- NULL
for (i in 1:length(dat)) {
if (dataset_has_files[i]) {
indices <- indices_of_first_files_with_data[[i]]
if (!is.null(indices)) {
if (largest_dims_length == FALSE | is.numeric(largest_dims_length)) { #old code. use the 1st valid file to determine the dims
file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]])))
# The following 5 lines should go several lines below, but were moved
# here for better performance.
# If any of the dimensions comes without defining variable, then we read
# the data dimensions.
data_dims <- NULL
if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) {
file_to_open <- file_path
data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]],
lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1),
synonims)
# file_dim_reader returns dimension names as found in the file.
# Need to translate accoridng to synonims:
names(data_dims) <- replace_with_synonmins(data_dims, synonims)
}
if (is.numeric(largest_dims_length)) { # largest_dims_length is a named vector
# Check if the names fit the inner dimension names
if (!all(names(largest_dims_length) %in% names(data_dims))) {
#NOTE: stop or warning?
stop("Parameter 'largest_dims_length' has inconsistent names with inner dimensions.")
} else {
match_ind <- match(names(largest_dims_length), names(data_dims))
data_dims[match_ind] <- largest_dims_length
}
}
} else { # largest_dims_length == TRUE
data_dims <- find_largest_dims_length(
selectors_total_list[[i]], array_of_files_to_load,
selector_indices_save[[i]], dat[[i]], expected_inner_dims[[i]],
synonims, file_dim_reader)
# file_dim_reader returns dimension names as found in the file.
# Need to translate accoridng to synonims:
names(data_dims) <- replace_with_synonmins(data_dims, synonims)
} # end of if (largest_dims_length == TRUE)
# Transform the variables if needed and keep them apart.
if (!is.null(transform) && (length(transform_vars) > 0)) {
if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) {
stop("Could not find all the required variables in 'transform_vars' ",
"for the dataset '", dat[[i]][['name']], "'.")
}
# picked_vars[[i]]
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_vars[[i]], transform_vars, picked_vars_ordered[[i]])
# picked_common_vars
vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars, transform_vars, picked_common_vars_ordered)
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
# Transform the variables
transformed_data <- do.call(transform, c(list(data_array = NULL,
variables = vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]]),
transform_params))
# Discard the common transformed variables if already transformed before
if (!is.null(transformed_common_vars)) {
common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(common_ones) > 0) {
transformed_data$variables <- transformed_data$variables[-common_ones]
}
}
transformed_vars[[i]] <- list()
transformed_vars_ordered[[i]] <- list()
transformed_vars_unorder_indices[[i]] <- list()
# Order the transformed variables if needed
# 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above.
for (var_to_read in names(transformed_data$variables)) {
if (var_to_read %in% unlist(var_params)) {
associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
if ((associated_dim_name %in% names(dim_reorder_params)) && aiat) {
## Is this check really needed?
if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) {
stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
"whose coordinate variable that has more than 1 dimension (after ",
"transform). This is not supported.")
}
ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]])
attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables')
if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
}
# Save the indices to reorder back the ordered variable values.
# This will be used to define the first round indices.
unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
if (var_to_read %in% names(picked_common_vars)) {
transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x
transformed_common_vars_unorder_indices[[var_to_read]] <- unorder
} else {
transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x
transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder
}
}
}
}
transformed_picked_vars <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables))
if (length(transformed_picked_vars) > 0) {
transformed_picked_vars <- names(picked_vars[[i]])[transformed_picked_vars]
transformed_vars[[i]][transformed_picked_vars] <- transformed_data$variables[transformed_picked_vars]
}
if (is.null(transformed_common_vars)) {
transformed_picked_common_vars <- which(names(picked_common_vars) %in% names(transformed_data$variables))
if (length(transformed_picked_common_vars) > 0) {
transformed_picked_common_vars <- names(picked_common_vars)[transformed_picked_common_vars]
transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars]
}
}
}
# Once the variables are transformed, we compute the indices to be
# taken for each inner dimension.
# In all cases, indices will have to be computed to know which data
# values to take from the original data for each dimension (if a
# variable is specified for that dimension, it will be used to
# convert the provided selectors into indices). These indices are
# referred to as 'first round of indices'.
# The taken data will then be transformed if needed, together with
# the dimension variable if specified, and, in that case, indices
# will have to be computed again to know which values to take from the
# transformed data. These are the 'second round of indices'. In the
# case there is no transformation, the second round of indices will
# be all the available indices, i.e. from 1 to the number of taken
# values with the first round of indices.
for (inner_dim in expected_inner_dims[[i]]) {
if (debug) {
print("-> DEFINING INDICES FOR INNER DIMENSION:")
print(inner_dim)
}
file_dim <- NULL
if (inner_dim %in% unlist(inner_dims_across_files)) {
file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]]
chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]])
names(chunk_amount) <- file_dim
} else {
chunk_amount <- 1
}
# In the special case that the selectors for a dimension are 'all', 'first', ...
# and chunking (dividing in more than 1 chunk) is requested, the selectors are
# replaced for equivalent indices.
if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) &&
(chunks[[inner_dim]]['n_chunks'] != 1)) {
selectors <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (selectors == 'all') {
selectors <- indices(1:(data_dims[[inner_dim]] * chunk_amount))
} else if (selectors == 'first') {
selectors <- indices(1)
} else {
selectors <- indices(data_dims[[inner_dim]] * chunk_amount)
}
dat[[i]][['selectors']][[inner_dim]][[1]] <- selectors
}
# The selectors for the inner dimension are taken.
selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (debug) {
if (inner_dim %in% dims_to_check) {
print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':"))
print("-> STRUCTURE OF SELECTOR ARRAY:")
print(str(selector_array))
print("-> PICKED VARS:")
print(picked_vars)
print("-> TRANSFORMED VARS:")
print(transformed_vars)
}
}
if (is.null(dim(selector_array))) {
dim(selector_array) <- length(selector_array)
}
if (is.null(names(dim(selector_array)))) {
if (length(dim(selector_array)) == 1) {
names(dim(selector_array)) <- inner_dim
} else {
stop("Provided selector arrays must be provided with dimension ",
"names. Found an array of selectors without dimension names ",
"for the dimension '", inner_dim, "'.")
}
}
selectors_are_indices <- FALSE
if (!is.null(attr(selector_array, 'indices'))) {
if (!is.logical(attr(selector_array, 'indices'))) {
stop("The atribute 'indices' for the selectors for the dimension '",
inner_dim, "' must be TRUE or FALSE.")
}
selectors_are_indices <- attr(selector_array, 'indices')
}
taken_chunks <- rep(FALSE, chunk_amount)
selector_file_dims <- 1
#NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname.
# I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2)
if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])]
}
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))]
var_with_selectors <- NULL
var_with_selectors_name <- var_params[[inner_dim]]
var_ordered <- NULL
var_unorder_indices <- NULL
with_transform <- FALSE
# If the selectors come with an associated variable
if (!is.null(var_with_selectors_name)) {
if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) {
with_transform <- TRUE
if (!is.null(file_dim)) {
stop("Requested a transformation over the dimension '",
inner_dim, "', wich goes across files. This feature ",
"is not supported. Either do the request without the ",
"transformation or request it over dimensions that do ",
"not go across files.")
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:")
print(var_with_selectors_name)
print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:")
print(transform_vars)
print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:")
print(str(transform))
}
}
if (var_with_selectors_name %in% names(picked_vars[[i]])) {
var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]]
var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]]
} else if (var_with_selectors_name %in% names(picked_common_vars)) {
var_with_selectors <- picked_common_vars[[var_with_selectors_name]]
var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]]
}
n <- prod(dim(var_with_selectors))
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:n
}
if (with_transform) {
if (var_with_selectors_name %in% names(transformed_vars[[i]])) {
m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]]
var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
}
} else if (var_with_selectors_name %in% names(transformed_common_vars)) {
m <- prod(dim(transformed_common_vars[[var_with_selectors_name]]))
if (aiat) {
var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]]
var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
}
}
if (is.null(var_unorder_indices)) {
var_unorder_indices <- 1:m
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SIZE OF ORIGINAL VARIABLE:")
print(n)
print("-> SIZE OF TRANSFORMED VARIABLE:")
if (with_transform) print(m)
print("-> STRUCTURE OF ORDERED VAR:")
print(str(var_ordered))
print("-> UNORDER INDICES:")
print(var_unorder_indices)
}
}
var_dims <- dim(var_with_selectors)
var_file_dims <- 1
if (any(names(var_dims) %in% found_file_dims[[i]])) {
if (with_transform) {
stop("Requested transformation for inner dimension '",
inner_dim, "' but provided selectors for such dimension ",
"over one or more file dimensions. This is not ",
"supported. Either request no transformation for the ",
"dimension '", inner_dim, "' or specify the ",
"selectors for this dimension without the file dimensions.")
}
var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])]
var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])]
}
## # Keep the selectors if they correspond to a variable that will be transformed.
## if (with_transform) {
## if (var_with_selectors_name %in% names(picked_vars[[i]])) {
## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
## } else if (var_with_selectors_name %in% names(picked_common_vars)) {
## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
## }
## transformed_var_dims <- dim(transformed_var_with_selectors)
## transformed_var_file_dims <- 1
## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) {
## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])]
## }
##if (inner_dim %in% dims_to_check) {
##print("111m")
##print(str(transformed_var_dims))
##}
##
## m <- prod(transformed_var_dims)
## }
# Work out var file dims and inner dims.
if (inner_dim %in% unlist(inner_dims_across_files)) {
#TODO: if (chunk_amount != number of chunks in selector_file_dims), crash
if (length(var_dims) > 1) {
stop("Specified a '", inner_dim, "_var' for the dimension '",
inner_dim, "', which goes across files (across '", file_dim,
"'). The specified variable, '", var_with_selectors_name, "', has more ",
"than one dimension and can not be used as selector variable. ",
"Select another variable or fix it in the files.")
}
}
## TODO HERE::
#- indices_of_first_files_with_data may change, because array is now extended
var_full_dims <- dim(var_with_selectors)
if (!(inner_dim %in% names(var_full_dims))) {
stop("Could not find the dimension '", inner_dim, "' in ",
"the file. Either change the dimension name in ",
"your request, adjust the parameter ",
"'dim_names_in_files' or fix the dimension name in ",
"the file.\n", file_path)
}
} else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) ||
(is.character(selector_array) && (length(selector_array) == 1) &&
(selector_array %in% c('all', 'first', 'last')) &&
!is.null(file_dim_reader))) {
#### TODO HERE::
###- indices_of_first_files_with_data may change, because array is now extended
# Lines moved above for better performance.
##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]],
## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1))
if (!(inner_dim %in% names(data_dims))) {
stop("Could not find the dimension '", inner_dim, "' in ",
"the file. Either change the dimension name in ",
"your request, adjust the parameter ",
"'dim_names_in_files' or fix the dimension name in ",
"the file.\n", file_path)
}
} else {
stop(paste0("Can not translate the provided selectors for '", inner_dim,
"' to numeric indices. Provide numeric indices and a ",
"'file_dim_reader' function, or a '", inner_dim,
"_var' in order to calculate the indices."))
}
# At this point, if no selector variable was provided, the variable
# data_dims has been populated. If a selector variable was provided,
# the variables var_dims, var_file_dims and var_full_dims have been
# populated instead.
fri <- first_round_indices <- NULL
sri <- second_round_indices <- NULL
# This variable will keep the indices needed to crop the transformed
# variable (the one that has been transformed without being subset
# with the first round indices).
tvi <- tranaformed_variable_indices <- NULL
ordered_fri <- NULL
ordered_sri <- NULL
if ((length(selector_array) == 1) && is.character(selector_array) &&
(selector_array %in% c('all', 'first', 'last')) &&
(chunks[[inner_dim]]['n_chunks'] == 1)) {
if (is.null(var_with_selectors_name)) {
fri <- vector('list', length = chunk_amount)
dim(fri) <- c(chunk_amount)
sri <- vector('list', length = chunk_amount)
dim(sri) <- c(chunk_amount)
if (selector_array == 'all') {
fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else if (selector_array == 'first') {
fri[[1]] <- 1
taken_chunks[1] <- TRUE
#sri <- NULL
} else if (selector_array == 'last') {
fri[[chunk_amount]] <- data_dims[inner_dim]
taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
}
} else {
if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) {
stop("The variable '", var_with_selectors_name, "' must also be ",
"requested for the file dimension '", file_dim, "' in ",
"this configuration.")
}
fri <- vector('list', length = prod(var_file_dims))
dim(fri) <- var_file_dims
ordered_fri <- fri
sri <- vector('list', length = prod(var_file_dims))
dim(sri) <- var_file_dims
ordered_sri <- sri
if (selector_array == 'all') {
# TODO: Populate ordered_fri
ordered_fri[] <- replicate(prod(var_file_dims), list(1:n))
fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n]))
taken_chunks <- rep(TRUE, chunk_amount)
if (!with_transform) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri <- NULL
} else {
ordered_sri[] <- replicate(prod(var_file_dims), list(1:m))
sri[] <- replicate(prod(var_file_dims), list(1:m))
## var_file_dims instead??
#if (!aiat) {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#} else {
#fri[] <- replicate(prod(var_file_dims), list(1:n))
#taken_chunks <- rep(TRUE, chunk_amount)
#sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
#}
tvi <- 1:m
}
} else if (selector_array == 'first') {
taken_chunks[1] <- TRUE
if (!with_transform) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
#taken_chunks[1] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[1]] <- 1
fri[[1]] <- var_unorder_indices[1]
# TODO: TO BE IMPROVED
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1:ceiling(m / n)
sri[[1]] <- 1:ceiling(m / n)
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[1]] <- 1:ceiling(m / n)
fri[[1]] <- var_unorder_indices[1:ceiling(m / n)]
#taken_chunks[1] <- TRUE
ordered_sri[[1]] <- 1
sri[[1]] <- 1
tvi <- 1
}
}
} else if (selector_array == 'last') {
taken_chunks[length(taken_chunks)] <- TRUE
if (!with_transform) {
ordered_fri[[prod(var_file_dims)]] <- n
fri[[prod(var_file_dims)]] <- var_unorder_indices[n]
#taken_chunks[length(taken_chunks)] <- TRUE
#sri <- NULL
} else {
if (!aiat) {
ordered_fri[[prod(var_file_dims)]] <- prod(var_dims)
fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING.
tvi <- 1:ceiling(m / n)
} else {
ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n
fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n]
#taken_chunks[length(taken_chunks)] <- TRUE
ordered_sri[[prod(var_file_dims)]] <- 1
sri[[prod(var_file_dims)]] <- 1
tvi <- 1
}
}
}
}
# If the selectors are not 'all', 'first', 'last', ...
} else {
if (!is.null(var_with_selectors_name)) {
unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims)))
if ((length(unmatching_file_dims) > 0)) {
raise_error <- FALSE
if (is.null(file_dim)) {
raise_error <- TRUE
} else {
if (!((length(unmatching_file_dims) == 1) &&
(names(var_file_dims)[unmatching_file_dims] == file_dim) &&
(inner_dim %in% names(selector_inner_dims)))) {
raise_error <- TRUE
}
}
if (raise_error) {
stop("Provided selectors for the dimension '", inner_dim, "' must have as many ",
"file dimensions as the variable the dimension is defined along, '",
var_with_selectors_name, "', with the exceptions of the file pattern dimension ('",
found_pattern_dim, "') and any depended file dimension (if specified as ",
"depended dimension in parameter 'inner_dims_across_files' and the ",
"depending file dimension is present in the provided selector array).")
}
}
if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) {
if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) {
stop("Size of selector file dimensions must mach size of requested ",
"variable dimensions.")
}
}
}
## TODO: If var dimensions are not in the same order as selector dimensions, reorder
if (is.null(names(selector_file_dims))) {
if (is.null(file_dim)) {
fri_dims <- 1
} else {
fri_dims <- chunk_amount
names(fri_dims) <- file_dim
}
} else {
fri_dim_names <- names(selector_file_dims)
if (!is.null(file_dim)) {
fri_dim_names <- c(fri_dim_names, file_dim)
}
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)]
fri_dims <- rep(NA, length(fri_dim_names))
names(fri_dims) <- fri_dim_names
fri_dims[names(selector_file_dims)] <- selector_file_dims
if (!is.null(file_dim)) {
fri_dims[file_dim] <- chunk_amount
}
}
fri <- vector('list', length = prod(fri_dims))
dim(fri) <- fri_dims
sri <- vector('list', length = prod(fri_dims))
dim(sri) <- fri_dims
selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims)
selector_store_position <- fri_dims
for (j in 1:prod(dim(selector_file_dim_array))) {
selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ]
names(selector_indices_to_take) <- names(selector_file_dims)
selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
as.list(selector_indices_to_take), drop = 'selected')
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
print("-> STRUCTURE OF A SUB ARRAY:")
print(str(sub_array_of_selectors))
print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:")
print(str(var_with_selectors))
print(dim(var_with_selectors))
}
}
if (selectors_are_indices) {
sub_array_of_values <- NULL
#} else if (!is.null(var_ordered)) {
# sub_array_of_values <- var_ordered
} else {
if (length(var_file_dims) > 0) {
var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
as.list(var_indices_to_take), drop = 'selected')
} else {
sub_array_of_values <- var_with_selectors
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS")
print(str(sub_array_of_values))
print(dim(sub_array_of_values))
print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:")
print(file_dim)
}
}
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) {
if (length(sub_array_of_selectors) > 0) {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.")
}
}
if (selectors_are_indices) {
if (!is.null(var_with_selectors_name)) {
max_allowed <- ifelse(aiat, m, n)
} else {
max_allowed <- data_dims[inner_dim]
}
if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) ||
any(na.omit(unlist(sub_array_of_selectors)) < 1)) {
stop("Provided indices out of range for dimension '", inner_dim, "' ",
"for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ",
max_allowed, ").")
}
}
# The selector_checker will return either a vector of indices or a list
# with the first and last desired indices.
#NOTE: goes_across_prime_meridian may be TRUE only if the selector is list
# If selectors are indices and _reorder = CircularSort() is used, change
# is_circular_dim to TRUE.
if (!is.null(var_ordered) & selectors_are_indices &
!is.null(dim_reorder_params[[inner_dim]])) {
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
}
}
if (!is.null(var_ordered) && !selectors_are_indices) {
if (!is.null(dim_reorder_params[[inner_dim]])) {
if (is.list(sub_array_of_selectors)) {
## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here.
if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) {
is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
}
if (is_circular_dim) {
# NOTE: Use CircularSort() to put the values in the assigned range, and get the order.
# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1].
# 'goes_across_prime_meridian' means the selector range across the border. For example,
# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE.
# dim_reorder_params is a list of Reorder function, i.e.,
# Sort() or CircularSort().
tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
goes_across_prime_meridian <- tmp[1] > tmp[2]
}
# HERE change to the same code as below (under 'else'). Not sure why originally
#it uses additional lines, which make reorder not work.
sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x)
#sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))
#sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix
#sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder])
# Add warning if the boundary is out of range
if (min(unlist(sub_array_of_selectors)) < range(var_ordered)[1]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > range(var_ordered)[2]) {
show_out_of_range_warning(inner_dim, range = range(var_ordered), bound = 'upper')
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
}
} else {
sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x
}
}
# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case
# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of
#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor().
sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) {
if (!(sub_array_of_selectors[[1]] %in% var_ordered)){
sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1
}
if (!(sub_array_of_selectors[[2]] %in% var_ordered)){
sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1
}
}
#NOTE: the possible case?
if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) {
stop("The case is goes_across_prime_meridian but no adjustion for the indices!")
}
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(var_ordered),
", ", max(var_ordered), "]."))
}
} else {
# Add warning if the boundary is out of range
if (is.list(sub_array_of_selectors)) {
if (min(unlist(sub_array_of_selectors)) < min(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'lower')
if (max(unlist(sub_array_of_selectors)) > max(sub_array_of_values)) {
show_out_of_range_warning(inner_dim, range = range(sub_array_of_values),
bound = 'upper')
#TODO: sub_array_of_values here is NULL if selectors are indices.
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
tolerance = if (aiat) {
NULL
} else {
tolerance_params[[inner_dim]]
})
if (any(is.na(sub_array_of_indices))) {
stop(paste0("The selectors of ", inner_dim,
" are out of range [", min(sub_array_of_values),
", ", max(sub_array_of_values), "]."))
}
}
## This 'if' runs in both Start() and Compute(). In Start(), it doesn't have any effect (no chunk).
## In Compute(), it creates the indices for each chunk. For example, if 'sub_array_of_indices'
## is c(5:10) and chunked into 2, 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10)
## for chunk = 2. If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes
## list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2.
## TODO: The list can be turned into vector here? So afterward no need to judge if it is list
## or vector.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
if (!is.list(sub_array_of_indices)) {
sub_array_of_indices <-
sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
chunks[[inner_dim]]["chunk"],
chunks[[inner_dim]]["n_chunks"],
inner_dim)]
} else {
tmp <-
chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]),
chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"],
inner_dim)
vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
sub_array_of_indices[[1]] <- vect[tmp[1]]
sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]]
}
}
# The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk.
#----------------------------------------------------------
# 'sub_sub_array_of_values' is for sri chunking. If this inner dim is chunked,
# the sri has to follow the chunking of fri. Therefore, we save the original
# value of this chunk here for later use. We'll find the corresponding
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
# transformed value within 'sub_sub_array_of_values' and chunk sri.
if (with_transform) {
if (!is.null(var_ordered)) { #var_ordered
if (is.list(sub_array_of_indices)) {
sub_sub_array_of_values <-
var_ordered[sub_array_of_indices[[1]]:sub_array_of_indices[[2]]]
} else {
sub_sub_array_of_values <- var_ordered[sub_array_of_indices]
}
} else { # sub_array_of_values
# Not sure if 'vars_to_transform' is correct to use.
if (is.null(sub_array_of_values)) { # selectors are indices
if (is.list(sub_array_of_indices)) {
sub_sub_array_of_values <-
vars_to_transform[[var_with_selectors_name]][sub_array_of_indices[[1]]:sub_array_of_indices[[2]]]
} else {
sub_sub_array_of_values <- vars_to_transform[[var_with_selectors_name]][sub_array_of_indices]
}
} else { # selectors are values
if (is.list(sub_array_of_indices)) {
sub_sub_array_of_values <-
sub_array_of_values[sub_array_of_indices[[1]]:sub_array_of_indices[[2]]]
} else {
sub_sub_array_of_values <- sub_array_of_values[sub_array_of_indices]
}
}
#----------------------------------------------------------
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> TRANSFORMATION REQUESTED?")
print(with_transform)
print("-> BETA:")
print(beta)
}
}
if (with_transform) {
# If there is a transformation and selector values are provided, these
# selectors will be processed in the same way either if aiat = TRUE or
# aiat = FALSE.
## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
## otherwise, do what's coded.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
}
}
# Generate sub_array_of_fri
sub_array_of_fri <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim)
subset_vars_to_transform <- vars_to_transform
if (!is.null(var_ordered)) {
#NOTE: If var_ordered is common_vars, it doesn't have attributes and it is a vector.
# Turn it into array and add dimension name.
if (!is.array(var_ordered)) {
var_ordered <- as.array(var_ordered)
names(dim(var_ordered)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
} else {
if (!selectors_are_indices) { # selectors are values
#NOTE: It should be redundant because without reordering the var should remain array
## But just stay same with above...
if (!is.array(sub_array_of_values)) {
sub_array_of_values <- as.array(sub_array_of_values)
names(dim(sub_array_of_values)) <- inner_dim
}
subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
} else { # selectors are indices
subset_vars_to_transform[[var_with_selectors_name]] <-
Subset(subset_vars_to_transform[[var_with_selectors_name]],
inner_dim, sub_array_of_fri)
}
# Change the order of longitude crop if no reorder + from big to small.
# cdo -sellonlatbox, the lon is west, east (while lat can be north
# to south or opposite)
# Before changing crop, first we need to find the name of longitude.
# NOTE: The potential bug here (also the bug for CDORemapper): the lon name
# is limited (only the ones listed in .KnownLonNames() are available.
known_lon_names <- startR:::.KnownLonNames()
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]]
# NOTE: The cases not considered: (1) if lon reorder(decreasing = T)
# It doesn't make sense, but if someone uses it, here should
# occur error. (2) crop = TRUE/FALSE
if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) {
if (is.numeric(class(transform_params$crop))) {
if (transform_params$crop[1] > transform_params$crop[2]) {
tmp <- transform_params$crop[1]
transform_params$crop[1] <- transform_params$crop[2]
transform_params$crop[2] <- tmp
}
}
}
transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
variables = subset_vars_to_transform,
file_selectors = selectors_of_first_files_with_data[[i]]),
transform_params))$variables[[var_with_selectors_name]]
# Sorting the transformed variable and working out the indices again after transform.
if (!is.null(dim_reorder_params[[inner_dim]])) {
transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
transformed_subset_var <- transformed_subset_var_reorder$x
#NOTE: The fix here solves the mis-ordered lon when across_meridian.
transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
# transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
} else {
transformed_subset_var_unorder <- 1:length(transformed_subset_var)
}
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
if (!selectors_are_indices) { # selectors are values
sub_array_of_sri <- selector_checker(
sub_array_of_selectors, transformed_subset_var,
tolerance = if (aiat) {
tolerance_params[[inner_dim]]
} else {
NULL
})
} else { # selectors are indices
# Need to transfer to values first, then use the values to get the new
# indices in transformed_subset_var.
ori_values <- vars_to_transform[[var_with_selectors_name]][sub_array_of_selectors]
sub_array_of_sri <- selector_checker(
ori_values, transformed_subset_var,
tolerance = if (aiat) {
tolerance_params[[inner_dim]]
} else {
NULL
})
sub_array_of_sri <- unique(sub_array_of_sri)
}
# Check if selectors fall out of the range of the transform grid
# It may happen when original lon is [-180, 180] while want to regrid to
# [0, 360], and lon selector = [-20, -10].
if (any(is.na(sub_array_of_sri))) {
stop(paste0("The selectors of ",
inner_dim, " are out of range of transform grid '",
transform_params$grid, "'. Use parameter '",
inner_dim, "_reorder' or change ", inner_dim,
" selectors."))
}
if (goes_across_prime_meridian) {
if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
# global longitude
sub_array_of_sri <- c(1:length(transformed_subset_var))
} else {
# the common case, i.e., non-global
# # NOTE: Because sub_array_of_sri order is exchanged due to
# # previous development, here [[1]] and [[2]] should exchange
# sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
# sub_array_of_sri[[2]]:length(transformed_subset_var))
#NOTE: the old code above is not suitable for all the possible cases.
# If sub_array_of_selectors is not exactly the value in transformed_subset_var, sub_array_of_sri[[1]] will be larger than sub_array_of_sri[[2]].
# Though here is not global case, we already have transformed_subset_var cropped as the desired region, so it is okay to use the whole length. Not sure if it will cause other problems...
sub_array_of_sri <- 1:length(transformed_subset_var)
}
} else if (is.list(sub_array_of_sri)) {
sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
}
ordered_sri <- sub_array_of_sri
sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
# Chunk sub_array_of_sri if this inner_dim needs to be chunked
if (chunks[[inner_dim]]["n_chunks"] > 1) {
tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) &
transformed_subset_var <= max(sub_sub_array_of_values))
sub_array_of_sri <- sub_array_of_sri[tmp]
}
# In this case, the tvi are not defined and the 'transformed_subset_var'
# will be taken instead of the var transformed before in the code.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> FIRST INDEX:")
# print(first_index)
print("NOTE: Check function generate_sub_array_of_fri() in zzz.R")
# print(last_index)
print("NOTE: Check function generate_sub_array_of_fri() in zzz.R")
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
print("-> STRUCTURE OF FIRST ROUND INDICES:")
print(str(sub_array_of_fri))
print("-> STRUCTURE OF SECOND ROUND INDICES:")
print(str(sub_array_of_sri))
print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
print(str(tvi))
}
}
### # If the selectors are expressed after transformation
### } else {
###if (debug) {
###if (inner_dim %in% dims_to_check) {
###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
###}
###}
### if (goes_across_prime_meridian) {
### sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
### 1:sub_array_of_indices[[2]])
### }
### first_index <- min(unlist(sub_array_of_indices))
### last_index <- max(unlist(sub_array_of_indices))
### first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
### last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
### sub_array_of_fri <- first_index_before_transform:last_index_before_transform
### n_of_extra_cells <- round(beta / n * m)
### if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
### sub_array_of_sri <- 1:(last_index - first_index + 1)
### if (is.null(tvi)) {
### tvi <- sub_array_of_sri + first_index - 1
### }
### } else {
### sub_array_of_sri <- sub_array_of_indices - first_index + 1
### if (is.null(tvi)) {
### tvi <- sub_array_of_indices
### }
### }
### sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
list(value = sub_array_of_sri)))
} else { # !with_transform
sub_array_of_fri <- generate_sub_array_of_fri(