Newer
Older
# global longitude
sub_array_of_sri <- c(1:length(transformed_subset_var))
} else {
# the common case, i.e., non-global
# # NOTE: Because sub_array_of_sri order is exchanged due to
# # previous development, here [[1]] and [[2]] should exchange
# sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
# sub_array_of_sri[[2]]:length(transformed_subset_var))
#NOTE: the old code above is not suitable for all the possible cases.
# If sub_array_of_selectors is not exactly the value in transformed_subset_var, sub_array_of_sri[[1]] will be larger than sub_array_of_sri[[2]].
# Though here is not global case, we already have transformed_subset_var cropped as the desired region, so it is okay to use the whole length. Not sure if it will cause other problems...
sub_array_of_sri <- 1:length(transformed_subset_var)
}
} else if (is.list(sub_array_of_sri)) {
sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
}
# Chunk sub_array_of_sri if this inner_dim needs to be chunked
#TODO: Potential problem: the transformed_subset_var value falls between
# the end of sub_sub_array_of_values of the 1st chunk and the beginning
# of sub_sub_array_of_values of the 2nd chunk. Then, one sub_array_of_sri
# will miss. 'previous_sri' is checked and will be included if this
# situation happens, but don't know if the transformed result is
# correct or not.
# NOTE: The chunking criteria may not be 100% correct. The current way
# is to pick the sri that larger than the minimal sub_sub_array_of_values
# and smaller than the maximal sub_sub_array_of_values; if it's
# the first chunk, make sure the 1st sri is included; if it's the
# last chunk, make sure the last sri is included.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
sub_array_of_sri_complete <- sub_array_of_sri
if (is.list(sub_sub_array_of_values)) { # list
sub_array_of_sri <-
which(transformed_subset_var >= min(unlist(sub_sub_array_of_values)) &
transformed_subset_var <= max(unlist(sub_sub_array_of_values)))
# if it's 1st chunk & the first sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == 1 &
!(sub_array_of_sri_complete[1] %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri_complete[1], sub_array_of_sri)
}
# if it's last chunk & the last sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == chunks[[inner_dim]]["n_chunks"] &
!(tail(sub_array_of_sri_complete, 1) %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri, tail(sub_array_of_sri_complete, 1))
}
# Check if sub_array_of_sri perfectly connects to the previous sri.
# If not, inlclude the previous sri.
#NOTE 1: don't know if the transform for the previous sri is
# correct or not.
#NOTE 2: If crop = T, sub_array_of_sri always starts from 1.
# Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) {
if (!is.null(previous_sub_sub_array_of_values)) {
# if decreasing = F
if (transformed_subset_var[1] < transformed_subset_var[2]) {
previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
} else {
# if decreasing = T
previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values))
if (previous_sri + 1 != sub_array_of_sri[1]) {
sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
}
} else { # is vector
tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) &
transformed_subset_var <= max(sub_sub_array_of_values))
# Ensure tmp and sub_array_of_sri are both ascending or descending
if (is.unsorted(tmp) != is.unsorted(sub_array_of_sri)) {
tmp <- rev(tmp)
}
# Include first or last sri if tmp doesn't have. It's only for
# ""vectors"" because vectors look for the closest value.
#NOTE: The condition here is not correct. The criteria should be
# 'vector' instead of indices.
if (chunks[[inner_dim]]["chunk"] == 1) {
sub_array_of_sri <- unique(c(sub_array_of_sri[1], tmp))
} else if (chunks[[inner_dim]]["chunk"] ==
chunks[[inner_dim]]["n_chunks"]) { # last chunk
sub_array_of_sri <- unique(c(tmp, sub_array_of_sri[length(sub_array_of_sri)]))
} else {
sub_array_of_sri <- tmp
}
# Check if sub_array_of_sri perfectly connects to the previous sri.
# If not, inlclude the previous sri.
#NOTE 1: don't know if the transform for the previous sri is
# correct or not.
#NOTE 2: If crop = T, sub_array_of_sri always starts from 1.
# Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) {
if (!is.null(previous_sub_sub_array_of_values)) {
# if decreasing = F
if (transformed_subset_var[1] < transformed_subset_var[2]) {
previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
} else {
# if decreasing = T
previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values))
}
if (previous_sri + 1 != which(sub_array_of_sri[1] == sub_array_of_sri_complete)) {
sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
ordered_sri <- sub_array_of_sri
sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
###########################old##################################
# if (chunks[[inner_dim]]["n_chunks"] > 1) {
# tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) &
# transformed_subset_var <= max(sub_sub_array_of_values))
# sub_array_of_sri <- sub_array_of_sri[tmp]
# }
################################################################
# In this case, the tvi are not defined and the 'transformed_subset_var'
# will be taken instead of the var transformed before in the code.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> FIRST INDEX:")
# print(first_index)
print("NOTE: Check function generate_sub_array_of_fri() in zzz.R")
# print(last_index)
print("NOTE: Check function generate_sub_array_of_fri() in zzz.R")
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
print("-> STRUCTURE OF FIRST ROUND INDICES:")
print(str(sub_array_of_fri))
print("-> STRUCTURE OF SECOND ROUND INDICES:")
print(str(sub_array_of_sri))
print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
print(str(tvi))
}
}
### # If the selectors are expressed after transformation
### } else {
###if (debug) {
###if (inner_dim %in% dims_to_check) {
###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
###}
###}
### if (goes_across_prime_meridian) {
### sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
### 1:sub_array_of_indices[[2]])
### }
### first_index <- min(unlist(sub_array_of_indices))
### last_index <- max(unlist(sub_array_of_indices))
### first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
### last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
### sub_array_of_fri <- first_index_before_transform:last_index_before_transform
### n_of_extra_cells <- round(beta / n * m)
### if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
### sub_array_of_sri <- 1:(last_index - first_index + 1)
### if (is.null(tvi)) {
### tvi <- sub_array_of_sri + first_index - 1
### }
### } else {
### sub_array_of_sri <- sub_array_of_indices - first_index + 1
### if (is.null(tvi)) {
### tvi <- sub_array_of_indices
### }
### }
### sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
list(value = sub_array_of_sri)))
} else { # !with_transform
sub_array_of_fri <- generate_sub_array_of_fri(
with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta,
is_circular_dim)
# Reorder sub_array_of_fri if reordering function is used.
# It was index in the assigned order (e.g., in [-180, 180] if CircularSort(-180, 180)), and here is changed to the index in the original order.
if (!is.null(var_unorder_indices)) {
if (is.null(ordered_fri)) {
ordered_fri <- sub_array_of_fri
}
sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
}
fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
list(value = sub_array_of_fri)))
if (!is.null(file_dim)) {
taken_chunks[selector_store_position[[file_dim]]] <- TRUE
} else {
taken_chunks <- TRUE
}
}
} else {
# The inner dim goes across a file dim (e.g., time_across = 'sdate')
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.")
}
}
# If "<inner_dim>_across = <file_dim> + merge_across_dims = FALSE + chunk over <inner_dim>", return error because this instance is not logically correct.
if (chunks[[inner_dim]]["n_chunks"] > 1 & inner_dim %in% inner_dims_across_files) {
stop("Chunk over dimension '", inner_dim, "' is not allowed because '",
inner_dim, "' is across '",
names(inner_dims_across_files)[which(inner_dim %in% inner_dims_across_files)], "'.")
}
if (inner_dim %in% names(dim(sub_array_of_selectors))) {
if (is.null(var_with_selectors_name)) {
if (!largest_dims_length | (largest_dims_length & length(unique(inner_dim_lengths)) <= 1)) { #old code
maximal_indice <- data_dims[inner_dim] * chunk_amount
} else { # files have different length of inner dim
maximal_indice <- sum(inner_dim_lengths)
}
any(na.omit(unlist(sub_array_of_selectors)) > maximal_indice)) {
stop("Provided indices out of range for dimension '", inner_dim, "' ",
"for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ",
maximal_indice, ").")
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
}
} else {
if (inner_dim %in% names(dim(sub_array_of_values))) {
# NOTE: Put across-inner-dim at the 1st position.
# POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_selectors below.
inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim)
if (inner_dim_pos_in_sub_array != 1) {
new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array]
new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order)
}
}
}
# NOTE: Put across-inner-dim at the 1st position.
# POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_values above.
inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim)
if (inner_dim_pos_in_sub_array != 1) {
new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array]
new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order)
}
sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
tolerance = tolerance_params[[inner_dim]])
# It is needed to expand the indices here, otherwise for
# values(list(date1, date2)) only 2 values are picked.
if (is.list(sub_array_of_indices)) {
sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
}
sub_array_of_indices <- sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices),
chunks[[inner_dim]]['chunk'],
chunks[[inner_dim]]['n_chunks'],
inner_dim)]
sub_array_is_list <- FALSE
if (is.list(sub_array_of_indices)) {
sub_array_is_list <- TRUE
sub_array_of_indices <- unlist(sub_array_of_indices)
}
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
# "indices_chunk" refers to in which file the
# sub_array_of_indices is; "transformed_indices"
# refers to the indices of sub_array_of_indices in each file.
if (!largest_dims_length |
(largest_dims_length & length(unique(inner_dim_lengths)) <= 1)) {
# old code; all the files have the same length of inner_dim
if (is.null(var_with_selectors_name)) {
indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
} else {
indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
}
} else { # files have different inner dim length
indices_chunk <- c()
for (item in 1:length(inner_dim_lengths)) {
tmp <- which(sub_array_of_indices <= cumsum(inner_dim_lengths)[item])
indices_chunk <- c(indices_chunk, rep(item, length(tmp) - length(indices_chunk)))
}
sub_array_of_indices_by_file <- split(sub_array_of_indices, indices_chunk)
for (item in 2:length(sub_array_of_indices_by_file)) {
sub_array_of_indices_by_file[[item]] <- sub_array_of_indices_by_file[[item]] - cumsum(inner_dim_lengths)[item - 1]
}
transformed_indices <- unlist(sub_array_of_indices_by_file, use.names = FALSE)
if (sub_array_is_list) {
sub_array_of_indices <- as.list(sub_array_of_indices)
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> GOING TO ITERATE ALONG CHUNKS.")
}
}
for (chunk in 1:chunk_amount) {
if (!is.null(names(selector_store_position))) {
selector_store_position[file_dim] <- chunk
} else {
selector_store_position <- chunk
}
sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)]
sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)]
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
if (with_transform) {
# If the provided selectors are expressed in the world
# before transformation
if (!aiat) {
first_index <- min(unlist(sub_array_of_indices))
last_index <- max(unlist(sub_array_of_indices))
sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n))
sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m)
if (is.list(sub_array_of_indices)) {
if (length(sub_array_of_sri) > 1) {
sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
}
}
##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI
# If the selectors are expressed after transformation
} else {
first_index <- min(unlist(sub_array_of_indices))
last_index <- max(unlist(sub_array_of_indices))
first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
sub_array_of_fri <- first_index_before_transform:last_index_before_transform
if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
sub_array_of_sri <- 1:(last_index - first_index + 1) +
round(beta / n * m)
} else {
sub_array_of_sri <- sub_array_of_indices - first_index + 1 +
round(beta / n * m)
}
##TODO: FILL IN TVI
}
sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
list(value = sub_array_of_sri)))
if (length(sub_array_of_sri) > 0) {
taken_chunks[chunk] <- TRUE
}
} else {
sub_array_of_fri <- sub_array_of_indices
if (length(sub_array_of_fri) > 0) {
taken_chunks[chunk] <- TRUE
}
}
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
if (!is.null(var_unorder_indices)) {
ordered_fri <- sub_array_of_fri
sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
}
fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
list(value = sub_array_of_fri)))
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> FINISHED ITERATING ALONG CHUNKS")
}
}
} else {
stop("Provided array of indices for dimension '", inner_dim, "', ",
"which goes across the file dimension '", file_dim, "', but ",
"the provided array does not have the dimension '", inner_dim,
"', which is mandatory.")
}
}
}
}
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> PROCEEDING TO CROP VARIABLES")
}
}
#if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) {
#if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) &&
# (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) {
empty_chunks <- which(!taken_chunks)
if (length(empty_chunks) >= length(taken_chunks)) {
stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.")
}
if (length(empty_chunks) > 0) {
# # Get the first group of chunks to remove, and remove them.
# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2
# dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1)))
# if (all(dist == 1)) {
# start_chunks_to_remove <- NULL
# } else {
# first_chunk_to_remove <- tail(which(dist > 1), 1)
# start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)])
# }
# # Get the last group of chunks to remove, and remove them.
# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9
# dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1)))
# if (all(dist == 1)) {
# first_chunk_to_remove <- 1
# } else {
# first_chunk_to_remove <- tail(which(dist > 1), 1)
# }
# end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)]
# chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove)))
chunks_to_keep <- which(taken_chunks)
dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep))
# found_indices <- Subset(found_indices, file_dim, chunks_to_keep)
# # Crop dataset variables file dims.
# for (picked_var in names(picked_vars[[i]])) {
# if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
# picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep)
# }
# }
}
#}
dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri)
# Crop dataset variables inner dims.
# Crop common variables inner dims.
types_of_var_to_crop <- 'picked'
if (with_transform) {
types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed')
}
if (!is.null(dim_reorder_params[[inner_dim]])) {
types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered')
}
for (type_of_var_to_crop in types_of_var_to_crop) {
if (type_of_var_to_crop == 'transformed') {
if (is.null(tvi)) {
if (!is.null(dim_reorder_params[[inner_dim]])) {
crop_indices <- unique(unlist(ordered_sri))
} else {
crop_indices <- unique(unlist(sri))
}
} else {
crop_indices <- unique(unlist(tvi))
}
vars_to_crop <- transformed_vars[[i]]
common_vars_to_crop <- transformed_common_vars
} else if (type_of_var_to_crop == 'reordered') {
crop_indices <- unique(unlist(ordered_fri))
vars_to_crop <- picked_vars_ordered[[i]]
common_vars_to_crop <- picked_common_vars_ordered
} else {
#TODO: If fri has different indices in each list, the crop_indices should be
# separated for each list. Otherwise, picked_common_vars later will be wrong.
crop_indices <- unique(unlist(fri))
vars_to_crop <- picked_vars[[i]]
common_vars_to_crop <- picked_common_vars
}
for (var_to_crop in names(vars_to_crop)) {
if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) {
if (!is.null(crop_indices)) {
if (type_of_var_to_crop == 'transformed') {
if (!aiat) {
all(selector_array %in% c('all', 'first', 'last')))) {
vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
aho
committed
} else {
vars_to_crop[[var_to_crop]] <-
Subset(transformed_var_with_selectors, inner_dim, crop_indices)
} else {
vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
}
} else {
vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
}
}
}
}
if (i == length(dat)) {
for (common_var_to_crop in names(common_vars_to_crop)) {
if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) {
if (type_of_var_to_crop == 'transformed' & !aiat) {
all(selector_array %in% c('all', 'first', 'last')))) {
common_vars_to_crop[[common_var_to_crop]] <-
Subset(transformed_subset_var, inner_dim, crop_indices)
aho
committed
} else {
common_vars_to_crop[[common_var_to_crop]] <-
Subset(transformed_var_with_selectors, inner_dim, crop_indices)
} else {
if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim
tmp <- common_vars_to_crop[[common_var_to_crop]]
tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]])
dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim))
if (!identical(dim_extra_ind, integer(0))) {
tmp_list <- asplit(tmp, dim_extra_ind)
dim_file_ind <- which(names(dim(tmp_list[[1]])) == file_dim)
tmp_list <- lapply(tmp_list, asplit, dim_file_ind)
} else { # only file_dim and inner_dim
dim_file_ind <- which(names(dim(tmp)) == file_dim)
tmp_list <- asplit(tmp, dim_file_ind)
# Add another layer to be consistent with the first case above
tmp_list <- list(tmp_list)
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
}
max_fri_length <- max(sapply(fri, length))
for (i_extra_dim in 1:length(tmp_list)) {
for (i_fri in 1:length(fri)) {
tmp_list[[i_extra_dim]][[i_fri]] <-
tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]]
if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) {
tmp_list[[i_extra_dim]][[i_fri]] <-
c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]])))
}
}
}
# Change list back to array
tmp_new_dim <- c(max_fri_length, dim(tmp)[file_dim], dim(tmp)[dim_extra_ind])
names(tmp_new_dim) <- c(inner_dim, file_dim, names(dim(tmp))[dim_extra_ind])
common_vars_to_crop[[common_var_to_crop]] <-
array(unlist(tmp_list), dim = tmp_new_dim)
# Reorder back
common_vars_to_crop[[common_var_to_crop]] <-
aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim)))
# Put attributes back
tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]])))
attributes(common_vars_to_crop[[common_var_to_crop]]) <-
c(attributes(common_vars_to_crop[[common_var_to_crop]]),
tmp_attributes[tmp])
if (common_var_to_crop == 'time') {
# Convert number back to time
common_vars_to_crop[[common_var_to_crop]] <-
as.POSIXct(common_vars_to_crop[[common_var_to_crop]],
origin = "1970-01-01", tz = 'UTC')
}
} else { # old code
common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices)
}
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
}
}
}
}
if (type_of_var_to_crop == 'transformed') {
if (!is.null(vars_to_crop)) {
transformed_vars[[i]] <- vars_to_crop
}
if (i == length(dat)) {
transformed_common_vars <- common_vars_to_crop
}
} else if (type_of_var_to_crop == 'reordered') {
if (!is.null(vars_to_crop)) {
picked_vars_ordered[[i]] <- vars_to_crop
}
if (i == length(dat)) {
picked_common_vars_ordered <- common_vars_to_crop
}
} else {
if (!is.null(vars_to_crop)) {
picked_vars[[i]] <- vars_to_crop
}
if (i == length(dat)) {
#NOTE: To avoid redundant run
if (inner_dim %in% names(common_vars_to_crop)) {
picked_common_vars <- common_vars_to_crop
}
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
}
}
}
#}
}
# After the selectors have been picked (using the original variables),
# the variables are transformed. At that point, the original selectors
# for the transformed variables are also kept in the variable original_selectors.
#print("L")
}
}
}
# if (!is.null(transformed_common_vars)) {
# picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
# }
# Remove the trailing chunks, if any.
for (file_dim in names(dims_to_crop)) {
# indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max))
## TODO: Merge indices in dims_to_crop with some advanced mechanism?
indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]]))
array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep)
array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep)
for (i in 1:length(dat)) {
# Crop selectors
for (selector_dim in names(dat[[i]][['selectors']])) {
if (selector_dim == file_dim) {
for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) {
dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep]
}
for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) {
dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep]
}
}
if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) {
dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep)
dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep)
}
}
# Crop dataset variables file dims.
for (picked_var in names(picked_vars[[i]])) {
if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep)
}
}
for (transformed_var in names(transformed_vars[[i]])) {
if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) {
transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep)
}
}
}
# Crop common variables file dims.
for (picked_common_var in names(picked_common_vars)) {
if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) {
picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep)
}
}
for (transformed_common_var in names(transformed_common_vars)) {
if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) {
transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep)
}
}
}
# Calculate the size of the final array.
total_inner_dims <- NULL
for (i in 1:length(dat)) {
if (dataset_has_files[i]) {
inner_dims <- expected_inner_dims[[i]]
inner_dims <- sapply(inner_dims,
function(x) {
if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) {
max(sapply(dat[[i]][['selectors']][[x]][['sri']], length))
} else {
if (length(var_params[[x]]) > 0) {
if (var_params[[x]] %in% names(transformed_vars[[i]])) {
length(transformed_vars[[i]][[var_params[[x]]]])
} else if (var_params[[x]] %in% names(transformed_common_vars)) {
length(transformed_common_vars[[var_params[[x]]]])
} else {
max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
}
} else {
max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
}
}
})
names(inner_dims) <- expected_inner_dims[[i]]
if (is.null(total_inner_dims)) {
total_inner_dims <- inner_dims
} else {
new_dims <- .MergeArrayDims(total_inner_dims, inner_dims)
total_inner_dims <- new_dims[[3]]
}
}
}
new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims)
final_dims <- new_dims[[3]][dim_names]
# final_dims_fake is the vector of final dimensions after having merged the
# 'across' file dimensions with the respective 'across' inner dimensions, and
# after having broken into multiple dimensions those dimensions for which
# multidimensional selectors have been provided.
# final_dims will be used for collocation of data, whereas final_dims_fake
# will be used for shaping the final array to be returned to the user.
final_dims_fake <- final_dims
if (merge_across_dims) {
final_dims_fake <- dims_merge(inner_dims_across_files, final_dims_fake)
#=========================================================================
# Find the dimension to split if split_multiselected_dims = TRUE.
# If there is no dimension able to be split, change split_multiselected_dims to FALSE.
tmp <- dims_split(dim_params, final_dims_fake)
final_dims_fake <- tmp[[1]]
# all_split_dims is a list containing all the split dims
all_split_dims <- tmp[[2]]
if (is.null(all_split_dims)) {
split_multiselected_dims <- FALSE
.warning(paste0("Not found any dimensions able to be split. The parameter ",
"'split_multiselected_dims' is changed to FALSE."))
}
#======================================================================
# If only merge_across_dims and merge_across_dims_narm and no split_multiselected_dims,
# the length of inner across dim (e.g., time) needs to be adjusted. Sum up the actual length
# without potential NAs.
if (merge_across_dims) {
across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one?
# Get the length of each inner_dim ('time') along each file_dim ('file_date')
length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length)
if (merge_across_dims_narm & !split_multiselected_dims) {
final_dims_fake <- merge_narm_dims(final_dims_fake, across_inner_dim, length_inner_across_dim)
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
}
}
if (!silent) {
.message("Detected dimension sizes:")
longest_dim_len <- max(sapply(names(final_dims_fake), nchar))
longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar))
sapply(names(final_dims_fake),
function(x) {
message(paste0("* ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''),
x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''),
final_dims_fake[x]))
})
bytes <- prod(c(final_dims_fake, 8))
dim_sizes <- paste(final_dims_fake, collapse = ' x ')
if (retrieve) {
.message(paste("Total size of requested data:"))
} else {
.message(paste("Total size of involved data:"))
}
.message(paste(dim_sizes, " x 8 bytes =",
format(structure(bytes, class = "object_size"), units = "auto")),
indent = 2)
}
# NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed.
# The inner_dim needs to be the first dim among split dims.
# TODO: Cannot control the rest dims are in the same order or not...
# Suppose users put the same order of across inner and file dims.
if (split_multiselected_dims & merge_across_dims) {
# TODO: More than one split?
inner_dim_pos_in_split_dims <- which(names(all_split_dims[[1]]) == inner_dims_across_files)
# if inner_dim is not the first, change!
if (inner_dim_pos_in_split_dims != 1) {
# Save the current final_dims_fake for reordering it back later
tmp <- reorder_split_dims(all_split_dims[[1]], inner_dim_pos_in_split_dims, final_dims_fake)
final_dims_fake <- tmp[[1]]
all_split_dims[[1]] <- tmp[[2]]
# The following several lines will only run if retrieve = TRUE
if (retrieve) {
########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ###########
# TODO: try performance of storing all in cols instead of rows
# Create the shared memory array, and a pointer to it, to be sent
# to the work pieces.
if (is.null(ObjectBigmemory)) {
data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
} else {
data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1,
if (is.null(ObjectBigmemory)) {
name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$sharedName
} else {
name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$filename
}
#warning(paste("SharedName:", attr(shared_matrix_pointer, 'description')$sharedName))
#warning(paste("Filename:", attr(shared_matrix_pointer, 'description')$filename))
nperez
committed
#if (!is.null(ObjectBigmemory)) {
# attr(shared_matrix_pointer, 'description')$sharedName <- ObjectBigmemory
#}
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
if (is.null(num_procs)) {
num_procs <- future::availableCores()
}
# Creating a shared tmp folder to store metadata from each chunk
array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
if (!is.null(metadata_dims)) {
metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load,
list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
}
metadata_file_counter <- 0
metadata_folder <- tempfile('metadata')
dir.create(metadata_folder)
# Build the work pieces, each with:
# - file path
# - total size (dims) of store array
# - start position in store array
# - file selectors (to provide extra info. useful e.g. to select variable)
# - indices to take from file
work_pieces <- list()
for (i in 1:length(dat)) {
if (dataset_has_files[i]) {
# metadata_file_counter may be changed by the following function
work_pieces <- build_work_pieces(
work_pieces = work_pieces, i = i, selectors = dat[[i]][['selectors']],
file_dims = found_file_dims[[i]],
inner_dims = expected_inner_dims[[i]], final_dims = final_dims,
found_pattern_dim = found_pattern_dim,
inner_dims_across_files = inner_dims_across_files,
array_of_files_to_load = array_of_files_to_load,
array_of_not_found_files = array_of_not_found_files,
array_of_metadata_flags = array_of_metadata_flags,
metadata_file_counter = metadata_file_counter,
depending_file_dims = depending_file_dims, transform = transform,
transform_vars = transform_vars, picked_vars = picked_vars[[i]],
picked_vars_ordered = picked_vars_ordered[[i]],
picked_common_vars = picked_common_vars,
picked_common_vars_ordered = picked_common_vars_ordered,
metadata_folder = metadata_folder, debug = debug)
}
}
#print("N")
if (debug) {
print("-> WORK PIECES BUILT")
}
# Calculate the progress %s that will be displayed and assign them to
# the appropriate work pieces.
work_pieces <- retrieve_progress_message(work_pieces, num_procs, silent)
# NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here,
# the path name is created in work_pieces but the path hasn't been built yet.
if (num_procs == 1) {
found_files <- lapply(work_pieces, .LoadDataFile,
shared_matrix_pointer = shared_matrix_pointer,
file_data_reader = file_data_reader,
synonims = synonims,
transform = transform,
transform_params = transform_params,
transform_crop_domain = transform_crop_domain,
silent = silent, debug = debug)
} else {
cluster <- parallel::makeCluster(num_procs, outfile = "")
# Send the heavy work to the workers
work_errors <- try({
found_files <- parallel::clusterApplyLB(cluster, work_pieces, .LoadDataFile,
shared_matrix_pointer = shared_matrix_pointer,
file_data_reader = file_data_reader,
synonims = synonims,
transform = transform,
transform_params = transform_params,
transform_crop_domain = transform_crop_domain,
silent = silent, debug = debug)
})
parallel::stopCluster(cluster)
}
if (!silent) {
# if (progress_message != '')
if (length(work_pieces) / num_procs >= 2 && !silent) {
.message("\n", tag = '')
}
}
#print("P")
# NOTE: If merge_across_dims = TRUE, there might be additional NAs due to
# unequal inner_dim ('time') length across file_dim ('file_date').
# If merge_across_dims_narm = TRUE, add additional lines to remove these NAs.
# TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case.
if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) {
if (!merge_across_dims_narm) {
data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims)
} else {
tmp <- remove_additional_na_from_merge(
length_inner_across_dim, data_array,
merge_dim_metadata = picked_common_vars[[across_inner_dim]])
data_array_tmp <- tmp$data_array_tmp
# Save dim for later use
dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]])
picked_common_vars[[across_inner_dim]] <- tmp$merge_dim_metadata
if (length(data_array_tmp) != prod(final_dims_fake)) {
stop(paste0("After reshaping, the data do not fit into the expected output dimension. ",
"Check if the reshaping parameters are used correctly."))
#NOTE: When one file contains values for dicrete dimensions, rearrange the
# chunks (i.e., work_piece) is necessary.
data_array_tmp <- rebuild_array_merge_split(
data_array_tmp, indices_chunk, all_split_dims, final_dims_fake,
across_inner_dim, length_inner_across_dim)
}
data_array <- array(data_array_tmp, dim = final_dims_fake)
final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)]
picked_common_vars[[across_inner_dim]] <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim)
if (across_inner_dim == 'time') {
# Convert numeric back to dates
picked_common_vars[[across_inner_dim]] <-
as.POSIXct(picked_common_vars[[across_inner_dim]],
origin = "1970-01-01", tz = 'UTC')
}
} else { # ! (merge_across_dims + split_multiselected_dims) (old version)
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake)
}
# NOTE: If split_multiselected_dims + merge_across_dims, the dimension order may change above.
# To get the user-required dim order, we need to reorder the array again.
if (split_multiselected_dims & merge_across_dims) {
if (inner_dim_pos_in_split_dims != 1) {
correct_order <- match(names(final_dims_fake_output), names(final_dims_fake))
data_array <- .aperm2(data_array, correct_order)
}
}
gc()
# Load metadata and remove the metadata folder
if (!is.null(metadata_dims)) {
loaded_metadata_files <- list.files(metadata_folder)
if (!identical(loaded_metadata_files, character(0))) { # old code
loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS)
} else {
loaded_metadata <- NULL
}
unlink(metadata_folder, recursive = TRUE)
# Create a list of metadata of the variable (e.g., tas)
return_metadata <- create_metadata_list(array_of_metadata_flags, metadata_dims, pattern_dims,
loaded_metadata_files, loaded_metadata, dat_names,
dataset_has_files)
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
# TODO: Try to infer data type from loaded_metadata
# as.integer(data_array)
}
failed_pieces <- work_pieces[which(unlist(found_files))]
for (failed_piece in failed_pieces) {
array_of_not_found_files <- do.call('[<-',
c(list(array_of_not_found_files),
as.list(failed_piece[['file_indices_in_array_of_files']]),
list(value = TRUE)))
}
if (any(array_of_not_found_files)) {
for (i in 1:prod(dim(array_of_files_to_load))) {
if (is.na(array_of_not_found_files[i])) {
array_of_files_to_load[i] <- NA
} else {
if (array_of_not_found_files[i]) {
array_of_not_found_files[i] <- array_of_files_to_load[i]
array_of_files_to_load[i] <- NA
} else {
array_of_not_found_files[i] <- NA
}
}
}
} else {
array_of_not_found_files <- NULL
}
} # End if (retrieve)
# Change final_dims_fake back because retrieve = FALSE will use it for attributes later
if (exists("final_dims_fake_output")) {
final_dims_fake <- final_dims_fake_output
}
# Replace the vars and common vars by the transformed vars and common vars
for (i in 1:length(dat)) {
if (length(names(transformed_vars[[i]])) > 0) {
picked_vars[[i]][names(transformed_vars[[i]])] <- transformed_vars[[i]]
} else if (length(names(picked_vars_ordered[[i]])) > 0) {
picked_vars[[i]][names(picked_vars_ordered[[i]])] <- picked_vars_ordered[[i]]
}
}
if (length(names(transformed_common_vars)) > 0) {
picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
} else if (length(names(picked_common_vars_ordered)) > 0) {
picked_common_vars[names(picked_common_vars_ordered)] <- picked_common_vars_ordered
}
if (debug) {
print("-> THE TRANSFORMED VARS:")
print(str(transformed_vars))
print("-> THE PICKED VARS:")
print(str(picked_vars))
}
file_selectors <- NULL
for (i in 1:length(dat)) {
file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])]
}
if (retrieve) {
if (!silent) {
.message("Successfully retrieved data.")
}
if (all(sapply(return_metadata, is.null))) {
# We don't have metadata of the variable (e.g., tas). The returned metadata list only
# contains those are specified in argument "return_vars".
Variables_list <- c(list(common = picked_common_vars), picked_vars)
.warning(paste0("Metadata cannot be retrieved. The reason may be the ",
"non-existence of the first file. Use parameter 'metadata_dims'",
" to assign to file dimensions along which to return metadata, ",
"or check the existence of the first file."))
} else {
# Add the metadata of the variable (e.g., tas) into the list of picked_vars or
# picked_common_vars.
Variables_list <- combine_metadata_picked_vars(
return_metadata, picked_vars, picked_common_vars,
metadata_dims, pattern_dims, length(dat))