Start.R 237 KB
Newer Older
aho's avatar
aho committed
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> TRANSFORMATION REQUESTED?")
                      print(with_transform)
                      print("-> BETA:")
                      print(beta)
                    }
                  }
                  if (with_transform) {
                    # If there is a transformation and selector values are provided, these
                    # selectors will be processed in the same way either if aiat = TRUE or
                    # aiat = FALSE.
                    ## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
                    ##       otherwise, do what's coded.
                    if (debug) {
                      if (inner_dim %in% dims_to_check) {
                        print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
                      }
                    }
                    
                    ###NOTE: Here, the transform, is different from the below part of non-transform.
                    #  search 'if (goes_across_prime_meridian' to find the lines below.
                    if (goes_across_prime_meridian) {
                      # NOTE: before changing, the return is already correct.
                      
                      #NOTE: The fix below has the same explanation as no with_transform part below. 
                      #      Search the next next 'if (goes_across_prime_meridian) {'.
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- 1:n
                        # Warning if transform_extra_cell != 0
                        if (beta != 0) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                        
                      } else {
                        # normal case, i.e., not global
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        gap_width <- last_index - first_index - 1
                        sub_array_of_fri <- c(1:(min(unlist(sub_array_of_indices)) + min(gap_width, beta)),                       
                                              (max(unlist(sub_array_of_indices)) - min(gap_width, beta)):n)
                        
                        if (min(gap_width, beta) != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                      }
                      
                    } else {
                      #NOTE: This if seems redundant.
                      if (is.list(sub_array_of_indices)) {
                        sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                      }
                      first_index <- min(unlist(sub_array_of_indices))
                      last_index <- max(unlist(sub_array_of_indices))
                      
                      start_padding <- min(beta, first_index - 1)
                      end_padding <- min(beta, n - last_index)
                      
                      if (exists("is_circular_dim")) {
                        if (!is_circular_dim) {  #latitude
                          sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                          if (start_padding != beta | end_padding != beta) {
                            .warning(paste0("Adding parameter transform_extra_cells =  ",
                                            transform_extra_cells, " to the transformed index excesses ",
                                            "the border. The border index is used for transformation."))
                          }
                        } else {  #longitude
                          if ((last_index - first_index + 1 + beta * 2) >= n) {
                            sub_array_of_fri <- 1:n
                          } else if (start_padding < beta) {  # left side too close to border, need to go to right side
                            sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n)
                          } else if (end_padding < beta) { # right side too close to border, need to go to left side
                            sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding))
                          } else {  #normal
                            sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                          }
                        }
                      } else {   # when <var>_reorder is not used
                        sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                        if (start_padding != beta | end_padding != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                      }
                      
                    }
                    subset_vars_to_transform <- vars_to_transform
                    if (!is.null(var_ordered)) {
                      
                      ##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector.
                      ## Turn it into array and add dimension name.
                      if (!is.array(var_ordered)) {
                        var_ordered <- as.array(var_ordered)
                        names(dim(var_ordered)) <- inner_dim
                      }
                      
                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
                    } else {
                      ##NOTE: It should be redundant because without reordering the var should remain array
                      ## But just stay same with above...
                      if (!is.array(sub_array_of_values)) {
                        sub_array_of_values <- as.array(sub_array_of_values)
                        names(dim(sub_array_of_values)) <- inner_dim
                      }
                      
                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
                    }
                    
                    # Change the order of longitude crop if no reorder + from big to small.
                    # cdo -sellonlatbox, the lon is west, east (while lat can be north 
                    # to south or opposite)
                    
                    # Before changing crop, first we need to find the name of longitude. 
                    # NOTE: The potential bug here (also the bug for CDORemapper): the lon name
                    #       is limited (only the ones listed in .KnownLonNames() are available.
                    known_lon_names <- startR:::.KnownLonNames()
aho's avatar
aho committed
3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531
                    lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]]
                    
                    # NOTE: The cases not considered: (1) if lon reorder(decreasing = T)
                    #       It doesn't make sense, but if someone uses it, here should
                    #       occur error. (2) crop = TRUE/FALSE
                    if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) {
                      if (is.numeric(class(transform_params$crop))) {
                        if (transform_params$crop[1] > transform_params$crop[2]) {
                          tmp <- transform_params$crop[1]
                          transform_params$crop[1] <- transform_params$crop[2]
                          transform_params$crop[2] <- tmp
                        }
                      }
                    }
                    
                    transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
                                                                        variables = subset_vars_to_transform,
                                                                        file_selectors = selectors_of_first_files_with_data[[i]]),
                                                                   transform_params))$variables[[var_with_selectors_name]]
                    # Sorting the transformed variable and working out the indices again after transform.
                    if (!is.null(dim_reorder_params[[inner_dim]])) {
                      transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
                      transformed_subset_var <- transformed_subset_var_reorder$x
                      #NOTE: The fix here solves the mis-ordered lon when across_meridian. 
                      transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
                      #                      transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
                    } else {
                      transformed_subset_var_unorder <- 1:length(transformed_subset_var)
                    }
                    sub_array_of_sri <- selector_checker(sub_array_of_selectors, transformed_subset_var,
                                                         tolerance = if (aiat) {
                                                           tolerance_params[[inner_dim]]
                                                         } else {
                                                           NULL
                                                         })
                    
                    # Check if selectors fall out of the range of the transform grid
                    # It may happen when original lon is [-180, 180] while want to regrid to
                    # [0, 360], and lon selector = [-20, -10].
                    if (any(is.na(sub_array_of_sri))) {
                      stop(paste0("The selectors of ",
                                  inner_dim, " are out of range of transform grid '",
                                  transform_params$grid, "'. Use parameter '",
                                  inner_dim, "_reorder' or change ", inner_dim,
                                  " selectors."))
                    }
                    
                    if (goes_across_prime_meridian) {
                      
                      if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
                        # global longitude
                        sub_array_of_sri <- c(1:length(transformed_subset_var))
                      } else {
                        # the common case, i.e., non-global
                        # NOTE: Because sub_array_of_sri order is exchanged due to 
                        # previous development, here [[1]] and [[2]] should exchange
                        sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
                                              sub_array_of_sri[[2]]:length(transformed_subset_var))
                      }
                      
                    } else if (is.list(sub_array_of_sri)) {
                      sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                    }
                    ordered_sri <- sub_array_of_sri
                    sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
                    # In this case, the tvi are not defined and the 'transformed_subset_var'
                    # will be taken instead of the var transformed before in the code.
                    if (debug) {
                      if (inner_dim %in% dims_to_check) {
                        print("-> FIRST INDEX:")
                        print(first_index)
                        print("-> LAST INDEX:")
                        print(last_index)
                        print("-> STRUCTURE OF FIRST ROUND INDICES:")
                        print(str(sub_array_of_fri))
                        print("-> STRUCTURE OF SECOND ROUND INDICES:")
                        print(str(sub_array_of_sri))
                        print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
                        print(str(tvi))
                      }
                    }
                    ###                    # If the selectors are expressed after transformation
                    ###                    } else {
                    ###if (debug) {
                    ###if (inner_dim %in% dims_to_check) {
                    ###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
                    ###}
                    ###}
                    ###                      if (goes_across_prime_meridian) {
                    ###                        sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
                    ###                                                    1:sub_array_of_indices[[2]])
                    ###                      }
                    ###                      first_index <- min(unlist(sub_array_of_indices))
                    ###                      last_index <- max(unlist(sub_array_of_indices))
                    ###                      first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                    ###                      last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                    ###                      sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                    ###                      n_of_extra_cells <- round(beta / n * m)
                    ###                      if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                    ###                        sub_array_of_sri <- 1:(last_index - first_index + 1) 
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_sri + first_index - 1
                    ###                        }
                    ###                      } else {
                    ###                        sub_array_of_sri <- sub_array_of_indices - first_index + 1
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_indices
                    ###                        }
                    ###                      }
                    ###                      sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
                    sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                             list(value = sub_array_of_sri)))
                  } else {
                    if (goes_across_prime_meridian) {
                      #NOTE: The potential problem here is, if it is global longitude,
                      #      and the indices overlap (e.g., lon = [0, 359.723] and 
                      #      CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). 
                      #      Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll
                      #      get two 649.
                      #      The fix below may not be the best solution, but it works for 
                      #      the example above.
                      
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- c(1:n)
                      } else {
                        # the common case, i.e., non-global
                        sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)),
                                              max(unlist(sub_array_of_indices)):n)
                      }
                      
                    } else if (is.list(sub_array_of_indices)) {
                      sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                    }
                  }
                  if (!is.null(var_unorder_indices)) {
                    if (is.null(ordered_fri)) {
                      ordered_fri <- sub_array_of_fri
                    }
                    sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                  }
                  fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                           list(value = sub_array_of_fri)))
                  if (!is.null(file_dim)) {
                    taken_chunks[selector_store_position[[file_dim]]] <- TRUE
                  } else {
                    taken_chunks <- TRUE
                  }
                }
              } else {
                if (debug) {
                  if (inner_dim %in% dims_to_check) {
                    print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.")
                  }
                }
                if (inner_dim %in% names(dim(sub_array_of_selectors))) {
                  if (is.null(var_with_selectors_name)) {
                    if (any(na.omit(unlist(sub_array_of_selectors)) < 1) ||
                        any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) {
                      stop("Provided indices out of range for dimension '", inner_dim, "' ", 
                           "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", 
                           data_dims[inner_dim] * chunk_amount, ").")
                    }
                  } else {
                    if (inner_dim %in% names(dim(sub_array_of_values))) {
                      # NOTE: Put across-inner-dim at the 1st position.
                      # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_selectors below.
                      inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim)
                      if (inner_dim_pos_in_sub_array != 1) {
                        new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array]
                        new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                        sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order)
                      }
                    }
                  }
                  
                  # NOTE: Put across-inner-dim at the 1st position.
                  # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_values above.
                  inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim)
                  if (inner_dim_pos_in_sub_array != 1) {
                    new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array]
                    new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                    sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order)
                  }
                  sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
                                                           tolerance = tolerance_params[[inner_dim]])
                  # It is needed to expand the indices here, otherwise for 
                  # values(list(date1, date2)) only 2 values are picked.
                  if (is.list(sub_array_of_indices)) {
                    sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                  }
                  sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
                                                                             chunks[[inner_dim]]['chunk'],
                                                                             chunks[[inner_dim]]['n_chunks'],
                                                                             inner_dim)]
                  sub_array_is_list <- FALSE
                  if (is.list(sub_array_of_indices)) {
                    sub_array_is_list <- TRUE
                    sub_array_of_indices <- unlist(sub_array_of_indices)
                  }
                  if (is.null(var_with_selectors_name)) {
                    indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
                  } else {
                    indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
                  }
                  if (sub_array_is_list) {
                    sub_array_of_indices <- as.list(sub_array_of_indices)
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> GOING TO ITERATE ALONG CHUNKS.")
                    }
                  }
                  for (chunk in 1:chunk_amount) {
                    if (!is.null(names(selector_store_position))) {
                      selector_store_position[file_dim] <- chunk
                    } else {
                      selector_store_position <- chunk
                    }
                    chunk_selectors <- transformed_indices[which(indices_chunk == chunk)]
                    sub_array_of_indices <- chunk_selectors
                    if (with_transform) {
                      # If the provided selectors are expressed in the world
                      # before transformation
                      if (!aiat) {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n))
                        sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m)
                        if (is.list(sub_array_of_indices)) {
                          if (length(sub_array_of_sri) > 1) {
                            sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                          }
                        }
                        ##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI
                        # If the selectors are expressed after transformation
                      } else {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                        last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                        sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                        if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                          sub_array_of_sri <- 1:(last_index - first_index + 1) + 
                            round(beta / n * m) 
                        } else {
                          sub_array_of_sri <- sub_array_of_indices - first_index + 1 +
                            round(beta / n * m)
                        }
                        ##TODO: FILL IN TVI
                      }
                      sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                               list(value = sub_array_of_sri)))
                      if (length(sub_array_of_sri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                      if (length(sub_array_of_fri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    }
                    if (!is.null(var_unorder_indices)) {
                      ordered_fri <- sub_array_of_fri
                      sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                    }
                    fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                             list(value = sub_array_of_fri)))
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> FINISHED ITERATING ALONG CHUNKS")
                    }
                  }
                } else {
                  stop("Provided array of indices for dimension '", inner_dim, "', ",
                       "which goes across the file dimension '", file_dim, "', but ",
                       "the provided array does not have the dimension '", inner_dim, 
                       "', which is mandatory.")
                }
              }
            }
          }
          if (debug) {
            if (inner_dim %in% dims_to_check) {
              print("-> PROCEEDING TO CROP VARIABLES")
            }
          }
          #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) {
          #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) &&
          #    (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) {
          empty_chunks <- which(!taken_chunks)
          if (length(empty_chunks) >= length(taken_chunks)) {
            stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.")
          }
          if (length(empty_chunks) > 0) {
            #                # Get the first group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2
            #                dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1)))
            #                if (all(dist == 1)) {
            #                  start_chunks_to_remove <- NULL
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                  start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)])
            #                }
            #                # Get the last group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9
            #                dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1)))
            #                if (all(dist == 1)) {
            #                  first_chunk_to_remove <- 1
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                }
            #                end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)]
            #                chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove)))
            chunks_to_keep <- which(taken_chunks)
            dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep))
            #                found_indices <- Subset(found_indices, file_dim, chunks_to_keep)
            #                # Crop dataset variables file dims.
            #                for (picked_var in names(picked_vars[[i]])) {
            #                  if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
            #                    picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep)
            #                  }
            #                }
          }
          #}
          dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri)
          # Crop dataset variables inner dims.
          # Crop common variables inner dims.
          types_of_var_to_crop <- 'picked'
          if (with_transform) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed')
          }
          if (!is.null(dim_reorder_params[[inner_dim]])) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered')
          }
          for (type_of_var_to_crop in types_of_var_to_crop) {
            if (type_of_var_to_crop == 'transformed') {
              if (is.null(tvi)) {
                if (!is.null(dim_reorder_params[[inner_dim]])) {
                  crop_indices <- unique(unlist(ordered_sri))
                } else {
                  crop_indices <- unique(unlist(sri))
                }
              } else {
                crop_indices <- unique(unlist(tvi))
              }
              vars_to_crop <- transformed_vars[[i]]
              common_vars_to_crop <- transformed_common_vars
            } else if (type_of_var_to_crop == 'reordered') {
              crop_indices <- unique(unlist(ordered_fri))
              vars_to_crop <- picked_vars_ordered[[i]]
              common_vars_to_crop <- picked_common_vars_ordered
            } else {
              crop_indices <- unique(unlist(fri))
              vars_to_crop <- picked_vars[[i]]
              common_vars_to_crop <- picked_common_vars
            }
            for (var_to_crop in names(vars_to_crop)) {
              if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) {
                if (!is.null(crop_indices)) {
                  if (type_of_var_to_crop == 'transformed') {
                    if (!aiat) {
                      vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                    } else {
                      vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                    }
                  } else {
                    vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                  }
                }
              }
            }
            if (i == length(dat)) {
              for (common_var_to_crop in names(common_vars_to_crop)) {
                if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) {

                  if (type_of_var_to_crop == 'transformed' & !aiat) {
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                  } else {  #old code
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices)
                  }

                }
              }
            }
            if (type_of_var_to_crop == 'transformed') {
              if (!is.null(vars_to_crop)) {
                transformed_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                transformed_common_vars <- common_vars_to_crop
              }
            } else if (type_of_var_to_crop == 'reordered') {
              if (!is.null(vars_to_crop)) {
                picked_vars_ordered[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                picked_common_vars_ordered <- common_vars_to_crop
              }
            } else {
              if (!is.null(vars_to_crop)) {
                picked_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                #NOTE: To avoid redundant run
                if (inner_dim %in% names(common_vars_to_crop)) {
                  picked_common_vars <- common_vars_to_crop
                }
aho's avatar
aho committed
              }
            }
          }
          #}
        }
        # After the selectors have been picked (using the original variables), 
        # the variables are transformed. At that point, the original selectors
        # for the transformed variables are also kept in the variable original_selectors.
        #print("L")
      }
    }
  }
  #  if (!is.null(transformed_common_vars)) {
  #    picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
  #  }
  # Remove the trailing chunks, if any.
  for (file_dim in names(dims_to_crop)) {
    #    indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max))
    ## TODO: Merge indices in dims_to_crop with some advanced mechanism?
    indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]]))
    array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep)
    array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep)
    for (i in 1:length(dat)) {
      # Crop selectors
      for (selector_dim in names(dat[[i]][['selectors']])) {
        if (selector_dim == file_dim) {
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) {
            dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep]
          }
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) {
            dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep]
          }
        }
        if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) {
          dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep)
          dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep)
        }
      }
      # Crop dataset variables file dims.
      for (picked_var in names(picked_vars[[i]])) {
        if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
          picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep)
        }
      }
      for (transformed_var in names(transformed_vars[[i]])) {
        if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) {
          transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep)
        }
      }
    }
    # Crop common variables file dims.
    for (picked_common_var in names(picked_common_vars)) {
      if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) {
        picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep)
      }
    }
    for (transformed_common_var in names(transformed_common_vars)) {
      if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) {
        transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep)
      }
    }
  }
  # Calculate the size of the final array.
  total_inner_dims <- NULL
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      inner_dims <- expected_inner_dims[[i]]
      inner_dims <- sapply(inner_dims, 
                           function(x) {
                             if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) {
                               max(sapply(dat[[i]][['selectors']][[x]][['sri']], length))
                             } else {
                               if (length(var_params[[x]]) > 0) {
                                 if (var_params[[x]] %in% names(transformed_vars[[i]])) {
                                   length(transformed_vars[[i]][[var_params[[x]]]])
                                 } else if (var_params[[x]] %in% names(transformed_common_vars)) {
                                   length(transformed_common_vars[[var_params[[x]]]])
                                 } else {
                                   max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                                 }
                               } else {
                                 max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                               }
                             }
                           })
      names(inner_dims) <- expected_inner_dims[[i]]
      if (is.null(total_inner_dims)) {
        total_inner_dims <- inner_dims
      } else {
        new_dims <- .MergeArrayDims(total_inner_dims, inner_dims)
        total_inner_dims <- new_dims[[3]]
      }
    }
  }
  new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims)
  final_dims <- new_dims[[3]][dim_names]
  # final_dims_fake is the vector of final dimensions after having merged the 
  # 'across' file dimensions with the respective 'across' inner dimensions, and
  # after having broken into multiple dimensions those dimensions for which 
  # multidimensional selectors have been provided.
  # final_dims will be used for collocation of data, whereas final_dims_fake 
  # will be used for shaping the final array to be returned to the user.
  final_dims_fake <- final_dims
  if (merge_across_dims) {
    if (!is.null(inner_dims_across_files)) {
      for (file_dim_across in names(inner_dims_across_files)) {
        inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]])
        new_dims <- c()
        if (inner_dim_pos > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)])
        }
        new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), 
                                         inner_dims_across_files[[file_dim_across]]))
        if (inner_dim_pos + 1 < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)])
        }
        final_dims_fake <- new_dims
      }
    }
  }
  all_split_dims <- NULL
  if (split_multiselected_dims) {
    for (dim_param in 1:length(dim_params)) {
      if (!is.null(dim(dim_params[[dim_param]]))) {
        if (length(dim(dim_params[[dim_param]])) > 1) {
          split_dims <- dim(dim_params[[dim_param]])
          all_split_dims <- c(all_split_dims, setNames(list(split_dims), 
                                                       names(dim_params)[dim_param]))
          if (is.null(names(split_dims))) {
            names(split_dims) <- paste0(names(dim_params)[dim_param], 
                                        1:length(split_dims))
          }
          old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param])
          
          # NOTE: Three steps to create new dims.
          # 1st: Put in the dims before split_dim.
          # 2nd: Replace the old_dim with split_dims.
          # 3rd: Put in the dims after split_dim.
          new_dims <- c()
          if (old_dim_pos > 1) {
            new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)])
          }
          new_dims <- c(new_dims, split_dims)
          if (old_dim_pos < length(final_dims_fake)) {
            new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)])
          }
          final_dims_fake <- new_dims
        }
      }
    }
  }
  if (merge_across_dims_narm) {
    # only merge_across_dims -> the 'time' dim length needs to be adjusted
    across_inner_dim <- inner_dims_across_files[[1]]  #TODO: more than one?
    across_file_dim <- names(inner_dims_across_files)  #TODO: more than one?
    # Get the length of each inner_dim ('time') along each file_dim ('file_date')  
    length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length)
    
    if (!split_multiselected_dims) {
      final_dims_fake_name <- names(final_dims_fake)
      pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim)
      new_length_inner_dim <- sum(unlist(length_inner_across_dim))
      if (pos_across_inner_dim != length(final_dims_fake)) {
        final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                             new_length_inner_dim,
                             final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)])
      } else {
        final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                             new_length_inner_dim)
      }
      names(final_dims_fake) <- final_dims_fake_name
    }
  }
  
  if (!silent) {
    .message("Detected dimension sizes:")
    longest_dim_len <- max(sapply(names(final_dims_fake), nchar))
    longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar))
    sapply(names(final_dims_fake), 
           function(x) {
             message(paste0("*   ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), 
                            x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), 
                            final_dims_fake[x]))
           })
    bytes <- prod(c(final_dims_fake, 8))
    dim_sizes <- paste(final_dims_fake, collapse = ' x ')
    if (retrieve) {
      .message(paste("Total size of requested data:"))
    } else {
      .message(paste("Total size of involved data:"))
    }
    .message(paste(dim_sizes, " x 8 bytes =", 
                   format(structure(bytes, class = "object_size"), units = "auto")), 
             indent = 2)
  }
  
  # NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed.
  #       The inner_dim needs to be the first dim among split dims.
  #       Cannot control the rest dims are in the same order or not...
  #       Suppose users put the same order of across inner and file dims.
  if (split_multiselected_dims & merge_across_dims) {
    # TODO: More than one split?
    inner_dim_pos_in_split_dims <- which(names(all_split_dims[[1]]) == inner_dims_across_files)  
    # if inner_dim is not the first, change!
    if (inner_dim_pos_in_split_dims != 1) {
      split_dims <- c(split_dims[inner_dim_pos_in_split_dims],
                      split_dims[1:length(split_dims)][-inner_dim_pos_in_split_dims])
      split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(split_dims))))
      # Save the current final_dims_fake for later reorder back
      final_dims_fake_output <- final_dims_fake
      new_dims <- c()
      if (split_dims_pos[1] != 1) {
        new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
      }
      new_dims <- c(new_dims, split_dims)
      if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
        new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
      }
      final_dims_fake <- new_dims
    }
  }
  
  # The following several lines will only be run if retrieve = TRUE
  if (retrieve) {
    
    ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ###########
    # TODO: try performance of storing all in cols instead of rows
    # Create the shared memory array, and a pointer to it, to be sent
    # to the work pieces.
    if (is.null(ObjectBigmemory)) {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
    } else {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1,
                                            backingfile = ObjectBigmemory)
    }
aho's avatar
aho committed
    shared_matrix_pointer <- bigmemory::describe(data_array)
    if (is.null(ObjectBigmemory)) {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$sharedName
    } else {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$filename
    }

    #warning(paste("SharedName:", attr(shared_matrix_pointer, 'description')$sharedName))
    #warning(paste("Filename:", attr(shared_matrix_pointer, 'description')$filename))
    #if (!is.null(ObjectBigmemory)) {
    #  attr(shared_matrix_pointer, 'description')$sharedName <- ObjectBigmemory
    #}
aho's avatar
aho committed
    if (is.null(num_procs)) {
      num_procs <- future::availableCores()
    }
    # Creating a shared tmp folder to store metadata from each chunk
    array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
    if (!is.null(metadata_dims)) {
      metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
      names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
      metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
      array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags),  metadata_indices_to_load,
                                                  list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
    }
    metadata_file_counter <- 0
    metadata_folder <- tempfile('metadata')
    dir.create(metadata_folder)
    # Build the work pieces, each with:
    # - file path
    # - total size (dims) of store array
    # - start position in store array
    # - file selectors (to provide extra info. useful e.g. to select variable)
    # - indices to take from file
    work_pieces <- list()
    for (i in 1:length(dat)) {
      if (dataset_has_files[i]) {
        selectors <- dat[[i]][['selectors']]
        file_dims <- found_file_dims[[i]]
        inner_dims <- expected_inner_dims[[i]]
        sub_array_dims <- final_dims[file_dims]
        sub_array_dims[found_pattern_dim] <- 1
        sub_array_of_files_to_load <- array(1:prod(sub_array_dims), 
                                            dim = sub_array_dims)
        names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims)
        # Detect which of the dimensions of the dataset go across files.
        file_dim_across_files <- lapply(inner_dims, 
                                        function(x) {
                                          dim_across <- sapply(inner_dims_across_files, function(y) x %in% y)
                                          if (any(dim_across)) {
                                            names(inner_dims_across_files)[which(dim_across)[1]]
                                          } else {
                                            NULL
                                          }
                                        })
        names(file_dim_across_files) <- inner_dims
        j <- 1
        while (j <= prod(sub_array_dims)) {
          # Work out file path.
          file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
          names(file_to_load_sub_indices) <- names(sub_array_dims)
          file_to_load_sub_indices[found_pattern_dim] <- i
          big_dims <- rep(1, length(dim(array_of_files_to_load)))
          names(big_dims) <- names(dim(array_of_files_to_load))
          file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]]
          file_to_load <- do.call('[[', c(list(array_of_files_to_load), 
                                          as.list(file_to_load_indices)))
          not_found_file <- do.call('[[', c(list(array_of_not_found_files),
                                            as.list(file_to_load_indices)))
          load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), 
                                               as.list(file_to_load_indices)))
          if (load_file_metadata) {
            metadata_file_counter <- metadata_file_counter + 1
          }
          if (!is.na(file_to_load) && !not_found_file) {
            # Work out indices to take
            first_round_indices <- lapply(inner_dims, 
                                          function (x) {
                                            if (is.null(file_dim_across_files[[x]])) {
                                              selectors[[x]][['fri']][[1]]
                                            } else {
                                              which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] 
                                              selectors[[x]][['fri']][[which_chunk]]
                                            }
                                          })
            names(first_round_indices) <- inner_dims
            second_round_indices <- lapply(inner_dims, 
                                           function (x) {
                                             if (is.null(file_dim_across_files[[x]])) {
                                               selectors[[x]][['sri']][[1]]
                                             } else {
                                               which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]]
                                               selectors[[x]][['sri']][[which_chunk]]
                                             }
                                           })
            if (debug) {
              print("-> BUILDING A WORK PIECE")
              #print(str(selectors))
            }
            names(second_round_indices) <- inner_dims
            if (!any(sapply(first_round_indices, length) == 0)) {
              work_piece <- list()
              work_piece[['first_round_indices']] <- first_round_indices
              work_piece[['second_round_indices']] <- second_round_indices
              work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
              work_piece[['file_path']] <- file_to_load
              work_piece[['store_dims']] <- final_dims
              # Work out store position
              store_position <- final_dims
              store_position[names(file_to_load_indices)] <- file_to_load_indices
              store_position[inner_dims] <- rep(1, length(inner_dims))
              work_piece[['store_position']] <- store_position
              # Work out file selectors
              file_selectors <- sapply(file_dims, 
                                       function (x) {
                                         vector_to_pick <- 1
                                         if (x %in% names(depending_file_dims)) {
                                           vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                                         }
                                         selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
                                       })
              names(file_selectors) <- file_dims
              work_piece[['file_selectors']] <- file_selectors
              # Send variables for transformation
              if (!is.null(transform) && (length(transform_vars) > 0)) {
                vars_to_transform <- NULL
                picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars)
                if (length(picked_vars_to_transform) > 0) {
                  picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform])
                  if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) {
                    picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))]
                    vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform]
                  }
                }
                picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
                if (length(picked_common_vars_to_transform) > 0) {
                  picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform])
                  if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) {
                    picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))]
                    vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform]
                  }
                }
                work_piece[['vars_to_transform']] <- vars_to_transform
              }
              # Send flag to load metadata
              if (load_file_metadata) {
                work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter)
              }
              work_pieces <- c(work_pieces, list(work_piece))
            }
          }
          j <- j + 1
        }
      }
    }
    #print("N")
    if (debug) {
      print("-> WORK PIECES BUILT")
    }
    
    # Calculate the progress %s that will be displayed and assign them to 
    # the appropriate work pieces.
    if (length(work_pieces) / num_procs >= 2 && !silent) {
      if (length(work_pieces) / num_procs < 10) {
        amount <- 100 / ceiling(length(work_pieces) / num_procs)
        reps <- ceiling(length(work_pieces) / num_procs)
      } else {
        amount <- 10
        reps <- 10
      }
      progress_steps <- rep(amount, reps)
      if (length(work_pieces) < (reps + 1)) {
        selected_pieces <- length(work_pieces)
        progress_steps <- c(sum(head(progress_steps, reps)),
                            tail(progress_steps, reps))
      } else {
        selected_pieces <- round(seq(1, length(work_pieces), 
                                     length.out = reps + 1))[-1]
      }
      progress_steps <- paste0(' + ', round(progress_steps, 2), '%')
      progress_message <- 'Progress: 0%'
    } else {
      progress_message <- ''
      selected_pieces <- NULL
    }
    piece_counter <- 1
    step_counter <- 1
    work_pieces <- lapply(work_pieces, 
                          function (x) {
                            if (piece_counter %in% selected_pieces) {
                              wp <- c(x, list(progress_amount = progress_steps[step_counter]))
                              step_counter <<- step_counter + 1
                            } else {
                              wp <- x
                            }
                            piece_counter <<- piece_counter + 1
                            wp
                          })
    if (!silent) {
      .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.")
      .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.")
      .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:"))
      if (length(work_pieces) < 30) {
        lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2))
      } else {
        .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5)
      }
    }
    
    # Build the cluster of processes that will do the work and dispatch work pieces.
    # The function .LoadDataFile is applied to each work piece. This function will
    # open the data file, regrid if needed, subset, apply the mask, 
    # compute and apply the weights if needed,
    # disable extreme values and store in the shared memory matrix.
    #print("O")
    if (!silent) {
      .message("Loading... This may take several minutes...")
      if (progress_message != '') {
        .message(progress_message, appendLF = FALSE)
      }
    }

# NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here,
#       the path name is created in work_pieces but the path hasn't been built yet.
    if (num_procs == 1) {
      found_files <- lapply(work_pieces, .LoadDataFile, 
                            shared_matrix_pointer = shared_matrix_pointer,
                            file_data_reader = file_data_reader, 
                            synonims = synonims,