Start.R 240 KB
Newer Older
aho's avatar
aho committed
                      print("-> BETA:")
                      print(beta)
                    }
                  }
                  if (with_transform) {
                    # If there is a transformation and selector values are provided, these
                    # selectors will be processed in the same way either if aiat = TRUE or
                    # aiat = FALSE.
                    ## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
                    ##       otherwise, do what's coded.
                    if (debug) {
                      if (inner_dim %in% dims_to_check) {
                        print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
                      }
                    }
                    
                    ###NOTE: Here, the transform, is different from the below part of non-transform.
                    #  search 'if (goes_across_prime_meridian' to find the lines below.
                    if (goes_across_prime_meridian) {
                      # NOTE: before changing, the return is already correct.
                      
                      #NOTE: The fix below has the same explanation as no with_transform part below. 
                      #      Search the next next 'if (goes_across_prime_meridian) {'.
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- 1:n
                        # Warning if transform_extra_cell != 0
                        if (beta != 0) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                        
                      } else {
                        # normal case, i.e., not global
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        gap_width <- last_index - first_index - 1
                        sub_array_of_fri <- c(1:(min(unlist(sub_array_of_indices)) + min(gap_width, beta)),                       
                                              (max(unlist(sub_array_of_indices)) - min(gap_width, beta)):n)
                        
                        if (min(gap_width, beta) != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                      }
                      
                    } else {
                      #NOTE: This if seems redundant.
                      if (is.list(sub_array_of_indices)) {
                        sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                      }
                      first_index <- min(unlist(sub_array_of_indices))
                      last_index <- max(unlist(sub_array_of_indices))
                      
                      start_padding <- min(beta, first_index - 1)
                      end_padding <- min(beta, n - last_index)
                      
                      if (exists("is_circular_dim")) {
                        if (!is_circular_dim) {  #latitude
                          sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                          if (start_padding != beta | end_padding != beta) {
                            .warning(paste0("Adding parameter transform_extra_cells =  ",
                                            transform_extra_cells, " to the transformed index excesses ",
                                            "the border. The border index is used for transformation."))
                          }
                        } else {  #longitude
                          if ((last_index - first_index + 1 + beta * 2) >= n) {
                            sub_array_of_fri <- 1:n
                          } else if (start_padding < beta) {  # left side too close to border, need to go to right side
                            sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n)
                          } else if (end_padding < beta) { # right side too close to border, need to go to left side
                            sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding))
                          } else {  #normal
                            sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                          }
                        }
                      } else {   # when <var>_reorder is not used
                        sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                        if (start_padding != beta | end_padding != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                          transform_extra_cells, " to the transformed index excesses ",
                                          "the border. The border index is used for transformation."))
                        }
                      }
                      
                    }
                    subset_vars_to_transform <- vars_to_transform
                    if (!is.null(var_ordered)) {
                      
                      ##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector.
                      ## Turn it into array and add dimension name.
                      if (!is.array(var_ordered)) {
                        var_ordered <- as.array(var_ordered)
                        names(dim(var_ordered)) <- inner_dim
                      }
                      
                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
                    } else {
                      ##NOTE: It should be redundant because without reordering the var should remain array
                      ## But just stay same with above...
                      if (!is.array(sub_array_of_values)) {
                        sub_array_of_values <- as.array(sub_array_of_values)
                        names(dim(sub_array_of_values)) <- inner_dim
                      }
                      
                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
                    }
                    
                    # Change the order of longitude crop if no reorder + from big to small.
                    # cdo -sellonlatbox, the lon is west, east (while lat can be north 
                    # to south or opposite)
                    
                    # Before changing crop, first we need to find the name of longitude. 
                    # NOTE: The potential bug here (also the bug for CDORemapper): the lon name
                    #       is limited (only the ones listed in .KnownLonNames() are available.
                    known_lon_names <- startR:::.KnownLonNames()
aho's avatar
aho committed
3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476
                    lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]]
                    
                    # NOTE: The cases not considered: (1) if lon reorder(decreasing = T)
                    #       It doesn't make sense, but if someone uses it, here should
                    #       occur error. (2) crop = TRUE/FALSE
                    if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) {
                      if (is.numeric(class(transform_params$crop))) {
                        if (transform_params$crop[1] > transform_params$crop[2]) {
                          tmp <- transform_params$crop[1]
                          transform_params$crop[1] <- transform_params$crop[2]
                          transform_params$crop[2] <- tmp
                        }
                      }
                    }
                    
                    transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
                                                                        variables = subset_vars_to_transform,
                                                                        file_selectors = selectors_of_first_files_with_data[[i]]),
                                                                   transform_params))$variables[[var_with_selectors_name]]
                    # Sorting the transformed variable and working out the indices again after transform.
                    if (!is.null(dim_reorder_params[[inner_dim]])) {
                      transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
                      transformed_subset_var <- transformed_subset_var_reorder$x
                      #NOTE: The fix here solves the mis-ordered lon when across_meridian. 
                      transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
                      #                      transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
                    } else {
                      transformed_subset_var_unorder <- 1:length(transformed_subset_var)
                    }
                    sub_array_of_sri <- selector_checker(sub_array_of_selectors, transformed_subset_var,
                                                         tolerance = if (aiat) {
                                                           tolerance_params[[inner_dim]]
                                                         } else {
                                                           NULL
                                                         })
                    
                    # Check if selectors fall out of the range of the transform grid
                    # It may happen when original lon is [-180, 180] while want to regrid to
                    # [0, 360], and lon selector = [-20, -10].
                    if (any(is.na(sub_array_of_sri))) {
                      stop(paste0("The selectors of ",
                                  inner_dim, " are out of range of transform grid '",
                                  transform_params$grid, "'. Use parameter '",
                                  inner_dim, "_reorder' or change ", inner_dim,
                                  " selectors."))
                    }
                    
                    if (goes_across_prime_meridian) {
                      
                      if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
                        # global longitude
                        sub_array_of_sri <- c(1:length(transformed_subset_var))
                      } else {
                        # the common case, i.e., non-global
                        # NOTE: Because sub_array_of_sri order is exchanged due to 
                        # previous development, here [[1]] and [[2]] should exchange
                        sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
                                              sub_array_of_sri[[2]]:length(transformed_subset_var))
                      }
                      
                    } else if (is.list(sub_array_of_sri)) {
                      sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                    }
                    ordered_sri <- sub_array_of_sri
                    sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
                    # In this case, the tvi are not defined and the 'transformed_subset_var'
                    # will be taken instead of the var transformed before in the code.
                    if (debug) {
                      if (inner_dim %in% dims_to_check) {
                        print("-> FIRST INDEX:")
                        print(first_index)
                        print("-> LAST INDEX:")
                        print(last_index)
                        print("-> STRUCTURE OF FIRST ROUND INDICES:")
                        print(str(sub_array_of_fri))
                        print("-> STRUCTURE OF SECOND ROUND INDICES:")
                        print(str(sub_array_of_sri))
                        print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
                        print(str(tvi))
                      }
                    }
                    ###                    # If the selectors are expressed after transformation
                    ###                    } else {
                    ###if (debug) {
                    ###if (inner_dim %in% dims_to_check) {
                    ###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
                    ###}
                    ###}
                    ###                      if (goes_across_prime_meridian) {
                    ###                        sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
                    ###                                                    1:sub_array_of_indices[[2]])
                    ###                      }
                    ###                      first_index <- min(unlist(sub_array_of_indices))
                    ###                      last_index <- max(unlist(sub_array_of_indices))
                    ###                      first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                    ###                      last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                    ###                      sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                    ###                      n_of_extra_cells <- round(beta / n * m)
                    ###                      if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                    ###                        sub_array_of_sri <- 1:(last_index - first_index + 1) 
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_sri + first_index - 1
                    ###                        }
                    ###                      } else {
                    ###                        sub_array_of_sri <- sub_array_of_indices - first_index + 1
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_indices
                    ###                        }
                    ###                      }
                    ###                      sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
                    sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                             list(value = sub_array_of_sri)))
                  } else {
                    if (goes_across_prime_meridian) {
                      #NOTE: The potential problem here is, if it is global longitude,
                      #      and the indices overlap (e.g., lon = [0, 359.723] and 
                      #      CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). 
                      #      Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll
                      #      get two 649.
                      #      The fix below may not be the best solution, but it works for 
                      #      the example above.
                      
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- c(1:n)
                      } else {
                        # the common case, i.e., non-global
                        sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)),
                                              max(unlist(sub_array_of_indices)):n)
                      }
                      
                    } else if (is.list(sub_array_of_indices)) {
                      sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                    }
                  }
                  if (!is.null(var_unorder_indices)) {
                    if (is.null(ordered_fri)) {
                      ordered_fri <- sub_array_of_fri
                    }
                    sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                  }
                  fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                           list(value = sub_array_of_fri)))
                  if (!is.null(file_dim)) {
                    taken_chunks[selector_store_position[[file_dim]]] <- TRUE
                  } else {
                    taken_chunks <- TRUE
                  }
                }
              } else {
                if (debug) {
                  if (inner_dim %in% dims_to_check) {
                    print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.")
                  }
                }
                if (inner_dim %in% names(dim(sub_array_of_selectors))) {
                  if (is.null(var_with_selectors_name)) {
                    if (any(na.omit(unlist(sub_array_of_selectors)) < 1) ||
                        any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) {
                      stop("Provided indices out of range for dimension '", inner_dim, "' ", 
                           "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", 
                           data_dims[inner_dim] * chunk_amount, ").")
                    }
                  } else {
                    if (inner_dim %in% names(dim(sub_array_of_values))) {
                      # NOTE: Put across-inner-dim at the 1st position.
                      # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_selectors below.
                      inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim)
                      if (inner_dim_pos_in_sub_array != 1) {
                        new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array]
                        new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                        sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order)
                      }
                    }
                  }
                  
                  # NOTE: Put across-inner-dim at the 1st position.
                  # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_values above.
                  inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim)
                  if (inner_dim_pos_in_sub_array != 1) {
                    new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array]
                    new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                    sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order)
                  }
                  sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
                                                           tolerance = tolerance_params[[inner_dim]])
                  # It is needed to expand the indices here, otherwise for 
                  # values(list(date1, date2)) only 2 values are picked.
                  if (is.list(sub_array_of_indices)) {
                    sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                  }
                  sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
                                                                             chunks[[inner_dim]]['chunk'],
                                                                             chunks[[inner_dim]]['n_chunks'],
                                                                             inner_dim)]
                  sub_array_is_list <- FALSE
                  if (is.list(sub_array_of_indices)) {
                    sub_array_is_list <- TRUE
                    sub_array_of_indices <- unlist(sub_array_of_indices)
                  }
                  if (is.null(var_with_selectors_name)) {
                    indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
                  } else {
                    indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
                  }
                  if (sub_array_is_list) {
                    sub_array_of_indices <- as.list(sub_array_of_indices)
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> GOING TO ITERATE ALONG CHUNKS.")
                    }
                  }
                  for (chunk in 1:chunk_amount) {
                    if (!is.null(names(selector_store_position))) {
                      selector_store_position[file_dim] <- chunk
                    } else {
                      selector_store_position <- chunk
                    }
                    chunk_selectors <- transformed_indices[which(indices_chunk == chunk)]
                    sub_array_of_indices <- chunk_selectors
                    if (with_transform) {
                      # If the provided selectors are expressed in the world
                      # before transformation
                      if (!aiat) {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n))
                        sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m)
                        if (is.list(sub_array_of_indices)) {
                          if (length(sub_array_of_sri) > 1) {
                            sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                          }
                        }
                        ##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI
                        # If the selectors are expressed after transformation
                      } else {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                        last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                        sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                        if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                          sub_array_of_sri <- 1:(last_index - first_index + 1) + 
                            round(beta / n * m) 
                        } else {
                          sub_array_of_sri <- sub_array_of_indices - first_index + 1 +
                            round(beta / n * m)
                        }
                        ##TODO: FILL IN TVI
                      }
                      sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                               list(value = sub_array_of_sri)))
                      if (length(sub_array_of_sri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                      if (length(sub_array_of_fri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    }
                    if (!is.null(var_unorder_indices)) {
                      ordered_fri <- sub_array_of_fri
                      sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                    }
                    fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                             list(value = sub_array_of_fri)))
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> FINISHED ITERATING ALONG CHUNKS")
                    }
                  }
                } else {
                  stop("Provided array of indices for dimension '", inner_dim, "', ",
                       "which goes across the file dimension '", file_dim, "', but ",
                       "the provided array does not have the dimension '", inner_dim, 
                       "', which is mandatory.")
                }
              }
            }
          }
          if (debug) {
            if (inner_dim %in% dims_to_check) {
              print("-> PROCEEDING TO CROP VARIABLES")
            }
          }
          #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) {
          #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) &&
          #    (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) {
          empty_chunks <- which(!taken_chunks)
          if (length(empty_chunks) >= length(taken_chunks)) {
            stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.")
          }
          if (length(empty_chunks) > 0) {
            #                # Get the first group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2
            #                dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1)))
            #                if (all(dist == 1)) {
            #                  start_chunks_to_remove <- NULL
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                  start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)])
            #                }
            #                # Get the last group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9
            #                dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1)))
            #                if (all(dist == 1)) {
            #                  first_chunk_to_remove <- 1
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                }
            #                end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)]
            #                chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove)))
            chunks_to_keep <- which(taken_chunks)
            dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep))
            #                found_indices <- Subset(found_indices, file_dim, chunks_to_keep)
            #                # Crop dataset variables file dims.
            #                for (picked_var in names(picked_vars[[i]])) {
            #                  if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
            #                    picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep)
            #                  }
            #                }
          }
          #}
          dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri)
          # Crop dataset variables inner dims.
          # Crop common variables inner dims.
          types_of_var_to_crop <- 'picked'
          if (with_transform) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed')
          }
          if (!is.null(dim_reorder_params[[inner_dim]])) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered')
          }
          for (type_of_var_to_crop in types_of_var_to_crop) {
            if (type_of_var_to_crop == 'transformed') {
              if (is.null(tvi)) {
                if (!is.null(dim_reorder_params[[inner_dim]])) {
                  crop_indices <- unique(unlist(ordered_sri))
                } else {
                  crop_indices <- unique(unlist(sri))
                }
              } else {
                crop_indices <- unique(unlist(tvi))
              }
              vars_to_crop <- transformed_vars[[i]]
              common_vars_to_crop <- transformed_common_vars
            } else if (type_of_var_to_crop == 'reordered') {
              crop_indices <- unique(unlist(ordered_fri))
              vars_to_crop <- picked_vars_ordered[[i]]
              common_vars_to_crop <- picked_common_vars_ordered
            } else {
aho's avatar
aho committed
              #TODO: If fri has different indices in each list, the crop_indices should be 
              #      separated for each list. Otherwise, picked_common_vars later will be wrong.
aho's avatar
aho committed
              crop_indices <- unique(unlist(fri))
              vars_to_crop <- picked_vars[[i]]
              common_vars_to_crop <- picked_common_vars
            }
            for (var_to_crop in names(vars_to_crop)) {
              if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) {
                if (!is.null(crop_indices)) {
                  if (type_of_var_to_crop == 'transformed') {
                    if (!aiat) {
                      vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                    } else {
                      vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                    }
                  } else {
                    vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                  }
                }
              }
            }
            if (i == length(dat)) {
              for (common_var_to_crop in names(common_vars_to_crop)) {
                if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) {

                  if (type_of_var_to_crop == 'transformed' & !aiat) {
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                  } else {  #old code
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices)
                  }

                }
              }
            }
            if (type_of_var_to_crop == 'transformed') {
              if (!is.null(vars_to_crop)) {
                transformed_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                transformed_common_vars <- common_vars_to_crop
              }
            } else if (type_of_var_to_crop == 'reordered') {
              if (!is.null(vars_to_crop)) {
                picked_vars_ordered[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                picked_common_vars_ordered <- common_vars_to_crop
              }
            } else {
              if (!is.null(vars_to_crop)) {
                picked_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                #NOTE: To avoid redundant run
                if (inner_dim %in% names(common_vars_to_crop)) {
                  picked_common_vars <- common_vars_to_crop
                }
aho's avatar
aho committed
              }
            }
          }
          #}
        }
        # After the selectors have been picked (using the original variables), 
        # the variables are transformed. At that point, the original selectors
        # for the transformed variables are also kept in the variable original_selectors.
        #print("L")
      }
    }
  }
  #  if (!is.null(transformed_common_vars)) {
  #    picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
  #  }
  # Remove the trailing chunks, if any.
  for (file_dim in names(dims_to_crop)) {
    #    indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max))
    ## TODO: Merge indices in dims_to_crop with some advanced mechanism?
    indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]]))
    array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep)
    array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep)
    for (i in 1:length(dat)) {
      # Crop selectors
      for (selector_dim in names(dat[[i]][['selectors']])) {
        if (selector_dim == file_dim) {
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) {
            dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep]
          }
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) {
            dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep]
          }
        }
        if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) {
          dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep)
          dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep)
        }
      }
      # Crop dataset variables file dims.
      for (picked_var in names(picked_vars[[i]])) {
        if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
          picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep)
        }
      }
      for (transformed_var in names(transformed_vars[[i]])) {
        if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) {
          transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep)
        }
      }
    }
    # Crop common variables file dims.
    for (picked_common_var in names(picked_common_vars)) {
      if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) {
        picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep)
      }
    }
    for (transformed_common_var in names(transformed_common_vars)) {
      if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) {
        transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep)
      }
    }
  }
  # Calculate the size of the final array.
  total_inner_dims <- NULL
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      inner_dims <- expected_inner_dims[[i]]
      inner_dims <- sapply(inner_dims, 
                           function(x) {
                             if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) {
                               max(sapply(dat[[i]][['selectors']][[x]][['sri']], length))
                             } else {
                               if (length(var_params[[x]]) > 0) {
                                 if (var_params[[x]] %in% names(transformed_vars[[i]])) {
                                   length(transformed_vars[[i]][[var_params[[x]]]])
                                 } else if (var_params[[x]] %in% names(transformed_common_vars)) {
                                   length(transformed_common_vars[[var_params[[x]]]])
                                 } else {
                                   max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                                 }
                               } else {
                                 max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                               }
                             }
                           })
      names(inner_dims) <- expected_inner_dims[[i]]
      if (is.null(total_inner_dims)) {
        total_inner_dims <- inner_dims
      } else {
        new_dims <- .MergeArrayDims(total_inner_dims, inner_dims)
        total_inner_dims <- new_dims[[3]]
      }
    }
  }
  new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims)
  final_dims <- new_dims[[3]][dim_names]
  # final_dims_fake is the vector of final dimensions after having merged the 
  # 'across' file dimensions with the respective 'across' inner dimensions, and
  # after having broken into multiple dimensions those dimensions for which 
  # multidimensional selectors have been provided.
  # final_dims will be used for collocation of data, whereas final_dims_fake 
  # will be used for shaping the final array to be returned to the user.
  final_dims_fake <- final_dims
aho's avatar
aho committed
  if (merge_across_dims) {
    if (!is.null(inner_dims_across_files)) {
      for (file_dim_across in names(inner_dims_across_files)) {
        inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]])
        new_dims <- c()
        if (inner_dim_pos > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)])
        }
        new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), 
                                         inner_dims_across_files[[file_dim_across]]))
        if (inner_dim_pos + 1 < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)])
        }
        final_dims_fake <- new_dims
      }
    }
  }
  #=========================================================================
  # Find the dimension to split if split_multiselected_dims = TRUE.
  # If there is no dimension able to be split, change split_multiselected_dims to FALSE.
aho's avatar
aho committed
  all_split_dims <- NULL
  if (split_multiselected_dims) {
    for (dim_param in 1:length(dim_params)) {
      if (!is.null(dim(dim_params[[dim_param]]))) {
        if (length(dim(dim_params[[dim_param]])) > 1) {
          split_dims <- dim(dim_params[[dim_param]])
          all_split_dims <- c(all_split_dims, setNames(list(split_dims), 
                                                       names(dim_params)[dim_param]))
          if (is.null(names(split_dims))) {
            names(split_dims) <- paste0(names(dim_params)[dim_param], 
                                        1:length(split_dims))
          }
          old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param])

          # If merge_across_dims and split_multiselected_dims are both used,
          # on one file dim, and this file dim is multi-dim, it doesn't work.
          if (identical(old_dim_pos, integer(0))) {
            stop(paste0("The dimension '", names(dim_params)[dim_param], 
                        "' to be split cannot be found after 'merge_across_dims' ",
                        "is used. Check if the reshape parameters are used appropriately."))
          }
 
aho's avatar
aho committed
          # NOTE: Three steps to create new dims.
          # 1st: Put in the dims before split_dim.
          # 2nd: Replace the old_dim with split_dims.
          # 3rd: Put in the dims after split_dim.
          new_dims <- c()
          if (old_dim_pos > 1) {
            new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)])
          }
          new_dims <- c(new_dims, split_dims)
          if (old_dim_pos < length(final_dims_fake)) {
            new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)])
          }
          final_dims_fake <- new_dims
        }
      }
    }
aho's avatar
aho committed
    if (is.null(all_split_dims)) {
      split_multiselected_dims <- FALSE
      .warning(paste0("Not found any dimensions able to be split. The parameter ",
                      "'split_multiselected_dims' is changed to FALSE."))
    }
aho's avatar
aho committed
  }
  #======================================================================
aho's avatar
aho committed
    # only merge_across_dims -> the 'time' dim length needs to be adjusted
    across_inner_dim <- inner_dims_across_files[[1]]  #TODO: more than one?
    # Get the length of each inner_dim ('time') along each file_dim ('file_date')  
    length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length)

    if (merge_across_dims_narm) {
      across_file_dim <- names(inner_dims_across_files)  #TODO: more than one?

      if (!split_multiselected_dims) {
        final_dims_fake_name <- names(final_dims_fake)
        pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim)
        new_length_inner_dim <- sum(unlist(length_inner_across_dim))
        if (pos_across_inner_dim != length(final_dims_fake)) {
          final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                               new_length_inner_dim,
                               final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)])
        } else {
          final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                               new_length_inner_dim)
        }
        names(final_dims_fake) <- final_dims_fake_name
aho's avatar
aho committed
      }
    }
  }
  
  if (!silent) {
    .message("Detected dimension sizes:")
    longest_dim_len <- max(sapply(names(final_dims_fake), nchar))
    longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar))
    sapply(names(final_dims_fake), 
           function(x) {
             message(paste0("*   ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), 
                            x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), 
                            final_dims_fake[x]))
           })
    bytes <- prod(c(final_dims_fake, 8))
    dim_sizes <- paste(final_dims_fake, collapse = ' x ')
    if (retrieve) {
      .message(paste("Total size of requested data:"))
    } else {
      .message(paste("Total size of involved data:"))
    }
    .message(paste(dim_sizes, " x 8 bytes =", 
                   format(structure(bytes, class = "object_size"), units = "auto")), 
             indent = 2)
  }
  
  # NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed.
  #       The inner_dim needs to be the first dim among split dims.
  #       Cannot control the rest dims are in the same order or not...
  #       Suppose users put the same order of across inner and file dims.
  if (split_multiselected_dims & merge_across_dims) {
    # TODO: More than one split?
    inner_dim_pos_in_split_dims <- which(names(all_split_dims[[1]]) == inner_dims_across_files)  
aho's avatar
aho committed
    # if inner_dim is not the first, change!
    if (inner_dim_pos_in_split_dims != 1) {
      split_dims <- c(split_dims[inner_dim_pos_in_split_dims],
                      split_dims[1:length(split_dims)][-inner_dim_pos_in_split_dims])
      split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(split_dims))))
      # Save the current final_dims_fake for later reorder back
      final_dims_fake_output <- final_dims_fake
      new_dims <- c()
      if (split_dims_pos[1] != 1) {
        new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
      }
      new_dims <- c(new_dims, split_dims)
      if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
        new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
      }
      final_dims_fake <- new_dims
    }
  }
  
  # The following several lines will only be run if retrieve = TRUE
  if (retrieve) {
    
    ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ###########
    # TODO: try performance of storing all in cols instead of rows
    # Create the shared memory array, and a pointer to it, to be sent
    # to the work pieces.
    if (is.null(ObjectBigmemory)) {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
    } else {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1,
                                            backingfile = ObjectBigmemory)
    }
aho's avatar
aho committed
    shared_matrix_pointer <- bigmemory::describe(data_array)
    if (is.null(ObjectBigmemory)) {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$sharedName
    } else {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$filename
    }

    #warning(paste("SharedName:", attr(shared_matrix_pointer, 'description')$sharedName))
    #warning(paste("Filename:", attr(shared_matrix_pointer, 'description')$filename))
    #if (!is.null(ObjectBigmemory)) {
    #  attr(shared_matrix_pointer, 'description')$sharedName <- ObjectBigmemory
    #}
aho's avatar
aho committed
    if (is.null(num_procs)) {
      num_procs <- future::availableCores()
    }
    # Creating a shared tmp folder to store metadata from each chunk
    array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
    if (!is.null(metadata_dims)) {
      metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
      names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
      metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
      array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags),  metadata_indices_to_load,
                                                  list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
    }
    metadata_file_counter <- 0
    metadata_folder <- tempfile('metadata')
    dir.create(metadata_folder)
    # Build the work pieces, each with:
    # - file path
    # - total size (dims) of store array
    # - start position in store array
    # - file selectors (to provide extra info. useful e.g. to select variable)
    # - indices to take from file
    work_pieces <- list()
    for (i in 1:length(dat)) {
      if (dataset_has_files[i]) {
        selectors <- dat[[i]][['selectors']]
        file_dims <- found_file_dims[[i]]
        inner_dims <- expected_inner_dims[[i]]
        sub_array_dims <- final_dims[file_dims]
        sub_array_dims[found_pattern_dim] <- 1
        sub_array_of_files_to_load <- array(1:prod(sub_array_dims), 
                                            dim = sub_array_dims)
        names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims)
        # Detect which of the dimensions of the dataset go across files.
        file_dim_across_files <- lapply(inner_dims, 
                                        function(x) {
                                          dim_across <- sapply(inner_dims_across_files, function(y) x %in% y)
                                          if (any(dim_across)) {
                                            names(inner_dims_across_files)[which(dim_across)[1]]
                                          } else {
                                            NULL
                                          }
                                        })
        names(file_dim_across_files) <- inner_dims
        j <- 1
        while (j <= prod(sub_array_dims)) {
          # Work out file path.
          file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
          names(file_to_load_sub_indices) <- names(sub_array_dims)
          file_to_load_sub_indices[found_pattern_dim] <- i
          big_dims <- rep(1, length(dim(array_of_files_to_load)))
          names(big_dims) <- names(dim(array_of_files_to_load))
          file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]]
          file_to_load <- do.call('[[', c(list(array_of_files_to_load), 
                                          as.list(file_to_load_indices)))
          not_found_file <- do.call('[[', c(list(array_of_not_found_files),
                                            as.list(file_to_load_indices)))
          load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), 
                                               as.list(file_to_load_indices)))
          if (load_file_metadata) {
            metadata_file_counter <- metadata_file_counter + 1
          }
          if (!is.na(file_to_load) && !not_found_file) {
            # Work out indices to take
            first_round_indices <- lapply(inner_dims, 
                                          function (x) {
                                            if (is.null(file_dim_across_files[[x]])) {
                                              x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names")
                                              if (!is.null(x_dim_name)) {
                                                which_chunk <- file_to_load_sub_indices[x_dim_name]
                                                selectors[[x]][['fri']][[which_chunk]]
                                              } else {
                                                selectors[[x]][['fri']][[1]]
                                              }
aho's avatar
aho committed
                                            } else {
                                              which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] 
                                              selectors[[x]][['fri']][[which_chunk]]
                                            }
                                          })
            names(first_round_indices) <- inner_dims
            second_round_indices <- lapply(inner_dims, 
                                           function (x) {
                                             if (is.null(file_dim_across_files[[x]])) {
                                              x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names")
                                              if (!is.null(x_dim_name)) {
                                                which_chunk <- file_to_load_sub_indices[x_dim_name]
                                                selectors[[x]][['sri']][[which_chunk]]
                                              } else {
                                                selectors[[x]][['sri']][[1]]
                                              }
aho's avatar
aho committed
                                             } else {
                                               which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]]
                                               selectors[[x]][['sri']][[which_chunk]]
                                             }
                                           })
            if (debug) {
              print("-> BUILDING A WORK PIECE")
              #print(str(selectors))
            }
            names(second_round_indices) <- inner_dims
            if (!any(sapply(first_round_indices, length) == 0)) {
              work_piece <- list()
              work_piece[['first_round_indices']] <- first_round_indices
              work_piece[['second_round_indices']] <- second_round_indices
              work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
              work_piece[['file_path']] <- file_to_load
              work_piece[['store_dims']] <- final_dims
              # Work out store position
              store_position <- final_dims
              store_position[names(file_to_load_indices)] <- file_to_load_indices
              store_position[inner_dims] <- rep(1, length(inner_dims))
              work_piece[['store_position']] <- store_position
              # Work out file selectors
              file_selectors <- sapply(file_dims, 
                                       function (x) {
                                         vector_to_pick <- 1
                                         if (x %in% names(depending_file_dims)) {
                                           vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                                         }
                                         selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
                                       })
              names(file_selectors) <- file_dims
              work_piece[['file_selectors']] <- file_selectors
              # Send variables for transformation
              if (!is.null(transform) && (length(transform_vars) > 0)) {
                vars_to_transform <- NULL
                picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars)
                if (length(picked_vars_to_transform) > 0) {
                  picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform])
                  if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) {
                    picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))]
                    vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform]
                  }
                }
                picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
                if (length(picked_common_vars_to_transform) > 0) {
                  picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform])
                  if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) {
                    picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))]
                    vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform]
                  }
                }
                work_piece[['vars_to_transform']] <- vars_to_transform
              }
              # Send flag to load metadata
              if (load_file_metadata) {
                work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter)
              }
              work_pieces <- c(work_pieces, list(work_piece))
            }
          }
          j <- j + 1
        }
      }
    }
    #print("N")
    if (debug) {
      print("-> WORK PIECES BUILT")
    }
    
    # Calculate the progress %s that will be displayed and assign them to 
    # the appropriate work pieces.
    if (length(work_pieces) / num_procs >= 2 && !silent) {
      if (length(work_pieces) / num_procs < 10) {
        amount <- 100 / ceiling(length(work_pieces) / num_procs)
        reps <- ceiling(length(work_pieces) / num_procs)
      } else {
        amount <- 10
        reps <- 10
      }
      progress_steps <- rep(amount, reps)
      if (length(work_pieces) < (reps + 1)) {
        selected_pieces <- length(work_pieces)
        progress_steps <- c(sum(head(progress_steps, reps)),
                            tail(progress_steps, reps))
      } else {
        selected_pieces <- round(seq(1, length(work_pieces), 
                                     length.out = reps + 1))[-1]
      }
      progress_steps <- paste0(' + ', round(progress_steps, 2), '%')
      progress_message <- 'Progress: 0%'
    } else {
      progress_message <- ''
      selected_pieces <- NULL
    }
    piece_counter <- 1
    step_counter <- 1
    work_pieces <- lapply(work_pieces, 
                          function (x) {
                            if (piece_counter %in% selected_pieces) {
                              wp <- c(x, list(progress_amount = progress_steps[step_counter]))
                              step_counter <<- step_counter + 1
                            } else {
                              wp <- x
                            }
                            piece_counter <<- piece_counter + 1
                            wp