Start.R 243 KB
Newer Older
aho's avatar
aho committed
4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318
            names(second_round_indices) <- inner_dims
            if (!any(sapply(first_round_indices, length) == 0)) {
              work_piece <- list()
              work_piece[['first_round_indices']] <- first_round_indices
              work_piece[['second_round_indices']] <- second_round_indices
              work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
              work_piece[['file_path']] <- file_to_load
              work_piece[['store_dims']] <- final_dims
              # Work out store position
              store_position <- final_dims
              store_position[names(file_to_load_indices)] <- file_to_load_indices
              store_position[inner_dims] <- rep(1, length(inner_dims))
              work_piece[['store_position']] <- store_position
              # Work out file selectors
              file_selectors <- sapply(file_dims, 
                                       function (x) {
                                         vector_to_pick <- 1
                                         if (x %in% names(depending_file_dims)) {
                                           vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                                         }
                                         selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
                                       })
              names(file_selectors) <- file_dims
              work_piece[['file_selectors']] <- file_selectors
              # Send variables for transformation
              if (!is.null(transform) && (length(transform_vars) > 0)) {
                vars_to_transform <- NULL
                picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars)
                if (length(picked_vars_to_transform) > 0) {
                  picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform])
                  if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) {
                    picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))]
                    vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform]
                  }
                }
                picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
                if (length(picked_common_vars_to_transform) > 0) {
                  picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform])
                  if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) {
                    picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))]
                    vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform]
                  }
                }
                work_piece[['vars_to_transform']] <- vars_to_transform
              }
              # Send flag to load metadata
              if (load_file_metadata) {
                work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter)
              }
              work_pieces <- c(work_pieces, list(work_piece))
            }
          }
          j <- j + 1
        }
      }
    }
    #print("N")
    if (debug) {
      print("-> WORK PIECES BUILT")
    }
    
    # Calculate the progress %s that will be displayed and assign them to 
    # the appropriate work pieces.
    if (length(work_pieces) / num_procs >= 2 && !silent) {
      if (length(work_pieces) / num_procs < 10) {
        amount <- 100 / ceiling(length(work_pieces) / num_procs)
        reps <- ceiling(length(work_pieces) / num_procs)
      } else {
        amount <- 10
        reps <- 10
      }
      progress_steps <- rep(amount, reps)
      if (length(work_pieces) < (reps + 1)) {
        selected_pieces <- length(work_pieces)
        progress_steps <- c(sum(head(progress_steps, reps)),
                            tail(progress_steps, reps))
      } else {
        selected_pieces <- round(seq(1, length(work_pieces), 
                                     length.out = reps + 1))[-1]
      }
      progress_steps <- paste0(' + ', round(progress_steps, 2), '%')
      progress_message <- 'Progress: 0%'
    } else {
      progress_message <- ''
      selected_pieces <- NULL
    }
    piece_counter <- 1
    step_counter <- 1
    work_pieces <- lapply(work_pieces, 
                          function (x) {
                            if (piece_counter %in% selected_pieces) {
                              wp <- c(x, list(progress_amount = progress_steps[step_counter]))
                              step_counter <<- step_counter + 1
                            } else {
                              wp <- x
                            }
                            piece_counter <<- piece_counter + 1
                            wp
                          })
    if (!silent) {
      .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.")
      .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.")
      .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:"))
      if (length(work_pieces) < 30) {
        lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2))
      } else {
        .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5)
      }
    }
    
    # Build the cluster of processes that will do the work and dispatch work pieces.
    # The function .LoadDataFile is applied to each work piece. This function will
    # open the data file, regrid if needed, subset, apply the mask, 
    # compute and apply the weights if needed,
    # disable extreme values and store in the shared memory matrix.
    #print("O")
    if (!silent) {
      .message("Loading... This may take several minutes...")
      if (progress_message != '') {
        .message(progress_message, appendLF = FALSE)
      }
    }

# NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here,
#       the path name is created in work_pieces but the path hasn't been built yet.
    if (num_procs == 1) {
      found_files <- lapply(work_pieces, .LoadDataFile, 
                            shared_matrix_pointer = shared_matrix_pointer,
                            file_data_reader = file_data_reader, 
                            synonims = synonims,
                            transform = transform, 
                            transform_params = transform_params,
                            silent = silent, debug = debug)
    } else {
      cluster <- parallel::makeCluster(num_procs, outfile = "")
      # Send the heavy work to the workers
      work_errors <- try({
        found_files <- parallel::clusterApplyLB(cluster, work_pieces, .LoadDataFile, 
                                                shared_matrix_pointer = shared_matrix_pointer,
                                                file_data_reader = file_data_reader,
                                                synonims = synonims,
                                                transform = transform, 
                                                transform_params = transform_params,
                                                silent = silent, debug = debug)
      })
      parallel::stopCluster(cluster)
    }
    
    if (!silent) {
      if (progress_message != '') {
        .message("\n", tag = '')
      }
    }
    #print("P")
    
    # NOTE: If merge_across_dims = TRUE, there might be additional NAs due to
    #       unequal inner_dim ('time') length across file_dim ('file_date').
    #       If merge_across_dims_narm = TRUE, add additional lines to remove these NAs.
    # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. 
    if (merge_across_dims_narm) {
      
      # Get the length of these two dimensions in final_dims
      length_inner_across_store_dims <- final_dims[across_inner_dim]
      length_file_across_store_dims <- final_dims[across_file_dim]
      
      # Create a logical array for merge_across_dims
      logi_array <- array(rep(FALSE,
                              length_file_across_store_dims * length_inner_across_store_dims),
                          dim = c(length_inner_across_store_dims, length_file_across_store_dims))
      for (i in 1:length_file_across_store_dims) {  #1:4
        logi_array[1:length_inner_across_dim[[i]], i] <- TRUE
      }
      
      # First, get the data array with final_dims dimension
      data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims)
      
      # Change the NA derived from additional spaces to -9999, then remove these -9999
      func_remove_blank <- function(data_array, logi_array) {
        # dim(data_array) = [time, file_date]
        # dim(logi_array) = [time, file_date]
        # Change the blank spaces from NA to -9999
        data_array[which(!logi_array)] <- -9999
        return(data_array)
      }
      data_array_final_dims <- multiApply::Apply(data_array_final_dims,
                                                 target_dims = c(across_inner_dim, across_file_dim),  #c('time', 'file_date')
                                                 output_dims = c(across_inner_dim, across_file_dim),
                                                 fun = func_remove_blank,
                                                 logi_array = logi_array)$output1
      ## reorder back to the correct dim
      tmp <- match(names(final_dims), names(dim(data_array_final_dims)))
      data_array_final_dims <- .aperm2(data_array_final_dims, tmp)
      data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999]  # become a vector
      
 #NOTE: When one file contains values for dicrete dimensions, rearrange the 
 #      chunks (i.e., work_piece) is necessary.
      if (split_multiselected_dims) {

      # generate the correct order list from indices_chunk 
        final_order_list <- list()
        i <- 1
        j <- 1
        a <- indices_chunk[i]
        while (i < length(indices_chunk)) {
          while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) {
            a <- c(a, indices_chunk[i+1])
            i <- i + 1
          }
          final_order_list[[j]] <- a
          a <- indices_chunk[i+1]
          i <- i + 1
          j <- j + 1
        }
        names(final_order_list) <- sapply(final_order_list, '[[', 1)
        final_order_list <- lapply(final_order_list, length)
        
        if (!all(diff(as.numeric(names(final_order_list))) > 0)) {


      # shape the vector into the array without split_dims
        split_dims_pos <- match(split_dims, final_dims_fake)
        new_dims <- c()
        if (split_dims_pos[1] > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
        }
        new_dims <- c(new_dims,  prod(split_dims))
        names(new_dims)[split_dims_pos[1]] <- across_inner_dim
        if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
        }
        final_dims_fake_no_split <- new_dims
        data_array_no_split <- array(data_array_tmp, dim = final_dims_fake_no_split)
      # seperate 'time' dim into each work_piece length
        data_array_seperate <- list()
        tmp <- cumsum(unlist(length_inner_across_dim))
        tmp <- c(0, tmp)
        for (i in 1:length(length_inner_across_dim)) {
          data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim,
                                             (tmp[i] + 1):tmp[i + 1])
        }

      # re-build the array: chunk 
        which_chunk <- as.numeric(names(final_order_list))
        how_many_indices <- unlist(final_order_list)
        array_piece <- list()
        ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate)))
        for (i in 1:length(final_order_list)) {
          array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]],
                                     across_inner_dim,
                                     ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1))
          ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i]
        }
        
        # re-build the array: paste
        data_array_tmp <- array_piece[[1]]
        along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim)
        if (length(array_piece) > 1) {
          for (i in 2:length(array_piece)) {
          data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]],
                                        along = along_pos)
          }
        }
      }        
      }
  
      data_array <- array(data_array_tmp, dim = final_dims_fake)
      
    } else {  # merge_across_dims_narm = F (old version)
      data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake)
    }
    
    # NOTE: If split_multiselected_dims + merge_across_dims, the dimension order may change above.
    #       To get the user-required dim order, we need to reorder the array again.
    if (split_multiselected_dims & merge_across_dims) {
      if (inner_dim_pos_in_split_dims != 1) {
        correct_order <- match(names(final_dims_fake_output), names(final_dims_fake))
        data_array <- .aperm2(data_array, correct_order)
      }
    }
    
    gc()
    
    # Load metadata and remove the metadata folder
    if (!is.null(metadata_dims)) {
      loaded_metadata_files <- list.files(metadata_folder)

      if (!identical(loaded_metadata_files, character(0))) {  # old code
        loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS)
      } else {
        loaded_metadata <- NULL
      }

      unlink(metadata_folder, recursive = TRUE)

#NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat
#      for $common, it is a list of metadata length. For $dat, it is a list of dat length,
#      and each sublist has the metadata for each dat.
      dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims]
      if (!any(names(dim_of_metadata) == pattern_dims) |
          (any(names(dim_of_metadata) == pattern_dims) &
           dim_of_metadata[pattern_dims] == 1)) {  # put under $common; old code
        return_metadata <- vector('list',
                                  length = prod(dim(array_of_metadata_flags)[metadata_dims]))
        return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata
        dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims])

      } else { # put under $dat. metadata_dims has 'dat' and dat length > 1
        return_metadata <- vector('list',
                                  length = dim_of_metadata[pattern_dims])
        names(return_metadata) <- dat_names
        for (kk in 1:length(return_metadata)) {
          return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat
        }
        loaded_metadata_count <- 1
        for (kk in 1:length(return_metadata)) {
          for (jj in 1:length(return_metadata[[kk]])) {
aho's avatar
aho committed

            if (dataset_has_files[kk]) {
aho's avatar
aho committed
              if (loaded_metadata_count %in% loaded_metadata_files) {
                return_metadata[[kk]][jj] <- loaded_metadata[[loaded_metadata_count]]
                names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[loaded_metadata_count]])
              } else {
                return_metadata[[kk]][jj] <- NULL
              }
              loaded_metadata_count <- loaded_metadata_count + 1
            } else {
              return_metadata[[kk]][jj] <- NULL
            }

aho's avatar
aho committed
          }
        }
      }
      attr(data_array, 'Variables') <- return_metadata
      # TODO: Try to infer data type from loaded_metadata
      # as.integer(data_array) 
    }
    
    failed_pieces <- work_pieces[which(unlist(found_files))]
    for (failed_piece in failed_pieces) {
      array_of_not_found_files <- do.call('[<-', 
                                          c(list(array_of_not_found_files), 
                                            as.list(failed_piece[['file_indices_in_array_of_files']]),
                                            list(value = TRUE)))
    }
    if (any(array_of_not_found_files)) {
      for (i in 1:prod(dim(array_of_files_to_load))) {
        if (is.na(array_of_not_found_files[i])) {
          array_of_files_to_load[i] <- NA
        } else {
          if (array_of_not_found_files[i]) {
            array_of_not_found_files[i] <- array_of_files_to_load[i]
            array_of_files_to_load[i] <- NA
          } else {
            array_of_not_found_files[i] <- NA
          }
        }
      }
    } else {
      array_of_not_found_files <- NULL
    }
    
  } # End if (retrieve)
  
  # Change final_dims_fake back because retrieve = FALSE will use it for attributes later
  if (exists("final_dims_fake_output")) {
    final_dims_fake <- final_dims_fake_output
  }
  # Replace the vars and common vars by the transformed vars and common vars
  for (i in 1:length(dat)) {
    if (length(names(transformed_vars[[i]])) > 0) {
      picked_vars[[i]][names(transformed_vars[[i]])] <- transformed_vars[[i]]
    } else if (length(names(picked_vars_ordered[[i]])) > 0) {
      picked_vars[[i]][names(picked_vars_ordered[[i]])] <- picked_vars_ordered[[i]]
    }
  }
  if (length(names(transformed_common_vars)) > 0) {
    picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
  } else if (length(names(picked_common_vars_ordered)) > 0) {
    picked_common_vars[names(picked_common_vars_ordered)] <- picked_common_vars_ordered
  }
  if (debug) {
    print("-> THE TRANSFORMED VARS:")
    print(str(transformed_vars))
    print("-> THE PICKED VARS:")
    print(str(picked_vars))
  }
  
  file_selectors <- NULL
  for (i in 1:length(dat)) {
    file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])]
  }
  if (retrieve) {
    if (!silent) {
      .message("Successfully retrieved data.")
    }

    if (all(sapply(attr(data_array, 'Variables'), is.null))) {
      var_backup <- NULL
      .warning(paste0("Metadata cannot be retrieved. The reason may be the ",
                      "non-existence of the first file. Use parameter 'metadata_dims'",
                      " to assign to file dimensions along which to return metadata, ",
                      "or check the existence of the first file."))
    } else {

#NOTE: The metadata of variables can be saved in one of the two different structures.
#      (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1):
#          put under $common
#      (2) (metadata_dims == 'dat' & length(dat) > 1):
#          put under $dat1, $dat2, .... Put it in picked_vars list
#TODO: The current (2) uses the inefficient method. Should define the list structure first
#      then fill the list, rather than expand it in the for loop.
    if (any(metadata_dims == pattern_dims) & length(dat) > 1) { # (2)
      var_backup <- attr(data_array, 'Variables')
      for (kk in 1:length(var_backup)) {
        sublist_names <- lapply(var_backup, names)[[kk]]
        if (!is.null(sublist_names)) {
          for (jj in 1:length(sublist_names)) {
            picked_vars[[kk]][[sublist_names[jj]]] <- var_backup[[kk]][[jj]]
          }
aho's avatar
aho committed
        }
      }
      var_backup <- NULL

    } else {  #(1)
      var_backup <- attr(data_array, 'Variables')
      len <- unlist(lapply(var_backup, length))
      len <- sum(len) + length(which(len == 0))  #0 means NULL
      name_list <- lapply(var_backup, names)
      new_list <- vector('list', length = len)
      for (kk in 1:length(var_backup)) {
        if (length(var_backup[[kk]]) == 0) {  #NULL
        } else {
          for (jj in 1:length(var_backup[[kk]])) {
            new_list[[count]] <- var_backup[[kk]][[jj]]
            names(new_list)[count] <- name_list[[kk]][jj]
            count <- count + 1
          }
aho's avatar
aho committed
    }
}
aho's avatar
aho committed
    attr(data_array, 'Variables') <- NULL
    attributes(data_array) <- c(attributes(data_array), 
                                list(Variables = c(list(common = c(picked_common_vars, var_backup)), 
                                                   picked_vars),
                                     Files = array_of_files_to_load, 
                                     NotFoundFiles = array_of_not_found_files,
                                     FileSelectors = file_selectors,
                                     PatternDim = found_pattern_dim,
                                     ObjectBigmemory = name_bigmemory_obj) #attr(shared_matrix_pointer, 'description')$sharedName)
aho's avatar
aho committed
    )
    attr(data_array, 'class') <- c('startR_array', attr(data_array, 'class'))
    data_array
  } else {
    if (!silent) {
      .message("Successfully discovered data dimensions.")
    }
    start_call <- match.call()
    for (i in 2:length(start_call)) {
      if (class(start_call[[i]]) %in% c('name', 'call')) {
        start_call[[i]] <- eval.parent(start_call[[i]])
      }
    }
    start_call[['retrieve']] <- TRUE
    attributes(start_call) <- c(attributes(start_call),
                                list(Dimensions = final_dims_fake,
                                     Variables = c(list(common = picked_common_vars), picked_vars),
                                     ExpectedFiles = array_of_files_to_load,
                                     FileSelectors = file_selectors,
                                     PatternDim = found_pattern_dim,
                                     MergedDims = if (merge_across_dims) {
                                       inner_dims_across_files
                                     } else {
                                       NULL
                                     },
                                     SplitDims = if (split_multiselected_dims) {
                                       all_split_dims
                                     } else {
                                       NULL
                                     })
    )
    attr(start_call, 'class') <- c('startR_cube', attr(start_call, 'class'))
    start_call
  }
}

# This function is the responsible for loading the data of each work
# piece.
.LoadDataFile <- function(work_piece, shared_matrix_pointer, 
                          file_data_reader, synonims,
                          transform, transform_params,
                          silent = FALSE, debug = FALSE) {
  #warning(attr(shared_matrix_pointer, 'description')$sharedName)
aho's avatar
aho committed
  #  suppressPackageStartupMessages({library(bigmemory)})
  ### TODO: Specify dependencies as parameter
  #  suppressPackageStartupMessages({library(ncdf4)})
  
  #print("1")
  store_indices <- as.list(work_piece[['store_position']])
  first_round_indices <- work_piece[['first_round_indices']]
  second_round_indices <- work_piece[['second_round_indices']]
  #print("2")
  file_to_open <- work_piece[['file_path']]
  sub_array <- file_data_reader(file_to_open, NULL, 
                                work_piece[['file_selectors']],
                                first_round_indices, synonims)
  if (debug) {
    if (all(unlist(store_indices[1:6]) == 1)) {
      print("-> LOADING A WORK PIECE")
      print("-> STRUCTURE OF READ UNTRANSFORMED DATA:")
      print(str(sub_array))
      print("-> STRUCTURE OF VARIABLES TO TRANSFORM:")
      print(str(work_piece[['vars_to_transform']]))
      print("-> COMMON ARRAY DIMENSIONS:")
      print(str(work_piece[['store_dims']]))
    }
  }
  if (!is.null(sub_array)) {
    # Apply data transformation once we have the data arrays.
    if (!is.null(transform)) {
      if (debug) {
        if (all(unlist(store_indices[1:6]) == 1)) {
          print("-> PROCEEDING TO TRANSFORM ARRAY")
          print("-> DIMENSIONS OF ARRAY RIGHT BEFORE TRANSFORMING:")
          print(dim(sub_array))
        }
      }
      sub_array <- do.call(transform, c(list(data_array = sub_array,
                                             variables = work_piece[['vars_to_transform']],
                                             file_selectors = work_piece[['file_selectors']]),
                                        transform_params))
      if (debug) {
        if (all(unlist(store_indices[1:6]) == 1)) {
          print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER TRANSFORMING:")
          print(str(sub_array))
          print("-> DIMENSIONS OF ARRAY RIGHT AFTER TRANSFORMING:")
          print(dim(sub_array$data_array))
        }
      }
      sub_array <- sub_array$data_array
      # Subset with second round of indices
      dims_to_crop <- which(!sapply(second_round_indices, is.null))
      if (length(dims_to_crop) > 0) {
        dimnames_to_crop <- names(second_round_indices)[dims_to_crop]
        sub_array <- Subset(sub_array, dimnames_to_crop, 
                            second_round_indices[dimnames_to_crop])
      }
      if (debug) {
        if (all(unlist(store_indices[1:6]) == 1)) {
          print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER SUBSETTING WITH 2nd ROUND INDICES:")
          print(str(sub_array))
        }
      }
    }
    
    metadata <- attr(sub_array, 'variables')
    
    names_bk <- names(store_indices)
    store_indices <- lapply(names(store_indices), 
                            function (x) {
                              if (!(x %in% names(first_round_indices))) {
                                store_indices[[x]]
                              } else if (is.null(second_round_indices[[x]])) {
                                1:dim(sub_array)[x]
                              } else {
                                if (is.numeric(second_round_indices[[x]])) {
                                  ## TODO: Review carefully this line. Inner indices are all 
                                  ## aligned to the left-most positions. If dataset A has longitudes
                                  ## 1, 2, 3, 4 but dataset B has only longitudes 3 and 4, then
                                  ## they will be stored as follows:
                                  ## 1, 2, 3, 4
                                  ## 3, 4, NA, NA
                                  ##x - min(x) + 1
                                  1:length(second_round_indices[[x]])
                                } else {
                                  1:length(second_round_indices[[x]])
                                }
                              }
                            })
    names(store_indices) <- names_bk
    if (debug) {
      if (all(unlist(store_indices) == 1)) {
        print("-> STRUCTURE OF FIRST ROUND INDICES FOR THIS WORK PIECE:")
        print(str(first_round_indices))
        print("-> STRUCTURE OF SECOND ROUND INDICES FOR THIS WORK PIECE:")
        print(str(second_round_indices))
        print("-> STRUCTURE OF STORE INDICES FOR THIS WORK PIECE:")
        print(str(store_indices))
      }
    }
    
    store_indices <- lapply(store_indices, as.integer)
    store_dims <- work_piece[['store_dims']]
    
    # split the storage work of the loaded subset in parts
    largest_dim_name <- names(dim(sub_array))[which.max(dim(sub_array))]
    max_parts <- length(store_indices[[largest_dim_name]])
    
    # Indexing a data file of N MB with expand.grid takes 30*N MB
    # The peak ram of Start is, minimum, 2 * total data to load from all files
    # due to inefficiencies in other regions of the code
    # The more parts we split the indexing done below in, the lower
    # the memory footprint of the indexing and the fast. 
    # But more than 10 indexing iterations (parts) for each MB processed 
    # makes the iteration slower (tested empirically on BSC workstations).
    subset_size_in_mb <- prod(dim(sub_array)) * 8 / 1024 / 1024
    best_n_parts <- ceiling(subset_size_in_mb * 10)
    # We want to set n_parts to a greater value than the one that would 
    # result in a memory footprint (of the subset indexing code below) equal
    # to 2 * total data to load from all files.
    # s = subset size in MB
    # p = number of parts to break it in
    # T = total size of data to load
    # then, s / p * 30 = 2 * T
    # then, p = s * 15 / T
    min_n_parts <- ceiling(prod(dim(sub_array)) * 15 / prod(store_dims))
    # Make sure we pick n_parts much greater than the minimum calculated
    n_parts <- min_n_parts * 10
    if (n_parts > best_n_parts) {
      n_parts <- best_n_parts
    }
    # Boundary checks
    if (n_parts < 1) {
      n_parts <- 1
    }
    if (n_parts > max_parts) {
      n_parts <- max_parts
    }
    
    if (n_parts > 1) {
      make_parts <- function(length, n) {
        clusters <- cut(1:length, n, labels = FALSE)
        lapply(1:n, function(y) which(clusters == y))
      }
      part_indices <- make_parts(max_parts, n_parts)
      parts <- lapply(part_indices, 
                      function(x) {
                        store_indices[[largest_dim_name]][x]
                      })
    } else {
      part_indices <- list(1:max_parts)
      parts <- store_indices[largest_dim_name]
    }
    
    # do the storage work
    weights <- sapply(1:length(store_dims), 
                      function(i) prod(c(1, store_dims)[1:i]))
    part_indices_in_sub_array <- as.list(rep(TRUE, length(dim(sub_array))))
    names(part_indices_in_sub_array) <- names(dim(sub_array))
    data_array <- bigmemory::attach.big.matrix(shared_matrix_pointer)
    for (i in 1:n_parts) {
      store_indices[[largest_dim_name]] <- parts[[i]]
      # Converting array indices to vector indices
      matrix_indices <- do.call("expand.grid", store_indices)
      # Given a matrix where each row is a set of array indices of an element
      # the vector indices are computed
      matrix_indices <- 1 + colSums(t(matrix_indices - 1) * weights)
      part_indices_in_sub_array[[largest_dim_name]] <- part_indices[[i]]
      data_array[matrix_indices] <- as.vector(do.call('[',
                                                      c(list(x = sub_array), 
                                                        part_indices_in_sub_array)))
    }
    rm(data_array)
    gc()
    
    if (!is.null(work_piece[['save_metadata_in']])) {
      saveRDS(metadata, file = work_piece[['save_metadata_in']])
    }
  }
  if (!is.null(work_piece[['progress_amount']]) && !silent) {
    message(work_piece[['progress_amount']], appendLF = FALSE)
  }
  is.null(sub_array)
}