Start.R 232 KB
Newer Older
aho's avatar
aho committed
3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320
                    # [0, 360], and lon selector = [-20, -10].
                    if (any(is.na(sub_array_of_sri))) {
                      stop(paste0("The selectors of ",
                                  inner_dim, " are out of range of transform grid '",
                                  transform_params$grid, "'. Use parameter '",
                                  inner_dim, "_reorder' or change ", inner_dim,
                                  " selectors."))
                    }
                    
                    if (goes_across_prime_meridian) {
                      
                      if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
                        # global longitude
                        sub_array_of_sri <- c(1:length(transformed_subset_var))
                      } else {
                        # the common case, i.e., non-global
                        # NOTE: Because sub_array_of_sri order is exchanged due to 
                        # previous development, here [[1]] and [[2]] should exchange
                        sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
                                              sub_array_of_sri[[2]]:length(transformed_subset_var))
                      }
                      
                    } else if (is.list(sub_array_of_sri)) {
                      sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                    }
                    ordered_sri <- sub_array_of_sri
                    sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
                    # In this case, the tvi are not defined and the 'transformed_subset_var'
                    # will be taken instead of the var transformed before in the code.
                    if (debug) {
                      if (inner_dim %in% dims_to_check) {
                        print("-> FIRST INDEX:")
                        print(first_index)
                        print("-> LAST INDEX:")
                        print(last_index)
                        print("-> STRUCTURE OF FIRST ROUND INDICES:")
                        print(str(sub_array_of_fri))
                        print("-> STRUCTURE OF SECOND ROUND INDICES:")
                        print(str(sub_array_of_sri))
                        print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
                        print(str(tvi))
                      }
                    }
                    ###                    # If the selectors are expressed after transformation
                    ###                    } else {
                    ###if (debug) {
                    ###if (inner_dim %in% dims_to_check) {
                    ###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
                    ###}
                    ###}
                    ###                      if (goes_across_prime_meridian) {
                    ###                        sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
                    ###                                                    1:sub_array_of_indices[[2]])
                    ###                      }
                    ###                      first_index <- min(unlist(sub_array_of_indices))
                    ###                      last_index <- max(unlist(sub_array_of_indices))
                    ###                      first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                    ###                      last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                    ###                      sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                    ###                      n_of_extra_cells <- round(beta / n * m)
                    ###                      if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                    ###                        sub_array_of_sri <- 1:(last_index - first_index + 1) 
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_sri + first_index - 1
                    ###                        }
                    ###                      } else {
                    ###                        sub_array_of_sri <- sub_array_of_indices - first_index + 1
                    ###                        if (is.null(tvi)) {
                    ###                          tvi <- sub_array_of_indices
                    ###                        }
                    ###                      }
                    ###                      sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
                    sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                             list(value = sub_array_of_sri)))
                  } else {
                    if (goes_across_prime_meridian) {
                      #NOTE: The potential problem here is, if it is global longitude,
                      #      and the indices overlap (e.g., lon = [0, 359.723] and 
                      #      CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). 
                      #      Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll
                      #      get two 649.
                      #      The fix below may not be the best solution, but it works for 
                      #      the example above.
                      
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- c(1:n)
                      } else {
                        # the common case, i.e., non-global
                        sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)),
                                              max(unlist(sub_array_of_indices)):n)
                      }
                      
                    } else if (is.list(sub_array_of_indices)) {
                      sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                    }
                  }
                  if (!is.null(var_unorder_indices)) {
                    if (is.null(ordered_fri)) {
                      ordered_fri <- sub_array_of_fri
                    }
                    sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                  }
                  fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                           list(value = sub_array_of_fri)))
                  if (!is.null(file_dim)) {
                    taken_chunks[selector_store_position[[file_dim]]] <- TRUE
                  } else {
                    taken_chunks <- TRUE
                  }
                }
              } else {
                if (debug) {
                  if (inner_dim %in% dims_to_check) {
                    print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.")
                  }
                }
                if (inner_dim %in% names(dim(sub_array_of_selectors))) {
                  if (is.null(var_with_selectors_name)) {
                    if (any(na.omit(unlist(sub_array_of_selectors)) < 1) ||
                        any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) {
                      stop("Provided indices out of range for dimension '", inner_dim, "' ", 
                           "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", 
                           data_dims[inner_dim] * chunk_amount, ").")
                    }
                  } else {
                    if (inner_dim %in% names(dim(sub_array_of_values))) {
                      # NOTE: Put across-inner-dim at the 1st position.
                      # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_selectors below.
                      inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim)
                      if (inner_dim_pos_in_sub_array != 1) {
                        new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array]
                        new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                        sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order)
                      }
                    }
                  }
                  
                  # NOTE: Put across-inner-dim at the 1st position.
                  # POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_values above.
                  inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim)
                  if (inner_dim_pos_in_sub_array != 1) {
                    new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array]
                    new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                    sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order)
                  }
                  sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
                                                           tolerance = tolerance_params[[inner_dim]])
                  # It is needed to expand the indices here, otherwise for 
                  # values(list(date1, date2)) only 2 values are picked.
                  if (is.list(sub_array_of_indices)) {
                    sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                  }
                  sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
                                                                             chunks[[inner_dim]]['chunk'],
                                                                             chunks[[inner_dim]]['n_chunks'],
                                                                             inner_dim)]
                  sub_array_is_list <- FALSE
                  if (is.list(sub_array_of_indices)) {
                    sub_array_is_list <- TRUE
                    sub_array_of_indices <- unlist(sub_array_of_indices)
                  }
                  if (is.null(var_with_selectors_name)) {
                    indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
                  } else {
                    indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
                  }
                  if (sub_array_is_list) {
                    sub_array_of_indices <- as.list(sub_array_of_indices)
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> GOING TO ITERATE ALONG CHUNKS.")
                    }
                  }
                  for (chunk in 1:chunk_amount) {
                    if (!is.null(names(selector_store_position))) {
                      selector_store_position[file_dim] <- chunk
                    } else {
                      selector_store_position <- chunk
                    }
                    chunk_selectors <- transformed_indices[which(indices_chunk == chunk)]
                    sub_array_of_indices <- chunk_selectors
                    if (with_transform) {
                      # If the provided selectors are expressed in the world
                      # before transformation
                      if (!aiat) {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n))
                        sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m)
                        if (is.list(sub_array_of_indices)) {
                          if (length(sub_array_of_sri) > 1) {
                            sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                          }
                        }
                        ##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI
                        # If the selectors are expressed after transformation
                      } else {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                        last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                        sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                        if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                          sub_array_of_sri <- 1:(last_index - first_index + 1) + 
                            round(beta / n * m) 
                        } else {
                          sub_array_of_sri <- sub_array_of_indices - first_index + 1 +
                            round(beta / n * m)
                        }
                        ##TODO: FILL IN TVI
                      }
                      sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                               list(value = sub_array_of_sri)))
                      if (length(sub_array_of_sri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                      if (length(sub_array_of_fri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    }
                    if (!is.null(var_unorder_indices)) {
                      ordered_fri <- sub_array_of_fri
                      sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                    }
                    fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                             list(value = sub_array_of_fri)))
                  }
                  if (debug) {
                    if (inner_dim %in% dims_to_check) {
                      print("-> FINISHED ITERATING ALONG CHUNKS")
                    }
                  }
                } else {
                  stop("Provided array of indices for dimension '", inner_dim, "', ",
                       "which goes across the file dimension '", file_dim, "', but ",
                       "the provided array does not have the dimension '", inner_dim, 
                       "', which is mandatory.")
                }
              }
            }
          }
          if (debug) {
            if (inner_dim %in% dims_to_check) {
              print("-> PROCEEDING TO CROP VARIABLES")
            }
          }
          #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) {
          #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) &&
          #    (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) {
          empty_chunks <- which(!taken_chunks)
          if (length(empty_chunks) >= length(taken_chunks)) {
            stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.")
          }
          if (length(empty_chunks) > 0) {
            #                # Get the first group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2
            #                dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1)))
            #                if (all(dist == 1)) {
            #                  start_chunks_to_remove <- NULL
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                  start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)])
            #                }
            #                # Get the last group of chunks to remove, and remove them. 
            #                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9
            #                dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1)))
            #                if (all(dist == 1)) {
            #                  first_chunk_to_remove <- 1
            #                } else {
            #                  first_chunk_to_remove <- tail(which(dist > 1), 1)
            #                }
            #                end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)]
            #                chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove)))
            chunks_to_keep <- which(taken_chunks)
            dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep))
            #                found_indices <- Subset(found_indices, file_dim, chunks_to_keep)
            #                # Crop dataset variables file dims.
            #                for (picked_var in names(picked_vars[[i]])) {
            #                  if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
            #                    picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep)
            #                  }
            #                }
          }
          #}
          dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri)
          # Crop dataset variables inner dims.
          # Crop common variables inner dims.
          types_of_var_to_crop <- 'picked'
          if (with_transform) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed')
          }
          if (!is.null(dim_reorder_params[[inner_dim]])) {
            types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered')
          }
          for (type_of_var_to_crop in types_of_var_to_crop) {
            if (type_of_var_to_crop == 'transformed') {
              if (is.null(tvi)) {
                if (!is.null(dim_reorder_params[[inner_dim]])) {
                  crop_indices <- unique(unlist(ordered_sri))
                } else {
                  crop_indices <- unique(unlist(sri))
                }
              } else {
                crop_indices <- unique(unlist(tvi))
              }
              vars_to_crop <- transformed_vars[[i]]
              common_vars_to_crop <- transformed_common_vars
            } else if (type_of_var_to_crop == 'reordered') {
              crop_indices <- unique(unlist(ordered_fri))
              vars_to_crop <- picked_vars_ordered[[i]]
              common_vars_to_crop <- picked_common_vars_ordered
            } else {
aho's avatar
aho committed
              #TODO: If fri has different indices in each list, the crop_indices should be 
              #      separated for each list. Otherwise, picked_common_vars later will be wrong.
aho's avatar
aho committed
              crop_indices <- unique(unlist(fri))
              vars_to_crop <- picked_vars[[i]]
              common_vars_to_crop <- picked_common_vars
            }
            for (var_to_crop in names(vars_to_crop)) {
              if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) {
                if (!is.null(crop_indices)) {
                  if (type_of_var_to_crop == 'transformed') {
                    if (!aiat) {
                      vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                    } else {
                      vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                    }
                  } else {
                    vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                  }
                }
              }
            }
            if (i == length(dat)) {
              for (common_var_to_crop in names(common_vars_to_crop)) {
                if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) {

                  if (type_of_var_to_crop == 'transformed' & !aiat) {
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                  } else {  #old code
                  common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices)
                  }

                }
              }
            }
            if (type_of_var_to_crop == 'transformed') {
              if (!is.null(vars_to_crop)) {
                transformed_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                transformed_common_vars <- common_vars_to_crop
              }
            } else if (type_of_var_to_crop == 'reordered') {
              if (!is.null(vars_to_crop)) {
                picked_vars_ordered[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                picked_common_vars_ordered <- common_vars_to_crop
              }
            } else {
              if (!is.null(vars_to_crop)) {
                picked_vars[[i]] <- vars_to_crop
              }
              if (i == length(dat)) {
                #NOTE: To avoid redundant run
                if (inner_dim %in% names(common_vars_to_crop)) {
                  picked_common_vars <- common_vars_to_crop
                }
aho's avatar
aho committed
              }
            }
          }
          #}
        }
        # After the selectors have been picked (using the original variables), 
        # the variables are transformed. At that point, the original selectors
        # for the transformed variables are also kept in the variable original_selectors.
        #print("L")
      }
    }
  }
  #  if (!is.null(transformed_common_vars)) {
  #    picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
  #  }
  # Remove the trailing chunks, if any.
  for (file_dim in names(dims_to_crop)) {
    #    indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max))
    ## TODO: Merge indices in dims_to_crop with some advanced mechanism?
    indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]]))
    array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep)
    array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep)
    for (i in 1:length(dat)) {
      # Crop selectors
      for (selector_dim in names(dat[[i]][['selectors']])) {
        if (selector_dim == file_dim) {
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) {
            dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep]
          }
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) {
            dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep]
          }
        }
        if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) {
          dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep)
          dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep)
        }
      }
      # Crop dataset variables file dims.
      for (picked_var in names(picked_vars[[i]])) {
        if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
          picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep)
        }
      }
      for (transformed_var in names(transformed_vars[[i]])) {
        if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) {
          transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep)
        }
      }
    }
    # Crop common variables file dims.
    for (picked_common_var in names(picked_common_vars)) {
      if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) {
        picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep)
      }
    }
    for (transformed_common_var in names(transformed_common_vars)) {
      if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) {
        transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep)
      }
    }
  }
  # Calculate the size of the final array.
  total_inner_dims <- NULL
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      inner_dims <- expected_inner_dims[[i]]
      inner_dims <- sapply(inner_dims, 
                           function(x) {
                             if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) {
                               max(sapply(dat[[i]][['selectors']][[x]][['sri']], length))
                             } else {
                               if (length(var_params[[x]]) > 0) {
                                 if (var_params[[x]] %in% names(transformed_vars[[i]])) {
                                   length(transformed_vars[[i]][[var_params[[x]]]])
                                 } else if (var_params[[x]] %in% names(transformed_common_vars)) {
                                   length(transformed_common_vars[[var_params[[x]]]])
                                 } else {
                                   max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                                 }
                               } else {
                                 max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
                               }
                             }
                           })
      names(inner_dims) <- expected_inner_dims[[i]]
      if (is.null(total_inner_dims)) {
        total_inner_dims <- inner_dims
      } else {
        new_dims <- .MergeArrayDims(total_inner_dims, inner_dims)
        total_inner_dims <- new_dims[[3]]
      }
    }
  }
  new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims)
  final_dims <- new_dims[[3]][dim_names]
  # final_dims_fake is the vector of final dimensions after having merged the 
  # 'across' file dimensions with the respective 'across' inner dimensions, and
  # after having broken into multiple dimensions those dimensions for which 
  # multidimensional selectors have been provided.
  # final_dims will be used for collocation of data, whereas final_dims_fake 
  # will be used for shaping the final array to be returned to the user.
  final_dims_fake <- final_dims
aho's avatar
aho committed
  if (merge_across_dims) {
    if (!is.null(inner_dims_across_files)) {
      for (file_dim_across in names(inner_dims_across_files)) {
        inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]])
        new_dims <- c()
        if (inner_dim_pos > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)])
        }
        new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), 
                                         inner_dims_across_files[[file_dim_across]]))
        if (inner_dim_pos + 1 < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)])
        }
        final_dims_fake <- new_dims
      }
    }
  }
  #=========================================================================
  # Find the dimension to split if split_multiselected_dims = TRUE.
  # If there is no dimension able to be split, change split_multiselected_dims to FALSE.
aho's avatar
aho committed
  all_split_dims <- NULL
  if (split_multiselected_dims) {
    for (dim_param in 1:length(dim_params)) {
      if (!is.null(dim(dim_params[[dim_param]]))) {
        if (length(dim(dim_params[[dim_param]])) > 1) {
          split_dims <- dim(dim_params[[dim_param]])
          all_split_dims <- c(all_split_dims, setNames(list(split_dims), 
                                                       names(dim_params)[dim_param]))
          if (is.null(names(split_dims))) {
            names(split_dims) <- paste0(names(dim_params)[dim_param], 
                                        1:length(split_dims))
          }
          old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param])

          # If merge_across_dims and split_multiselected_dims are both used,
          # on one file dim, and this file dim is multi-dim, it doesn't work.
          if (identical(old_dim_pos, integer(0))) {
            stop(paste0("The dimension '", names(dim_params)[dim_param], 
                        "' to be split cannot be found after 'merge_across_dims' ",
                        "is used. Check if the reshape parameters are used appropriately."))
          }
 
aho's avatar
aho committed
          # NOTE: Three steps to create new dims.
          # 1st: Put in the dims before split_dim.
          # 2nd: Replace the old_dim with split_dims.
          # 3rd: Put in the dims after split_dim.
          new_dims <- c()
          if (old_dim_pos > 1) {
            new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)])
          }
          new_dims <- c(new_dims, split_dims)
          if (old_dim_pos < length(final_dims_fake)) {
            new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)])
          }
          final_dims_fake <- new_dims
        }
      }
    }
aho's avatar
aho committed
    if (is.null(all_split_dims)) {
      split_multiselected_dims <- FALSE
      .warning(paste0("Not found any dimensions able to be split. The parameter ",
                      "'split_multiselected_dims' is changed to FALSE."))
    }
aho's avatar
aho committed
  }
  #======================================================================
aho's avatar
aho committed
    # only merge_across_dims -> the 'time' dim length needs to be adjusted
    across_inner_dim <- inner_dims_across_files[[1]]  #TODO: more than one?
    # Get the length of each inner_dim ('time') along each file_dim ('file_date')  
    length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length)

    if (merge_across_dims_narm) {
      across_file_dim <- names(inner_dims_across_files)  #TODO: more than one?

      if (!split_multiselected_dims) {
        final_dims_fake_name <- names(final_dims_fake)
        pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim)
        new_length_inner_dim <- sum(unlist(length_inner_across_dim))
        if (pos_across_inner_dim != length(final_dims_fake)) {
          final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                               new_length_inner_dim,
                               final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)])
        } else {
          final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                               new_length_inner_dim)
        }
        names(final_dims_fake) <- final_dims_fake_name
aho's avatar
aho committed
      }
    }
  }
  
  if (!silent) {
    .message("Detected dimension sizes:")
    longest_dim_len <- max(sapply(names(final_dims_fake), nchar))
    longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar))
    sapply(names(final_dims_fake), 
           function(x) {
             message(paste0("*   ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), 
                            x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), 
                            final_dims_fake[x]))
           })
    bytes <- prod(c(final_dims_fake, 8))
    dim_sizes <- paste(final_dims_fake, collapse = ' x ')
    if (retrieve) {
      .message(paste("Total size of requested data:"))
    } else {
      .message(paste("Total size of involved data:"))
    }
    .message(paste(dim_sizes, " x 8 bytes =", 
                   format(structure(bytes, class = "object_size"), units = "auto")), 
             indent = 2)
  }
  
  # NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed.
  #       The inner_dim needs to be the first dim among split dims.
  #       Cannot control the rest dims are in the same order or not...
  #       Suppose users put the same order of across inner and file dims.
  if (split_multiselected_dims & merge_across_dims) {
    # TODO: More than one split?
    inner_dim_pos_in_split_dims <- which(names(all_split_dims[[1]]) == inner_dims_across_files)  
aho's avatar
aho committed
    # if inner_dim is not the first, change!
    if (inner_dim_pos_in_split_dims != 1) {
      split_dims <- c(split_dims[inner_dim_pos_in_split_dims],
                      split_dims[1:length(split_dims)][-inner_dim_pos_in_split_dims])
      split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(split_dims))))
      # Save the current final_dims_fake for later reorder back
      final_dims_fake_output <- final_dims_fake
      new_dims <- c()
      if (split_dims_pos[1] != 1) {
        new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
      }
      new_dims <- c(new_dims, split_dims)
      if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
        new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
      }
      final_dims_fake <- new_dims
    }
  }
  
  # The following several lines will only be run if retrieve = TRUE
  if (retrieve) {
    
    ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ###########
    # TODO: try performance of storing all in cols instead of rows
    # Create the shared memory array, and a pointer to it, to be sent
    # to the work pieces.
    if (is.null(ObjectBigmemory)) {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
    } else {
        data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1,
                                            backingfile = ObjectBigmemory)
    }
aho's avatar
aho committed
    shared_matrix_pointer <- bigmemory::describe(data_array)
    if (is.null(ObjectBigmemory)) {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$sharedName
    } else {
        name_bigmemory_obj <- attr(shared_matrix_pointer, 'description')$filename
    }

    #warning(paste("SharedName:", attr(shared_matrix_pointer, 'description')$sharedName))
    #warning(paste("Filename:", attr(shared_matrix_pointer, 'description')$filename))
    #if (!is.null(ObjectBigmemory)) {
    #  attr(shared_matrix_pointer, 'description')$sharedName <- ObjectBigmemory
    #}
aho's avatar
aho committed
    if (is.null(num_procs)) {
      num_procs <- future::availableCores()
    }
    # Creating a shared tmp folder to store metadata from each chunk
    array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
    if (!is.null(metadata_dims)) {
      metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
      names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
      metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
      array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags),  metadata_indices_to_load,
                                                  list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
    }
    metadata_file_counter <- 0
    metadata_folder <- tempfile('metadata')
    dir.create(metadata_folder)
    # Build the work pieces, each with:
    # - file path
    # - total size (dims) of store array
    # - start position in store array
    # - file selectors (to provide extra info. useful e.g. to select variable)
    # - indices to take from file
    work_pieces <- list()
    for (i in 1:length(dat)) {
      if (dataset_has_files[i]) {
        selectors <- dat[[i]][['selectors']]
        file_dims <- found_file_dims[[i]]
        inner_dims <- expected_inner_dims[[i]]
        sub_array_dims <- final_dims[file_dims]
        sub_array_dims[found_pattern_dim] <- 1
        sub_array_of_files_to_load <- array(1:prod(sub_array_dims), 
                                            dim = sub_array_dims)
        names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims)
        # Detect which of the dimensions of the dataset go across files.
        file_dim_across_files <- lapply(inner_dims, 
                                        function(x) {
                                          dim_across <- sapply(inner_dims_across_files, function(y) x %in% y)
                                          if (any(dim_across)) {
                                            names(inner_dims_across_files)[which(dim_across)[1]]
                                          } else {
                                            NULL
                                          }
                                        })
        names(file_dim_across_files) <- inner_dims
        j <- 1
        while (j <= prod(sub_array_dims)) {
          # Work out file path.
          file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
          names(file_to_load_sub_indices) <- names(sub_array_dims)
          file_to_load_sub_indices[found_pattern_dim] <- i
          big_dims <- rep(1, length(dim(array_of_files_to_load)))
          names(big_dims) <- names(dim(array_of_files_to_load))
          file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]]
          file_to_load <- do.call('[[', c(list(array_of_files_to_load), 
                                          as.list(file_to_load_indices)))
          not_found_file <- do.call('[[', c(list(array_of_not_found_files),
                                            as.list(file_to_load_indices)))
          load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), 
                                               as.list(file_to_load_indices)))
          if (load_file_metadata) {
            metadata_file_counter <- metadata_file_counter + 1
          }
          if (!is.na(file_to_load) && !not_found_file) {
            # Work out indices to take
            first_round_indices <- lapply(inner_dims, 
                                          function (x) {
                                            if (is.null(file_dim_across_files[[x]])) {
                                              x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names")
                                              if (!is.null(x_dim_name)) {
                                                which_chunk <- file_to_load_sub_indices[x_dim_name]
                                                selectors[[x]][['fri']][[which_chunk]]
                                              } else {
                                                selectors[[x]][['fri']][[1]]
                                              }
aho's avatar
aho committed
                                            } else {
                                              which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] 
                                              selectors[[x]][['fri']][[which_chunk]]
                                            }
                                          })
            names(first_round_indices) <- inner_dims
            second_round_indices <- lapply(inner_dims, 
                                           function (x) {
                                             if (is.null(file_dim_across_files[[x]])) {
                                              x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names")
                                              if (!is.null(x_dim_name)) {
                                                which_chunk <- file_to_load_sub_indices[x_dim_name]
                                                selectors[[x]][['sri']][[which_chunk]]
                                              } else {
                                                selectors[[x]][['sri']][[1]]
                                              }
aho's avatar
aho committed
                                             } else {
                                               which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]]
                                               selectors[[x]][['sri']][[which_chunk]]
                                             }
                                           })
            if (debug) {
              print("-> BUILDING A WORK PIECE")
              #print(str(selectors))
            }
            names(second_round_indices) <- inner_dims
            if (!any(sapply(first_round_indices, length) == 0)) {
              work_piece <- list()
              work_piece[['first_round_indices']] <- first_round_indices
              work_piece[['second_round_indices']] <- second_round_indices
              work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
              work_piece[['file_path']] <- file_to_load
              work_piece[['store_dims']] <- final_dims
              # Work out store position
              store_position <- final_dims
              store_position[names(file_to_load_indices)] <- file_to_load_indices
              store_position[inner_dims] <- rep(1, length(inner_dims))
              work_piece[['store_position']] <- store_position
              # Work out file selectors
              file_selectors <- sapply(file_dims, 
                                       function (x) {
                                         vector_to_pick <- 1
                                         if (x %in% names(depending_file_dims)) {
                                           vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                                         }
                                         selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
                                       })
              names(file_selectors) <- file_dims
              work_piece[['file_selectors']] <- file_selectors
              # Send variables for transformation
              if (!is.null(transform) && (length(transform_vars) > 0)) {
                vars_to_transform <- NULL
                picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars)
                if (length(picked_vars_to_transform) > 0) {
                  picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform])
                  if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) {
                    picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))]
                    vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform]
                  }
                }
                picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
                if (length(picked_common_vars_to_transform) > 0) {
                  picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]
                  vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform])
                  if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) {
                    picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))]
                    vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform]
                  }
                }
                work_piece[['vars_to_transform']] <- vars_to_transform
              }
              # Send flag to load metadata
              if (load_file_metadata) {
                work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter)
              }
              work_pieces <- c(work_pieces, list(work_piece))
            }
          }
          j <- j + 1
        }
      }
    }
    #print("N")
    if (debug) {
      print("-> WORK PIECES BUILT")
    }
    
    # Calculate the progress %s that will be displayed and assign them to 
    # the appropriate work pieces.
    if (length(work_pieces) / num_procs >= 2 && !silent) {
      if (length(work_pieces) / num_procs < 10) {
        amount <- 100 / ceiling(length(work_pieces) / num_procs)
        reps <- ceiling(length(work_pieces) / num_procs)
      } else {
        amount <- 10
        reps <- 10
      }
      progress_steps <- rep(amount, reps)
      if (length(work_pieces) < (reps + 1)) {
        selected_pieces <- length(work_pieces)
        progress_steps <- c(sum(head(progress_steps, reps)),
                            tail(progress_steps, reps))
      } else {
        selected_pieces <- round(seq(1, length(work_pieces), 
                                     length.out = reps + 1))[-1]
      }
      progress_steps <- paste0(' + ', round(progress_steps, 2), '%')
      progress_message <- 'Progress: 0%'
    } else {
      progress_message <- ''
      selected_pieces <- NULL
    }
    piece_counter <- 1
    step_counter <- 1
    work_pieces <- lapply(work_pieces, 
                          function (x) {
                            if (piece_counter %in% selected_pieces) {
                              wp <- c(x, list(progress_amount = progress_steps[step_counter]))
                              step_counter <<- step_counter + 1
                            } else {
                              wp <- x
                            }
                            piece_counter <<- piece_counter + 1
                            wp
                          })
    if (!silent) {
      .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.")
      .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.")
      .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:"))
      if (length(work_pieces) < 30) {
        lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2))
      } else {
        .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5)
      }
    }
    
    # Build the cluster of processes that will do the work and dispatch work pieces.
    # The function .LoadDataFile is applied to each work piece. This function will
    # open the data file, regrid if needed, subset, apply the mask, 
    # compute and apply the weights if needed,
    # disable extreme values and store in the shared memory matrix.
    #print("O")
    if (!silent) {
      .message("Loading... This may take several minutes...")
      if (progress_message != '') {
        .message(progress_message, appendLF = FALSE)
      }
    }

# NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here,
#       the path name is created in work_pieces but the path hasn't been built yet.
    if (num_procs == 1) {
      found_files <- lapply(work_pieces, .LoadDataFile, 
                            shared_matrix_pointer = shared_matrix_pointer,
                            file_data_reader = file_data_reader, 
                            synonims = synonims,
                            transform = transform, 
                            transform_params = transform_params,
                            silent = silent, debug = debug)
    } else {
      cluster <- parallel::makeCluster(num_procs, outfile = "")
      # Send the heavy work to the workers
      work_errors <- try({
        found_files <- parallel::clusterApplyLB(cluster, work_pieces, .LoadDataFile, 
                                                shared_matrix_pointer = shared_matrix_pointer,
                                                file_data_reader = file_data_reader,
                                                synonims = synonims,
                                                transform = transform, 
                                                transform_params = transform_params,
                                                silent = silent, debug = debug)
      })
      parallel::stopCluster(cluster)
    }
    
    if (!silent) {
      if (progress_message != '') {
        .message("\n", tag = '')
      }
    }
    #print("P")
    
    # NOTE: If merge_across_dims = TRUE, there might be additional NAs due to
    #       unequal inner_dim ('time') length across file_dim ('file_date').
    #       If merge_across_dims_narm = TRUE, add additional lines to remove these NAs.
    # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. 
    if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) {
      if (!merge_across_dims_narm) {
        data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims)
      } else {
        # Get the length of these two dimensions in final_dims
        length_inner_across_store_dims <- final_dims[across_inner_dim]
        length_file_across_store_dims <- final_dims[across_file_dim]
        
        # Create a logical array for merge_across_dims
        logi_array <- array(rep(FALSE,
                                length_file_across_store_dims * length_inner_across_store_dims),
                            dim = c(length_inner_across_store_dims, length_file_across_store_dims))
        for (i in 1:length_file_across_store_dims) {  #1:4
          logi_array[1:length_inner_across_dim[[i]], i] <- TRUE
        }
        
        # First, get the data array with final_dims dimension
        data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims)
        
        # Change the NA derived from additional spaces to -9999, then remove these -9999
        func_remove_blank <- function(data_array, logi_array) {
          # dim(data_array) = [time, file_date]
          # dim(logi_array) = [time, file_date]
          # Change the blank spaces from NA to -9999
          data_array[which(!logi_array)] <- -9999
          return(data_array)
        }
        data_array_final_dims <- multiApply::Apply(data_array_final_dims,
                                                   target_dims = c(across_inner_dim, across_file_dim),  #c('time', 'file_date')
                                                   output_dims = c(across_inner_dim, across_file_dim),
                                                   fun = func_remove_blank,
                                                   logi_array = logi_array)$output1
        ## reorder back to the correct dim
        tmp <- match(names(final_dims), names(dim(data_array_final_dims)))
        data_array_final_dims <- .aperm2(data_array_final_dims, tmp)
        data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999]  # become a vector
aho's avatar
aho committed
      }

      if (length(data_array_tmp) != prod(final_dims_fake)) {
        stop(paste0("After reshaping, the data do not fit into the expected output dimension. ",
                    "Check if the reshaping parameters are used correctly."))
aho's avatar
aho committed
      }
 
      #NOTE: When one file contains values for dicrete dimensions, rearrange the 
      #      chunks (i.e., work_piece) is necessary.
aho's avatar
aho committed
      if (split_multiselected_dims) {

      # generate the correct order list from indices_chunk 
        final_order_list <- list()
        i <- 1
        j <- 1
        a <- indices_chunk[i]
        while (i <= length(indices_chunk)) {
aho's avatar
aho committed
          while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) {
            a <- c(a, indices_chunk[i+1])
            i <- i + 1
          }
          final_order_list[[j]] <- a
          a <- indices_chunk[i+1]
          i <- i + 1
          j <- j + 1
        }
        names(final_order_list) <- sapply(final_order_list, '[[', 1)
        final_order_list <- lapply(final_order_list, length)
        
        if (!all(diff(as.numeric(names(final_order_list))) > 0)) {
          # shape the vector into the array without split_dims
          split_dims_pos <- match(split_dims, final_dims_fake)
          new_dims <- c()
          if (split_dims_pos[1] > 1) {
            new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
          }
          new_dims <- c(new_dims,  prod(split_dims))
          names(new_dims)[split_dims_pos[1]] <- across_inner_dim
          if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
            new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
          }
          final_dims_fake_no_split <- new_dims
          data_array_no_split <- array(data_array_tmp, dim = final_dims_fake_no_split)
          # seperate 'time' dim into each work_piece length
          data_array_seperate <- list()
          tmp <- cumsum(unlist(length_inner_across_dim))
          tmp <- c(0, tmp)
          for (i in 1:length(length_inner_across_dim)) {
            data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim,
                                               (tmp[i] + 1):tmp[i + 1])
          }
  
          # re-build the array: chunk 
          which_chunk <- as.numeric(names(final_order_list))
          how_many_indices <- unlist(final_order_list)
          array_piece <- list()
          ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate)))
          for (i in 1:length(final_order_list)) {
            array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]],