From 1b04d1d2de8809cdd1d1f522543d11d1e01b372b Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 2 Mar 2021 13:01:58 +0100 Subject: [PATCH 01/10] split Start step to found_pattern_dims using parent.frame() env --- R/Start.R | 42 ++++-------------------------------------- R/zzz.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/R/Start.R b/R/Start.R index 33d5809..33c6ab1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -874,45 +874,11 @@ Start <- function(..., # dim = indices/selectors, dim_names <- names(dim_params) # Look for chunked dims chunks <- look_for_chunks(dim_params, dim_names) - + # Check pattern_dims - if (is.null(pattern_dims)) { - .warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '", - dim_names[1], "' as 'pattern_dims'.")) - pattern_dims <- dim_names[1] - } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) { - pattern_dims <- unique(pattern_dims) - } else { - stop("Parameter 'pattern_dims' must be a vector of character strings.") - } - if (any(names(var_params) %in% pattern_dims)) { - stop("'*_var' parameters specified for pattern dimensions. Remove or fix them.") - } - # Find the pattern dimension with the pattern specifications - found_pattern_dim <- NULL - for (pattern_dim in pattern_dims) { - # Check all specifications in pattern_dim are valid - dat <- datasets <- dim_params[[pattern_dim]] - if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { - stop(paste0("Parameter '", pattern_dim, - "' must be a list of lists with pattern specifications or a vector of character strings.")) - } - if (!is.null(dim_reorder_params[[pattern_dim]])) { - .warning(paste0("A reorder for the selectors of '", pattern_dim, - "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) - } - if (is.list(dat) || any(sapply(dat, is.list))) { - if (is.null(found_pattern_dim)) { - found_pattern_dim <- pattern_dim - } else { - stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") - } - } - } - if (is.null(found_pattern_dim)) { - .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) - found_pattern_dim <- pattern_dims[1] - } + # Function found_pattern_dims may change pattern_dims in the .GlobalEnv + found_pattern_dim <- found_pattern_dims(pattern_dims, dim_names, var_params, + dim_params, dim_reorder_params) # Check all *_reorder are NULL or functions, and that they all have # a matching dimension param. diff --git a/R/zzz.R b/R/zzz.R index af04e6e..7dda044 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -182,3 +182,49 @@ look_for_chunks <- function(dim_params, dim_names) { array(indices, dim = setNames(length(indices), dim_name)) } +# Check pattern_dims +# Function found_pattern_dims may change pattern_dims in the parent.env +found_pattern_dims <- function(pattern_dims, dim_names, var_params, + dim_params, dim_reorder_params) { + if (is.null(pattern_dims)) { + .warning(paste0("Parameter 'pattern_dims' not specified. 
Taking the first dimension, '", + dim_names[1], "' as 'pattern_dims'.")) + assign('pattern_dims', dim_names[1], envir = parent.frame()) + pattern_dims <- dim_names[1] + } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) { + assign('pattern_dims', unique(pattern_dims), envir = parent.frame()) + pattern_dims <- unique(pattern_dims) + } else { + stop("Parameter 'pattern_dims' must be a vector of character strings.") + } + if (any(names(var_params) %in% pattern_dims)) { + stop("'*_var' parameters specified for pattern dimensions. Remove or fix them.") + } + # Find the pattern dimension with the pattern specifications + found_pattern_dim <- NULL + for (pattern_dim in pattern_dims) { + # Check all specifications in pattern_dim are valid + dat <- datasets <- dim_params[[pattern_dim]] + if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { + stop(paste0("Parameter '", pattern_dim, + "' must be a list of lists with pattern specifications or a vector of character strings.")) + } + if (!is.null(dim_reorder_params[[pattern_dim]])) { + .warning(paste0("A reorder for the selectors of '", pattern_dim, + "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) + } + if (is.list(dat) || any(sapply(dat, is.list))) { + if (is.null(found_pattern_dim)) { + found_pattern_dim <- pattern_dim + } else { + stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") + } + } + } + if (is.null(found_pattern_dim)) { + .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) + found_pattern_dim <- pattern_dims[1] + } + return(found_pattern_dim) +} + -- GitLab From e07c0a87ae725aa2d8f3588822443cbb0a97a636 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 16 Jul 2021 17:58:34 +0200 Subject: [PATCH 02/10] Modularize Start() in progress --- R/Start.R | 294 +++++++++++++----------------------------------------- R/zzz.R | 194 ++++++++++++++++++++++++++++++++++- 2 files changed, 262 insertions(+), 226 deletions(-) diff --git a/R/Start.R b/R/Start.R index 00e9877..d723ea4 100644 --- a/R/Start.R +++ b/R/Start.R @@ -885,7 +885,7 @@ Start <- function(..., # dim = indices/selectors, # Function found_pattern_dims may change pattern_dims in the .GlobalEnv found_pattern_dim <- found_pattern_dims(pattern_dims, dim_names, var_params, dim_params, dim_reorder_params) - + # Check all *_reorder are NULL or functions, and that they all have # a matching dimension param. i <- 1 @@ -950,17 +950,17 @@ Start <- function(..., # dim = indices/selectors, # Check if pattern_dims is the first item in metadata_dims if ((pattern_dims %in% metadata_dims) & metadata_dims[1] != pattern_dims) { - metadata_dims <- c(metadata_dims[-1], metadata_dims[1]) + metadata_dims <- c(pattern_dims, metadata_dims[-which(metadata_dim == pattern_dims)]) } # Check if metadata_dims has more than 2 elements if ((metadata_dims[1] == pattern_dims & length(metadata_dims) > 2)) { - .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", - "function. Keep '", metadata_dims[1], "' and '", metadata_dims[2], "' only.")) - metadata_dims <- metadata_dims[1:2] + .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", + "function. 
Keep '", metadata_dims[1], "' and '", metadata_dims[2], "' only.")) + metadata_dims <- metadata_dims[1:2] } else if (!(pattern_dims %in% metadata_dims) & length(metadata_dims) > 1) { - .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", - "function. Keep '", metadata_dims[1], "' only.")) - metadata_dims <- metadata_dims[1] + .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", + "function. Keep '", metadata_dims[1], "' only.")) + metadata_dims <- metadata_dims[1] } # Once the pattern dimension with dataset specifications is found, @@ -972,64 +972,12 @@ Start <- function(..., # dim = indices/selectors, chunks[[found_pattern_dim]]['n_chunks'], found_pattern_dim) dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] - dat <- datasets <- dim_params[[found_pattern_dim]] - dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') - dat_to_fetch <- c() - dat_names <- c() - if (!is.list(dat)) { - dat <- as.list(dat) - } else { - if (!any(sapply(dat, is.list))) { - dat <- list(dat) - } - } - for (i in 1:length(dat)) { - if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { - if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) { - dat[[i]] <- list(path = dat[[i]]) - } else { - dat[[i]] <- list(name = dat[[i]]) - } - } else if (!is.list(dat[[i]])) { - stop(paste0("Parameter '", pattern_dim, - "' is incorrect. It must be a list of lists or character strings.")) - } - #if (!(all(names(dat[[i]]) %in% dat_info_names))) { - # stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. Check 'dat' in ?Load for details.") - #} - if (!('name' %in% names(dat[[i]]))) { - dat[[i]][['name']] <- paste0('dat', i) - if (!('path' %in% names(dat[[i]]))) { - stop(paste0("Parameter '", found_pattern_dim, - "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) - } - } else if (!('path' %in% names(dat[[i]]))) { - dat_to_fetch <- c(dat_to_fetch, i) - } - #if ('path' %in% names(dat[[i]])) { - # if (!('nc_var_name' %in% names(dat[[i]]))) { - # dat[[i]][['nc_var_name']] <- '$var_name$' - # } - # if (!('suffix' %in% names(dat[[i]]))) { - # dat[[i]][['suffix']] <- '' - # } - # if (!('var_min' %in% names(dat[[i]]))) { - # dat[[i]][['var_min']] <- '' - # } - # if (!('var_max' %in% names(dat[[i]]))) { - # dat[[i]][['var_max']] <- '' - # } - #} - dat_names <- c(dat_names, dat[[i]][['name']]) - } - if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { - .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") - } - if (length(dat_to_fetch) > 0) { - stop("Specified only the name for some data sets, but not the path ", - "pattern. This option has not been yet implemented.") - } - + dat <- dim_params[[found_pattern_dim]] + #NOTE: This function creates the object 'dat_names' + dat <- mount_dat(dat, pattern_dim, found_pattern_dim) + + dim_params[[found_pattern_dim]] <- dat_names + # Reorder inner_dims_across_files (to make the keys be the file dimensions, # and the values to be the inner dimensions that go across it). 
if (!is.null(inner_dims_across_files)) { @@ -1127,7 +1075,24 @@ Start <- function(..., # dim = indices/selectors, if (length(var_entries_to_add) > 0) { synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add]) } - + + # Check if return_vars name is inner dim name. If it is synonim, change back to inner dim name + # and return a warning. + use_syn_names <- which(names(return_vars) %in% unlist(synonims) & + !names(return_vars) %in% names(synonims)) + if (!identical(use_syn_names, integer(0))) { + for (use_syn_name in use_syn_names) { + wrong_name <- names(return_vars)[use_syn_name] + names(return_vars)[use_syn_name] <- names(unlist( + lapply(lapply(synonims, '%in%', + names(return_vars)[use_syn_name]), + which))) + .warning(paste0("The name '", wrong_name, "' in parameter 'return_vars' in synonim. ", + "Change it back to the inner dimension name, '", + names(return_vars)[use_syn_name], "'.")) + } + } + # Check selector_checker if (is.null(selector_checker) || !is.function(selector_checker)) { stop("Parameter 'selector_checker' must be a function.") @@ -1242,8 +1207,6 @@ Start <- function(..., # dim = indices/selectors, stop("Parameter 'silent' must be logical.") } - dim_params[[found_pattern_dim]] <- dat_names - if (!silent) { .message(paste0("Exploring files... This will take a variable amount ", "of time depending on the issued request and the ", @@ -1340,60 +1303,34 @@ Start <- function(..., # dim = indices/selectors, stop("All dimensions in 'metadata_dims' must be file dimensions.") } } + + # Add attributes indicating whether this dimension selector is value or indice + tmp <- lapply(dat_selectors[which(dim_names != pattern_dims)], add_value_indices_flag) + dat_selectors <- c(dat_selectors[pattern_dims], tmp) + ## Look for _var params that should be requested automatically. - for (dim_name in dim_names) { - if (!(dim_name %in% pattern_dims)) { - if (is.null(attr(dat_selectors[[dim_name]], 'values')) || - is.null(attr(dat_selectors[[dim_name]], 'indices'))) { - flag <- ((any(dat_selectors[[dim_name]] %in% c('all', 'first', 'last'))) || - (is.numeric(unlist(dat_selectors[[dim_name]])))) - attr(dat_selectors[[dim_name]], 'values') <- !flag - attr(dat_selectors[[dim_name]], 'indices') <- flag - } - ## The following code 'rewrites' var_params for all datasets. If providing different - ## path pattern repositories with different file/inner dimensions, var_params might - ## have to be handled for each dataset separately. - if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) && - !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims)) { - if (dim_name %in% c('var', 'variable')) { - var_params <- c(var_params, setNames(list('var_names'), dim_name)) - .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", - dim_name, "_var' requested. ", '"', dim_name, "_var = '", - 'var_names', "'", '"', " has been automatically added to ", - "the Start call.")) - } else { - var_params <- c(var_params, setNames(list(dim_name), dim_name)) - .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", - dim_name, "_var' requested. ", '"', dim_name, "_var = '", - dim_name, "'", '"', " has been automatically added to ", - "the Start call.")) - } + for (dim_name in dim_names[-which(dim_names == pattern_dims)]) { + ## The following code 'rewrites' var_params for all datasets. 
If providing different + ## path pattern repositories with different file/inner dimensions, var_params might + ## have to be handled for each dataset separately. + if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) && + !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims)) { + if (dim_name %in% c('var', 'variable')) { + var_params <- c(var_params, setNames(list('var_names'), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + 'var_names', "'", '"', " has been automatically added to ", + "the Start call.")) + } else { + var_params <- c(var_params, setNames(list(dim_name), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + dim_name, "'", '"', " has been automatically added to ", + "the Start call.")) } } } -#================================================== - # Check if return_vars name is inner dim name. If it is synonim, change back to - # inner dim name and return a warning. - dim_params_found_file_dims <- dim_params[found_file_dims[[i]]] - if (any(names(return_vars) %in% unlist(synonims) & - !names(return_vars) %in% names(synonims))) { - use_syn_names <- which(names(return_vars) %in% unlist(synonims) & - !names(return_vars) %in% names(synonims)) - for (use_syn_name in use_syn_names) { - wrong_name <- names(return_vars)[use_syn_name] - names(return_vars)[use_syn_name] <- names(unlist( - lapply(lapply(synonims, '%in%', - names(return_vars)[use_syn_name]), - which))) - .warning(paste0("The name '", wrong_name, - "' in parameter 'return_vars' in synonim. ", - "Change it back to the inner dimension name, '", - names(return_vars)[use_syn_name], "'.")) - } - } -#===================================================== - ## (Check the *_var parameters). if (any(!(unlist(var_params) %in% names(return_vars)))) { vars_to_add <- which(!(unlist(var_params) %in% names(return_vars))) @@ -1406,15 +1343,17 @@ Start <- function(..., # dim = indices/selectors, paste(paste0("'", unlist(var_params), "'"), collapse = ', '))) } + # Examine the selectors of file dim and create 'replace_values', which uses the first + # explicit selector (i.e., character) for all file dimensions. replace_values <- vector('list', length = length(file_dims)) names(replace_values) <- file_dims - # Take the first selector for all possible file dimensions for (file_dim in file_dims) { if (file_dim %in% names(var_params)) { .warning(paste0("The '", file_dim, "_var' param will be ignored since '", file_dim, "' is a file dimension (for the dataset with pattern ", dat[[i]][['path']], ").")) } + # If the selector is a vector or a list of 2 without names (represent the value range) if (!is.list(dat_selectors[[file_dim]]) || (is.list(dat_selectors[[file_dim]]) && length(dat_selectors[[file_dim]]) == 2 && @@ -1446,15 +1385,18 @@ Start <- function(..., # dim = indices/selectors, } } sv <- dat_selectors[[file_dim]][[1]] + # 'replace_values' has the first selector (if it's character) or NULL (if it's not explicitly + # defined) for each file dimension. if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { - replace_values[[file_dim]] <- dat_selectors[[file_dim]][[1]][1] + replace_values[[file_dim]] <- sv[1] } } #print("C") # Now we know which dimensions whose selectors are provided non-explicitly. 
undefined_file_dims <- file_dims[which(sapply(replace_values, is.null))] defined_file_dims <- file_dims[which(!(file_dims %in% undefined_file_dims))] - # Quickly check if the depending dimensions are provided properly. + # Quickly check if the depending dimensions are provided properly. The check is only for + # if the depending and depended file dims are both explicited defined. for (file_dim in file_dims) { if (file_dim %in% names(depending_file_dims)) { ## TODO: Detect multi-dependancies and forbid. @@ -1540,112 +1482,13 @@ Start <- function(..., # dim = indices/selectors, dataset_has_files[i] <- TRUE ## TODO: Improve message here if no variable found: if (length(undefined_file_dims) > 0) { - # Looking for the first values, parsed from first_file. - first_values <- vector('list', length = length(undefined_file_dims)) - names(first_values) <- undefined_file_dims - found_values <- 0 - stop <- FALSE - try_dim <- 1 - last_success <- 1 - while ((found_values < length(undefined_file_dims)) && !stop) { - u_file_dim <- undefined_file_dims[try_dim] - if (is.null(first_values[[u_file_dim]])) { - path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], - replace_values[-which(file_dims == u_file_dim)], - allow_undefined_key_vars = TRUE) - found_value <- .FindTagValue(path_with_globs_and_tag, - first_file, u_file_dim) - if (!is.null(found_value)) { - found_values <- found_values + 1 - last_success <- try_dim - first_values[[u_file_dim]] <- found_value - replace_values[[u_file_dim]] <- found_value - } - } - try_dim <- (try_dim %% length(undefined_file_dims)) + 1 - if (try_dim == last_success) { - stop <- TRUE - } - } - if (found_values < length(undefined_file_dims)) { - stop(paste0("Path pattern of dataset '", dat[[i]][['name']], - "' is too complex. Could not automatically ", - "detect values for all non-explicitly defined ", - "indices. Check its pattern: ", dat[[i]][['path']])) - } - ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above - ## Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical - ## cmor case, requesting all members and chunks for fixed var and sdate. Not fixing - ## sdate raised 'too complex' error. - # Replace shell globs in path pattern and keep the file_dims as tags - dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, - file_dims, dat[[i]][['name']], path_glob_permissive) - # Now time to look for the available values for the non - # explicitly defined selectors for the file dimensions. - #print("H") - # Check first the ones that do not depend on others. - ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))], - undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))]) - - for (u_file_dim in ufd) { - replace_values[undefined_file_dims] <- first_values - replace_values[[u_file_dim]] <- '*' - depended_dim <- NULL - depended_dim_values <- NA - -#NOTE: Here 'selectors' is always 1. Is it supposed to be like this? 
- selectors <- dat_selectors[[u_file_dim]][[1]] - if (u_file_dim %in% names(depending_file_dims)) { - depended_dim <- depending_file_dims[[u_file_dim]] - depended_dim_values <- dat_selectors[[depended_dim]][[1]] - dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values)) - names(dat_selectors[[u_file_dim]]) <- depended_dim_values - } else { - dat_selectors[[u_file_dim]] <- list() - } - if (u_file_dim %in% unlist(depending_file_dims)) { - depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))] - replace_values[depending_dims] <- rep('*', length(depending_dims)) - } - for (j in 1:length(depended_dim_values)) { - parsed_values <- c() - if (!is.null(depended_dim)) { - replace_values[[depended_dim]] <- depended_dim_values[j] - } - path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) - found_files <- Sys.glob(path_with_globs) - ## TODO: Enhance this error message, or change by warning. - ## Raises if a wrong sdate is specified, for example. - if (length(found_files) == 0) { - .warning(paste0("Could not find files for any '", u_file_dim, - "' for '", depended_dim, "' = '", - depended_dim_values[j], "'.")) - dat_selectors[[u_file_dim]][[j]] <- NA - } else { - for (found_file in found_files) { - path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], - replace_values[-which(file_dims == u_file_dim)], - allow_undefined_key_vars = TRUE) - parsed_values <- c(parsed_values, - .FindTagValue(path_with_globs_and_tag, found_file, - u_file_dim)) - } - #TODO: selector_checker() doesn't allow selectors to be characters. For selectors - # like "member = 'r7i1p1f1", it cannot be defined with values. - dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, - var = unique(parsed_values), - return_indices = FALSE) - # Take chunk if needed - dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), - chunks[[u_file_dim]]['chunk'], - chunks[[u_file_dim]]['n_chunks'], - u_file_dim)] - } - } - } + # Note: "dat[[i]][['path']]" is changed by the function below. + dat_selectors <- find_ufd_value(undefined_file_dims, dat, i, replace_values, + first_file, file_dims, path_glob_permissive, + depending_file_dims, dat_selectors, selector_checker, + chunks) #print("I") } else { - #NOTE: If there is no non-explicitly defined dim, use the first found file # to modify. Problem: '*' doesn't catch all the possible file. Only use # the first file. 
@@ -1654,6 +1497,7 @@ Start <- function(..., # dim = indices/selectors, } } } +#NEW # Now fetch for the first available file if (dataset_has_files[i]) { known_dims <- file_dims diff --git a/R/zzz.R b/R/zzz.R index 7dda044..9d607d2 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -204,7 +204,8 @@ found_pattern_dims <- function(pattern_dims, dim_names, var_params, found_pattern_dim <- NULL for (pattern_dim in pattern_dims) { # Check all specifications in pattern_dim are valid - dat <- datasets <- dim_params[[pattern_dim]] +# dat <- datasets <- dim_params[[pattern_dim]] + dat <- dim_params[[pattern_dim]] if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { stop(paste0("Parameter '", pattern_dim, "' must be a list of lists with pattern specifications or a vector of character strings.")) @@ -228,3 +229,194 @@ found_pattern_dims <- function(pattern_dims, dim_names, var_params, return(found_pattern_dim) } + +# The variable 'dat' is mounted with the information (name, path) of each dataset. +# NOTE: This function creates the object 'dat_names' in the parent env. +mount_dat <- function(dat, pattern_dim, found_pattern_dim) { + +# dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') + dat_to_fetch <- c() + dat_names <- c() + if (!is.list(dat)) { + dat <- as.list(dat) + } else { + if (!any(sapply(dat, is.list))) { + dat <- list(dat) + } + } + for (i in 1:length(dat)) { + if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { + if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) { + dat[[i]] <- list(path = dat[[i]]) + } else { + dat[[i]] <- list(name = dat[[i]]) + } + } else if (!is.list(dat[[i]])) { + stop(paste0("Parameter '", pattern_dim, + "' is incorrect. It must be a list of lists or character strings.")) + } + #if (!(all(names(dat[[i]]) %in% dat_info_names))) { + # stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. Check 'dat' in ?Load for details.") + #} + if (!('name' %in% names(dat[[i]]))) { + dat[[i]][['name']] <- paste0('dat', i) + if (!('path' %in% names(dat[[i]]))) { + stop(paste0("Parameter '", found_pattern_dim, + "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) + } + } else if (!('path' %in% names(dat[[i]]))) { + dat_to_fetch <- c(dat_to_fetch, i) + } + #if ('path' %in% names(dat[[i]])) { + # if (!('nc_var_name' %in% names(dat[[i]]))) { + # dat[[i]][['nc_var_name']] <- '$var_name$' + # } + # if (!('suffix' %in% names(dat[[i]]))) { + # dat[[i]][['suffix']] <- '' + # } + # if (!('var_min' %in% names(dat[[i]]))) { + # dat[[i]][['var_min']] <- '' + # } + # if (!('var_max' %in% names(dat[[i]]))) { + # dat[[i]][['var_max']] <- '' + # } + #} + dat_names <- c(dat_names, dat[[i]][['name']]) + } + if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { + .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") + } + if (length(dat_to_fetch) > 0) { + stop("Specified only the name for some data sets, but not the path ", + "pattern. 
This option has not been yet implemented.") + } + + assign('dat_names', dat_names, envir = parent.frame()) + return(dat) +} + +# Add attributes indicating whether this dimension selector is value or indice +add_value_indices_flag <- function(x) { + if (is.null(attr(x, 'values')) || is.null(attr(x, 'indices'))) { + flag <- (any(x %in% c('all', 'first', 'last')) || is.numeric(unlist(x))) + attr(x, 'values') <- !flag + attr(x, 'indices') <- flag + } + return(x) +} + + +# Find the value for the undefined selector (i.e., indices()). Use the value from the first +# found file. +# Note that "dat[[i]][['path']]" in parent env. is changed in this function. +find_ufd_value <- function(undefined_file_dims, dat, i, replace_values, + first_file, file_dims, path_glob_permissive, + depending_file_dims, dat_selectors, selector_checker, chunks) { + first_values <- vector('list', length = length(undefined_file_dims)) + names(first_values) <- undefined_file_dims + found_values <- 0 + stop <- FALSE + try_dim <- 1 + last_success <- 1 + while ((found_values < length(undefined_file_dims)) && !stop) { + u_file_dim <- undefined_file_dims[try_dim] + if (is.null(first_values[[u_file_dim]])) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + found_value <- .FindTagValue(path_with_globs_and_tag, + first_file, u_file_dim) + if (!is.null(found_value)) { + found_values <- found_values + 1 + last_success <- try_dim + first_values[[u_file_dim]] <- found_value + replace_values[[u_file_dim]] <- found_value + } + } + try_dim <- (try_dim %% length(undefined_file_dims)) + 1 + if (try_dim == last_success) { + stop <- TRUE + } + } + if (found_values < length(undefined_file_dims)) { + stop(paste0("Path pattern of dataset '", dat[[i]][['name']], + "' is too complex. Could not automatically ", + "detect values for all non-explicitly defined ", + "indices. Check its pattern: ", dat[[i]][['path']])) + } + ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above + ## Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical + ## cmor case, requesting all members and chunks for fixed var and sdate. Not fixing + ## sdate raised 'too complex' error. + # Replace shell globs in path pattern and keep the file_dims as tags + dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, + file_dims, dat[[i]][['name']], path_glob_permissive) + + # Now time to look for the available values for the non + # explicitly defined selectors for the file dimensions. + #print("H") + # Check first the ones that do not depend on others. + ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))], + undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))]) + + for (u_file_dim in ufd) { + replace_values[undefined_file_dims] <- first_values + replace_values[[u_file_dim]] <- '*' + depended_dim <- NULL + depended_dim_values <- NA + + #NOTE: Here 'selectors' is always 1. Is it supposed to be like this? 
+ selectors <- dat_selectors[[u_file_dim]][[1]] + if (u_file_dim %in% names(depending_file_dims)) { + depended_dim <- depending_file_dims[[u_file_dim]] + depended_dim_values <- dat_selectors[[depended_dim]][[1]] + dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values)) + names(dat_selectors[[u_file_dim]]) <- depended_dim_values + } else { + dat_selectors[[u_file_dim]] <- list() + } + if (u_file_dim %in% unlist(depending_file_dims)) { + depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))] + replace_values[depending_dims] <- rep('*', length(depending_dims)) + } + for (j in 1:length(depended_dim_values)) { + parsed_values <- c() + if (!is.null(depended_dim)) { + replace_values[[depended_dim]] <- depended_dim_values[j] + } + path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + found_files <- Sys.glob(path_with_globs) + ## TODO: Enhance this error message, or change by warning. + ## Raises if a wrong sdate is specified, for example. + if (length(found_files) == 0) { + .warning(paste0("Could not find files for any '", u_file_dim, + "' for '", depended_dim, "' = '", + depended_dim_values[j], "'.")) + dat_selectors[[u_file_dim]][[j]] <- NA + } else { + for (found_file in found_files) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + parsed_values <- c(parsed_values, + .FindTagValue(path_with_globs_and_tag, found_file, + u_file_dim)) + } + #TODO: selector_checker() doesn't allow selectors to be characters. For selectors + # like "member = 'r7i1p1f1", it cannot be defined with values. + dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, + var = unique(parsed_values), + return_indices = FALSE) + # Take chunk if needed + dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), + chunks[[u_file_dim]]['chunk'], + chunks[[u_file_dim]]['n_chunks'], + u_file_dim)] + } + } + } + #NOTE: change 'dat' in parent env. because "dat[[i]][['path']]" is changed. + assign('dat', dat, envir = parent.frame()) + return(dat_selectors) +} + -- GitLab From e2ea2a47d1b424a5be8a01a6ad0531dd919b572b Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 2 Aug 2021 09:43:31 +0200 Subject: [PATCH 03/10] Modify unit test for the change of netCDF file timestamps --- .../testthat/test-Start-implicit_inner_dim.R | 2 +- .../test-Start-path_glob_permissive.R | 90 +++++++++++-------- tests/testthat/test-Start-split-merge.R | 2 +- 3 files changed, 57 insertions(+), 37 deletions(-) diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 26e3ce7..6a3262a 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -37,7 +37,7 @@ c(file_date = 2, time = 1) ) expect_equal( attr(obs, 'Variables')$common$time[1, 1], -as.POSIXct('2013-11-15 23:30:00', tz = 'UTC') +as.POSIXct('2013-11-15', tz = 'UTC') ) }) diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index f298bd4..ddd69be 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -96,45 +96,65 @@ test_that("2. tag at the end", { # Without the layer that path_glob_permissive allows to contain *, the last item in the path is tag. 
In the example below, the path without path_glob_permissive layer is # "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/". The last item is "$sdate$" -sdates.seq.thu <- format(seq(as.Date(paste(2020, 06, 11, sep = '-')), as.Date(paste(2020, 09, 17, sep = '-')), - by = 'weeks'), format='%Y%m%d') -path <- "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/$var$_$sdate$_*.nc" -suppressWarnings( -exp <- Start(dat = path, - var = "tas", - sdate = sdates.seq.thu, - time = 'all', - ensemble = "all", - latitude = indices(1:2), - longitude = indices(1:2), - path_glob_permissive = 1, - retrieve = F) -) - asd <- as.list(attr(exp, 'ExpectedFiles')) - qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) - files <- paste0('tas_', sdates.seq.thu, '_', 24:38, '.nc') - expect_equal( - qwe, files - ) +#!!!!!!!!!!This data has been removed!!!!!!!!!!!!! +#sdates.seq.thu <- format(seq(as.Date(paste(2020, 06, 11, sep = '-')), as.Date(paste(2020, 09, 17, sep = '-')), +# by = 'weeks'), format='%Y%m%d') +#path <- "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/$var$_$sdate$_*.nc" +#suppressWarnings( +#exp <- Start(dat = path, +# var = "tas", +# sdate = sdates.seq.thu, +# time = 'all', +# ensemble = "all", +# latitude = indices(1:2), +# longitude = indices(1:2), +# path_glob_permissive = 1, +# retrieve = F) +#) +# asd <- as.list(attr(exp, 'ExpectedFiles')) +# qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) +# files <- paste0('tas_', sdates.seq.thu, '_', 24:38, '.nc') +# expect_equal( +# qwe, files +# ) +# +#suppressWarnings( +#exp <- Start(dat = path, +# var = "tas", +# sdate = sdates.seq.thu, +# time = 'all', +# ensemble = "all", +# latitude = indices(1:2), +# longitude = indices(1:2), +# path_glob_permissive = FALSE, +# retrieve = F) +#) +# asd <- as.list(attr(exp, 'ExpectedFiles')) +# qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) +# files <- paste0('tas_', sdates.seq.thu, '_', 24, '.nc') +# expect_equal( +# qwe, files +# ) + +path <- "/esarchive/exp/ecmwf/system4_m1/6hourly/$var$/$var$_$year$0*.nc" suppressWarnings( -exp <- Start(dat = path, - var = "tas", - sdate = sdates.seq.thu, - time = 'all', - ensemble = "all", - latitude = indices(1:2), - longitude = indices(1:2), - path_glob_permissive = FALSE, - retrieve = F) +data <- Start(dat = path, + var = "tas", + year = c('1994', '1995'), + time = indices(1:2), + ensemble = indices(1), + latitude = indices(1:2), + longitude = indices(1:2), + path_glob_permissive = 1, + retrieve = F) ) - asd <- as.list(attr(exp, 'ExpectedFiles')) - qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) - files <- paste0('tas_', sdates.seq.thu, '_', 24, '.nc') - expect_equal( - qwe, files - ) +expect_equal( +as.list(attr(data, 'ExpectedFiles')), +list("/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", + "/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") +) }) diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 9376f9a..fe686dc 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -180,7 +180,7 @@ c(file_date = 4, time = 1) ) expect_equal( attr(obs, 'Variables')$common$time[1, 1], -as.POSIXct('2013-11-15 23:30:00', tz = 'UTC') +as.POSIXct('2013-11-15', tz = 'UTC') ) -- GitLab From f414d54c6396ca361b3fcfc6af46481f1639ecc7 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 4 Aug 2021 12:04:37 +0200 Subject: [PATCH 04/10] ex1_8 has bugs --- inst/doc/usecase/ex1_8_tasandtos.R | 5 +- tests/testthat/test-Start-two_dats.R | 89 
++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-Start-two_dats.R diff --git a/inst/doc/usecase/ex1_8_tasandtos.R b/inst/doc/usecase/ex1_8_tasandtos.R index 38fdf95..a384368 100644 --- a/inst/doc/usecase/ex1_8_tasandtos.R +++ b/inst/doc/usecase/ex1_8_tasandtos.R @@ -19,7 +19,7 @@ library(startR) paths = list(list(path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'), list(path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc')) data1 <- Start(dataset = paths, - var = c('tas','tos'), + var = c('tas', 'tos'), sdate = paste0(1960:1962), fmonth = 1, lat = values(list(0, 10)), @@ -29,9 +29,10 @@ data1 <- Start(dataset = paths, fyear_depends = 'sdate', fmonth_across = 'fyear', merge_across_dims = TRUE, - synonims = list(fmonth = c('fmonth','time'), + synonims = list(fmonth = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), + metadata_dims = 'var', return_vars = list(lat = 'dataset', lon = 'dataset'), num_procs = 1, retrieve = TRUE) diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R new file mode 100644 index 0000000..74738d0 --- /dev/null +++ b/tests/testthat/test-Start-two_dats.R @@ -0,0 +1,89 @@ +# ex1_8 +context("Start() two dats in one call") + +test_that("1. ex1_8", { + +path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', + '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', + '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +suppressWarnings( +data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), + var = c('tas', 'tos'), + sdate = paste0(1960:1962), + fmonth = 1, + lat = values(list(8, 10)), + lon = values(list(8, 10)), + fyear = 'all', + member = indices(1), + fyear_depends = 'sdate', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + synonims = list(fmonth = c('fmonth', 'time'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + metadata_dims = 'var', + return_vars = list(lat = 'dataset', lon = 'dataset'), + retrieve = TRUE) +) + +expect_equal( +dim(data), +c(dataset = 2, var = 2, sdate = 3, fmonth = 1, lat = 3, lon = 3, member = 1) +) +expect_equal( +sum(data, na.rm = T), +15396.7, +tolerance = 0.0001 +) +expect_equal( +length(data[is.na(data)]), +54 +) +expect_equal( +sum(is.na(data[1,1,,,,,])), +0 +) +# Amon also has tos +expect_equal( +sum(is.na(data[1,2,,,,,])), +0 +) +expect_equal( +sum(is.na(data[2,1,,,,,])), +27 +) +# WRONG!!!! Omon should have tos. 
The value should be 0 +expect_equal( +sum(is.na(data[2,2,,,,,])), +27 +) + +expect_equal( +names(attr(data, 'Variables')), +c("common", "dat1", "dat2") +) +expect_equal( +names(attr(data, 'Variables')$common), +c("fmonth", "tas", "tos") +) +expect_equal( +names(attr(data, 'Variables')$dat1), +c("lat", "lon") +) +expect_equal( +names(attr(data, 'Variables')$dat2), +c("lat", "lon") +) +expect_equal( +length(attr(data, 'Variables')$common$tas), +17 +) +expect_equal( +length(attr(data, 'Variables')$common$tos), +16 +) + +}) -- GitLab From a87d15793e41e2585366363eed7e56e7401a0a58 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 4 Aug 2021 12:05:07 +0200 Subject: [PATCH 05/10] Finish the first stage of modularization --- R/Start.R | 969 +++++++++++------------------------------------------- R/zzz.R | 752 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 944 insertions(+), 777 deletions(-) diff --git a/R/Start.R b/R/Start.R index d723ea4..6f602a5 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1428,6 +1428,7 @@ Start <- function(..., # dim = indices/selectors, } } } + # Find the possible values for the selectors that are provided as # indices. If the requested file is on server, impossible operation. if (length(grep("^http", dat[[i]][['path']])) > 0) { @@ -1497,7 +1498,8 @@ Start <- function(..., # dim = indices/selectors, } } } -#NEW + dat[[i]][['selectors']] <- dat_selectors + # Now fetch for the first available file if (dataset_has_files[i]) { known_dims <- file_dims @@ -1514,24 +1516,33 @@ Start <- function(..., # dim = indices/selectors, sub_array_of_not_found_files <- array(!dataset_has_files[i], dim = files_to_load) names(dim(sub_array_of_not_found_files)) <- known_dims - j <- 1 - if (!exists('selector_indices_save')) { - selector_indices_save <- vector('list', length = length(dat)) - } - if (!exists('selectors_total_list')) { - selectors_total_list <- vector('list', length = length(dat)) + + if (largest_dims_length) { + if (!exists('selector_indices_save')) { + selector_indices_save <- vector('list', length = length(dat)) + } + if (!exists('selectors_total_list')) { + selectors_total_list <- vector('list', length = length(dat)) + } + selector_indices_save[[i]] <- vector('list', length = prod(files_to_load)) + selectors_total_list[[i]] <- vector('list', length = prod(files_to_load)) } - selector_indices_save[[i]] <- vector('list', length = prod(files_to_load)) - selectors_total_list[[i]] <- vector('list', length = prod(files_to_load)) + j <- 1 + # NOTE: This while loop has these objects that are used afterward: 'sub_array_of_files_to_load', + # 'sub_array_of_not_found_files', 'indices_of_first_files_with_data', 'selectors_of_first_files_with_data'; + # 'selector_indices_save' and 'selectors_total_list' are used if 'largest_dims_length = T'. 
while (j <= prod(files_to_load)) { selector_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] names(selector_indices) <- known_dims - tmp <- selector_indices - tmp[which(known_dims == found_pattern_dim)] <- i - selector_indices_save[[i]][[j]] <- tmp + if (largest_dims_length) { + tmp <- selector_indices + tmp[which(known_dims == found_pattern_dim)] <- i + selector_indices_save[[i]][[j]] <- tmp + } + # This 'selectors' is only used in this while loop selectors <- sapply(1:length(known_dims), function (x) { vector_to_pick <- 1 @@ -1541,9 +1552,13 @@ Start <- function(..., # dim = indices/selectors, dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]] }) names(selectors) <- known_dims - selectors_total_list[[i]][[j]] <- selectors - names(selectors_total_list[[i]][[j]]) <- known_dims + if (largest_dims_length) { + selectors_total_list[[i]][[j]] <- selectors + names(selectors_total_list[[i]][[j]]) <- known_dims + } + + # 'replace_values' and 'file_path' are only used in this while loop replace_values[known_dims] <- selectors if (!dataset_has_files[i]) { if (any(is.na(selectors))) { @@ -1561,13 +1576,14 @@ Start <- function(..., # dim = indices/selectors, } else { file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) -#NOTE: After replacing tags, there is still * if path_glob_permissive is not FALSE. + #NOTE: After replacing tags, there is still * if path_glob_permissive is not FALSE. + # Find the possible value to substitute *. if (grepl('\\*', file_path)) { found_files <- Sys.glob(file_path) file_path <- found_files[1] # choose only the first file. -#NOTE: Above line chooses only the first found file. Because * is not tags, which means -# it is not a dimension. So it cannot store more than one item. If use * to define -# the path, that * should only represent one possibility. + #NOTE: Above line chooses only the first found file. Because * is not tags, which means + # it is not a dimension. So it cannot store more than one item. If use * to define + # the path, that * should only represent one possibility. if (length(found_files) > 1) { .warning("Using glob expression * to define the path, but more ", "than one match is found. Choose the first match only.") @@ -1616,7 +1632,6 @@ Start <- function(..., # dim = indices/selectors, array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files, along = found_pattern_dim) } - dat[[i]][['selectors']] <- dat_selectors } if (all(sapply(indices_of_first_files_with_data, is.null))) { stop("No data files found for any of the specified datasets.") @@ -1640,6 +1655,7 @@ Start <- function(..., # dim = indices/selectors, # } #////////////////////////////////////////////// + # Separate 'return_vars' into 'common_return_vars' and 'return_vars' (those = 'dat'). common_return_vars <- NULL common_first_found_file <- NULL common_return_vars_pos <- NULL @@ -1652,6 +1668,9 @@ Start <- function(..., # dim = indices/selectors, common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0))) names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)]) } + +#!!!!!!!Check here. return_vars has removed the common ones, and here remove 'dat' value???? 
+#It seems like it does some benefits to later parts return_vars <- lapply(return_vars, function(x) { if (found_pattern_dim %in% x) { @@ -1660,7 +1679,6 @@ Start <- function(..., # dim = indices/selectors, x } }) - #//////////////////////////////////////////// # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or # (2) time_across = 'sdate'. @@ -1672,38 +1690,17 @@ Start <- function(..., # dim = indices/selectors, file_dim_as_selector_array_dim <- 1 if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { - file_dim_as_selector_array_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% names(dim(selector_array)))] + file_dim_as_selector_array_dim <- + found_file_dims[[i]][which(found_file_dims[[i]] %in% names(dim(selector_array)))] } - - if (inner_dim %in% inner_dims_across_files | is.character(file_dim_as_selector_array_dim)) { #(2) or (1) + if (inner_dim %in% inner_dims_across_files | + is.character(file_dim_as_selector_array_dim)) { #(2) or (1) # inner_dim is not in return_vars or is NULL if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | - (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]])) ) { - if (is.character(file_dim_as_selector_array_dim)) { #(1) - if (file_dim_as_selector_array_dim %in% found_pattern_dim) { - stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '", - found_pattern_dim, "', which is not allowed. To assign the dependency on the pattern dim, ", - "use 'return_vars = list(", inner_dim, " = 'dat')' instead.")) - } else { - common_return_vars[[inner_dim]] <- file_dim_as_selector_array_dim - tmp <- file_dim_as_selector_array_dim - } - tmp <- file_dim_as_selector_array_dim - } else if (inner_dim %in% inner_dims_across_files) { #(2) - file_dim_name <- names(which(inner_dim == inner_dims_across_files)) - if (file_dim_name %in% found_pattern_dim) { - stop(paste0("Found '", inner_dim, "' has across dependency on the pattern dim '", - found_pattern_dim, "', which is not allowed.")) - } else { - common_return_vars[[inner_dim]] <- file_dim_name - tmp <- file_dim_name - } - tmp <- file_dim_name - } - .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", tmp, - "', but '", inner_dim, "' is not in return_vars list or is NULL. ", - "To provide the correct metadata, the value of ", inner_dim, - " in 'return_vars' is specified as '", tmp, "'.")) + (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { + common_return_vars[[inner_dim]] <- correct_return_vars( + inner_dim, inner_dims_across_files, + found_pattern_dim, file_dim_as_selector_array_dim) } } } @@ -1719,6 +1716,7 @@ Start <- function(..., # dim = indices/selectors, } #//////////////////////////////////////////// + # Create 'picked_common_vars' if (length(common_return_vars) > 0) { picked_common_vars <- vector('list', length = length(common_return_vars)) names(picked_common_vars) <- names(common_return_vars) @@ -1727,29 +1725,34 @@ Start <- function(..., # dim = indices/selectors, } picked_common_vars_ordered <- picked_common_vars picked_common_vars_unorder_indices <- picked_common_vars + + # Create 'picked_vars' picked_vars <- vector('list', length = length(dat)) names(picked_vars) <- dat_names picked_vars_ordered <- picked_vars picked_vars_unorder_indices <- picked_vars + for (i in 1:length(dat)) { if (dataset_has_files[i]) { # Put all selectors in a list of a single list/vector of selectors. 
# The dimensions that go across files will later be extended to have # lists of lists/vectors of selectors. for (inner_dim in expected_inner_dims[[i]]) { - if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || - (is.list(dat[[i]][['selectors']][[inner_dim]]) && + if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || # not list, or + (is.list(dat[[i]][['selectors']][[inner_dim]]) && # list of 2 that represents range length(dat[[i]][['selectors']][[inner_dim]]) == 2 && is.null(names(dat[[i]][['selectors']][[inner_dim]])))) { dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]]) } } + if (length(return_vars) > 0) { picked_vars[[i]] <- vector('list', length = length(return_vars)) names(picked_vars[[i]]) <- names(return_vars) picked_vars_ordered[[i]] <- picked_vars[[i]] picked_vars_unorder_indices[[i]] <- picked_vars[[i]] } + indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]]) array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]])) names(array_file_dims) <- found_file_dims[[i]] @@ -1759,35 +1762,30 @@ Start <- function(..., # dim = indices/selectors, array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE))) array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files)) array_of_not_found_var_files <- do.call('[', c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE))) + # Create previous_indices. The initial value is -1 because there is no 'previous' before the + # 1st current_indices. previous_indices <- rep(-1, length(indices_of_first_file)) names(previous_indices) <- names(indices_of_first_file) + # Create first_found_file for vars_to_read defining. It is for the dim value in return_vars + # that is NULL or character(0). Because these dims only need to be read once, so + # first_found_file indicates if these dims have been read or not. + # If read, it turns to TRUE and won't be included in vars_to_read again in the next + # 'for j loop'. 
first_found_file <- NULL if (length(return_vars) > 0) { first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0))) names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)]) } + for (j in 1:length(array_of_var_files)) { current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ] names(current_indices) <- names(indices_of_first_file) if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) { changed_dims <- which(current_indices != previous_indices) - vars_to_read <- NULL - if (length(return_vars) > 0) { - vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))] - } - if (!is.null(first_found_file)) { - if (any(!first_found_file)) { - vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)])) - } - } - if ((i == 1) && (length(common_return_vars) > 0)) { - vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))]) - } - if (!is.null(common_first_found_file)) { - if (any(!common_first_found_file)) { - vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)])) - } - } + # Prepare vars_to_read for this dataset (i loop) and this file (j loop) + vars_to_read <- generate_vars_to_read(return_vars, changed_dims, first_found_file, + common_return_vars, common_first_found_file, i) + file_object <- file_opener(array_of_var_files[j]) if (!is.null(file_object)) { for (var_to_read in vars_to_read) { @@ -1800,15 +1798,7 @@ Start <- function(..., # dim = indices/selectors, synonims) # file_dim_reader returns dimension names as found in the file. # Need to translate accoridng to synonims: - names(var_dims) <- sapply(names(var_dims), - function(x) { - which_entry <- which(sapply(synonims, function(y) x %in% y)) - if (length(which_entry) > 0) { - names(synonims)[which_entry] - } else { - x - } - }) + names(var_dims) <- replace_with_synonmins(var_dims, synonims) if (!is.null(var_dims)) { var_file_dims <- NULL if (var_to_read %in% names(common_return_vars)) { @@ -1829,13 +1819,11 @@ Start <- function(..., # dim = indices/selectors, "file dimension which also appears in the dimensions of ", "the variable inside the file.\n", array_of_var_files[j]) } - special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, - 'Date' = as.Date) first_sample <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims) - if (any(class(first_sample) %in% names(special_types))) { + if (any(class(first_sample) %in% names(time_special_types()))) { array_size <- prod(c(var_file_dims, var_dims)) - new_array <- rep(special_types[[class(first_sample)[1]]](NA), array_size) + new_array <- rep(time_special_types()[[class(first_sample)[1]]](NA), array_size) dim(new_array) <- c(var_file_dims, var_dims) } else { new_array <- array(dim = c(var_file_dims, var_dims)) @@ -1895,16 +1883,14 @@ Start <- function(..., # dim = indices/selectors, padding_dims <- full_array_var_dims padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - array_var_dims[longer_dims] - special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, - 'Date' = as.Date) if (var_to_read %in% names(common_return_vars)) { var_class <- class(picked_common_vars[[var_to_read]]) } else { var_class <- class(picked_vars[[i]][[var_to_read]]) } - if (any(var_class %in% names(special_types))) { + if (any(var_class %in% names(time_special_types()))) { padding_size <- prod(padding_dims) - padding <- 
rep(special_types[[var_class[1]]](NA), padding_size) + padding <- rep(time_special_types()[[var_class[1]]](NA), padding_size) dim(padding) <- padding_dims } else { padding <- array(dim = padding_dims) @@ -1975,7 +1961,7 @@ Start <- function(..., # dim = indices/selectors, var_store_indices, list(value = var_values))) # Turn time zone back to UTC if this var_to_read is 'time' - if (all(class(picked_common_vars[[var_to_read]]) == c('POSIXct', 'POSIXt'))) { + if (all(class(picked_common_vars[[var_to_read]]) == names(time_special_types))) { attr(picked_common_vars[[var_to_read]], "tzone") <- 'UTC' } } else { @@ -1984,7 +1970,7 @@ Start <- function(..., # dim = indices/selectors, var_store_indices, list(value = var_values))) # Turn time zone back to UTC if this var_to_read is 'time' - if (all(class(picked_vars[[i]][[var_to_read]]) == c('POSIXct', 'POSIXt'))) { + if (all(class(picked_vars[[i]][[var_to_read]]) == names(time_special_types))) { attr(picked_vars[[i]][[var_to_read]], "tzone") <- 'UTC' } } @@ -2051,15 +2037,7 @@ Start <- function(..., # dim = indices/selectors, synonims) # file_dim_reader returns dimension names as found in the file. # Need to translate accoridng to synonims: - names(data_dims) <- sapply(names(data_dims), - function(x) { - which_entry <- which(sapply(synonims, function(y) x %in% y)) - if (length(which_entry) > 0) { - names(synonims)[which_entry] - } else { - x - } - }) + names(data_dims) <- replace_with_synonmins(data_dims, synonims) } if (is.numeric(largest_dims_length)) { # largest_dims_length is a named vector @@ -2074,57 +2052,15 @@ Start <- function(..., # dim = indices/selectors, } } else { # largest_dims_length == TRUE - # Open and get all the dim from all the files - data_dims_all_files <- vector('list', length = length(selectors_total_list[[i]])) - - for (selectors_kk in 1:length(data_dims_all_files)) { - file_path <- do.call("[", c(list(array_of_files_to_load), as.list(selector_indices_save[[i]][[selectors_kk]]))) - file_to_open <- file_path - - data_dims_all_files[[selectors_kk]] <- try(file_dim_reader(file_to_open, NULL, selectors_total_list[[i]][[selectors_kk]], - lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1), - synonims), silent = TRUE) - - } - - # Remove the missing files (i.e., fail try above) - if (!identical(which(substr(data_dims_all_files, 1, 5) == 'Error'), integer(0))) { - tmp <- which(substr(data_dims_all_files, 1, 5) == 'Error') - data_dims_all_files <- data_dims_all_files[-tmp] - } - - # Find the longest dimensions from all the files - largest_data_dims <- rep(0, length(data_dims_all_files[[1]])) - - # The inner dim order may differ among files. Need to align them before - # find out the largest dim length. - dim_names_first_file <- names(data_dims_all_files[[1]]) - same_dim_order <-lapply(lapply(data_dims_all_files, names), - identical, dim_names_first_file) - for (to_fix in which(!unlist(same_dim_order))) { - data_dims_all_files[[to_fix]] <- data_dims_all_files[[to_fix]][match(dim_names_first_file, - names(data_dims_all_files[[to_fix]]))] - } - - for (kk in 1:length(data_dims_all_files[[1]])) { - largest_data_dims[kk] <- max(sapply(data_dims_all_files, '[', kk)) - } - names(largest_data_dims) <- names(data_dims_all_files[[1]]) + data_dims <- find_largest_dims_length( + selectors_total_list[[i]], array_of_files_to_load, + selector_indices_save[[i]], dat[[i]], expected_inner_dims[[i]], + synonims, file_dim_reader) # file_dim_reader returns dimension names as found in the file. 
# Need to translate accoridng to synonims: - names(largest_data_dims) <- sapply(names(largest_data_dims), - function(x) { - which_entry <- which(sapply(synonims, function(y) x %in% y)) - if (length(which_entry) > 0) { - names(synonims)[which_entry] - } else { - x - } - }) + names(data_dims) <- replace_with_synonmins(data_dims, synonims) - # replace data_dims with largest_data_dims - data_dims <- largest_data_dims } # end of if (largest_dims_length == TRUE) # Transform the variables if needed and keep them apart. @@ -2133,38 +2069,13 @@ Start <- function(..., # dim = indices/selectors, stop("Could not find all the required variables in 'transform_vars' ", "for the dataset '", dat[[i]][['name']], "'.") } + vars_to_transform <- NULL - picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) - if (length(picked_vars_to_transform) > 0) { - picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] - new_vars_to_transform <- picked_vars[[i]][picked_vars_to_transform] - which_are_ordered <- which(!sapply(picked_vars_ordered[[i]][picked_vars_to_transform], is.null)) - - ##NOTE: The following 'if' replaces the original with reordering vector - if (length(which_are_ordered) > 0) { - tmp <- which(!is.na(match(names(picked_vars_ordered[[i]]), names(which_are_ordered)))) - new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[[i]][tmp] - - } - vars_to_transform <- c(vars_to_transform, new_vars_to_transform) - } - - ##NOTE: Above is non-common vars, here is common vars (ie, return_vars = NULL). - picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) - if (length(picked_common_vars_to_transform) > 0) { - picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] - - new_vars_to_transform <- picked_common_vars[picked_common_vars_to_transform] - which_are_ordered <- which(!sapply(picked_common_vars_ordered[picked_common_vars_to_transform], is.null)) - - if (length(which_are_ordered) > 0) { - - tmp <- which(!is.na(match(names(picked_common_vars_ordered), names(which_are_ordered)))) - new_vars_to_transform[which_are_ordered] <- picked_common_vars_ordered[tmp] - } - vars_to_transform <- c(vars_to_transform, new_vars_to_transform) - } - + # picked_vars[[i]] + vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_vars[[i]], transform_vars, picked_vars_ordered[[i]]) + # picked_common_vars + vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars, transform_vars, picked_common_vars_ordered) + # Transform the variables transformed_data <- do.call(transform, c(list(data_array = NULL, variables = vars_to_transform, @@ -2674,6 +2585,7 @@ Start <- function(..., # dim = indices/selectors, print(file_dim) } } + #???????????????? if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { @@ -2697,23 +2609,25 @@ Start <- function(..., # dim = indices/selectors, # The selector_checker will return either a vector of indices or a list # with the first and last desired indices. + #NOTE: goes_across_prime_meridian may be TRUE only if the selector is list goes_across_prime_meridian <- FALSE + is_circular_dim <- FALSE if (!is.null(var_ordered) && !selectors_are_indices) { if (!is.null(dim_reorder_params[[inner_dim]])) { if (is.list(sub_array_of_selectors)) { - ## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here. 
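The border-crossing check referred to in the NOTE above can be illustrated with a small, self-contained sketch. The reorder function stored in dim_reorder_params is assumed to behave like CircularSort(): it wraps the values into the requested range and returns the sorted values in $x together with the ordering permutation in $ix. The circular_sort() helper below is a simplified stand-in written only for this example, not the package implementation.

circular_sort <- function(start, end) {
  # Wrap the values into [start, end) and sort them, keeping the permutation,
  # analogous to the $x / $ix output that CircularSort() provides.
  function(x) {
    x <- (x - start) %% (end - start) + start
    sort(x, index.return = TRUE)
  }
}
reorder_fun <- circular_sort(0, 360)
tmp <- reorder_fun(c(-10, 20))$ix              # [-10, 20] maps to [350, 20]; $ix is c(2, 1)
goes_across_prime_meridian <- tmp[1] > tmp[2]  # TRUE: the selector range crosses the border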
- is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular") - if (!is.null(is_circular_dim)) { - if (is_circular_dim) { - - # NOTE: Use CircularSort() to put the values in the assigned range, and get the order. - # For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1]. - # 'goes_across_prime_meridian' means the selector range across the border. For example, - # CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE. - tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix - goes_across_prime_meridian <- tmp[1] > tmp[2] - } + if ('circular' %in% names(attributes(dim_reorder_params[[inner_dim]]))) { + is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular") + } + if (is_circular_dim) { + # NOTE: Use CircularSort() to put the values in the assigned range, and get the order. + # For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1]. + # 'goes_across_prime_meridian' means the selector range across the border. For example, + # CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE. + # dim_reorder_params is a list of Reorder function, i.e., + # Sort() or CircularSort(). + tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix + goes_across_prime_meridian <- tmp[1] > tmp[2] } # HERE change to the same code as below (under 'else'). Not sure why originally @@ -2724,19 +2638,12 @@ Start <- function(..., # dim = indices/selectors, #sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder]) # Add warning if the boundary is out of range - if (sub_array_of_selectors[1] < range(var_ordered)[1] | sub_array_of_selectors[1] > range(var_ordered)[2]) { - .warning(paste0("The lower boundary of selector of ", - inner_dim, - " is out of range [", - min(var_ordered), ", ", max(var_ordered), "]. ", - "Check if the desired range is all included.")) + if (min(unlist(sub_array_of_selectors)) < range(var_ordered)[1]) { + show_out_of_range_warning(inner_dim, range = range(var_ordered), + bound = 'lower') } - if (sub_array_of_selectors[2] < range(var_ordered)[1] | sub_array_of_selectors[2] > range(var_ordered)[2]) { - .warning(paste0("The upper boundary of selector of ", - inner_dim, - " is out of range [", - min(var_ordered), ", ", max(var_ordered), "]. ", - "Check if the desired range is all included.")) + if (max(unlist(sub_array_of_selectors)) > range(var_ordered)[2]) { + show_out_of_range_warning(inner_dim, range = range(var_ordered), bound = 'upper') } @@ -2781,23 +2688,13 @@ Start <- function(..., # dim = indices/selectors, # Add warning if the boundary is out of range if (is.list(sub_array_of_selectors)) { - if (sub_array_of_selectors[1] < - min(sub_array_of_values) | sub_array_of_selectors[1] > - max(sub_array_of_values)) { - .warning(paste0("The lower boundary of selector of ", - inner_dim, " is out of range [", - min(sub_array_of_values), ", ", - max(sub_array_of_values), "]. ", - "Check if the desired range is all included.")) + if (min(unlist(sub_array_of_selectors)) < min(sub_array_of_values)) { + show_out_of_range_warning(inner_dim, range = range(sub_array_of_values), + bound = 'lower') } - if (sub_array_of_selectors[2] < - min(sub_array_of_values) | sub_array_of_selectors[2] > - max(sub_array_of_values)) { - .warning(paste0("The upper boundary of selector of ", - inner_dim, " is out of range [", - min(sub_array_of_values), ", ", - max(sub_array_of_values), "]. 
", - "Check if the desired range is all included.")) + if (max(unlist(sub_array_of_selectors)) > max(sub_array_of_values)) { + show_out_of_range_warning(inner_dim, range = range(sub_array_of_values), + bound = 'upper') } } @@ -2857,84 +2754,16 @@ Start <- function(..., # dim = indices/selectors, print("-> SELECTORS REQUESTED BEFORE TRANSFORM.") } } - - ###NOTE: Here, the transform, is different from the below part of non-transform. - # search 'if (goes_across_prime_meridian' to find the lines below. - if (goes_across_prime_meridian) { - # NOTE: before changing, the return is already correct. - - #NOTE: The fix below has the same explanation as no with_transform part below. - # Search the next next 'if (goes_across_prime_meridian) {'. - if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) { - # global longitude - sub_array_of_fri <- 1:n - # Warning if transform_extra_cell != 0 - if (beta != 0) { - .warning(paste0("Adding parameter transform_extra_cells = ", - transform_extra_cells, " to the transformed index excesses ", - "the border. The border index is used for transformation.")) - } - - } else { - # normal case, i.e., not global - first_index <- min(unlist(sub_array_of_indices)) - last_index <- max(unlist(sub_array_of_indices)) - gap_width <- last_index - first_index - 1 - sub_array_of_fri <- c(1:(min(unlist(sub_array_of_indices)) + min(gap_width, beta)), - (max(unlist(sub_array_of_indices)) - min(gap_width, beta)):n) - - if (min(gap_width, beta) != beta) { - .warning(paste0("Adding parameter transform_extra_cells = ", - transform_extra_cells, " to the transformed index excesses ", - "the border. The border index is used for transformation.")) - } - } - - } else { - #NOTE: This if seems redundant. - if (is.list(sub_array_of_indices)) { - sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] - } - first_index <- min(unlist(sub_array_of_indices)) - last_index <- max(unlist(sub_array_of_indices)) - - start_padding <- min(beta, first_index - 1) - end_padding <- min(beta, n - last_index) - - if (exists("is_circular_dim")) { - if (!is_circular_dim) { #latitude - sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) - if (start_padding != beta | end_padding != beta) { - .warning(paste0("Adding parameter transform_extra_cells = ", - transform_extra_cells, " to the transformed index excesses ", - "the border. The border index is used for transformation.")) - } - } else { #longitude - if ((last_index - first_index + 1 + beta * 2) >= n) { - sub_array_of_fri <- 1:n - } else if (start_padding < beta) { # left side too close to border, need to go to right side - sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n) - } else if (end_padding < beta) { # right side too close to border, need to go to left side - sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding)) - } else { #normal - sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) - } - } - } else { # when _reorder is not used - sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) - if (start_padding != beta | end_padding != beta) { - .warning(paste0("Adding parameter transform_extra_cells = ", - transform_extra_cells, " to the transformed index excesses ", - "the border. 
The border index is used for transformation.")) - } - } - - } + # Generate sub_array_of_fri + sub_array_of_fri <- generate_sub_array_of_fri( + with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta, + is_circular_dim) + subset_vars_to_transform <- vars_to_transform if (!is.null(var_ordered)) { - ##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector. - ## Turn it into array and add dimension name. + #NOTE: If var_ordered is common_vars, it doesn't have attributes and it is a vector. + # Turn it into array and add dimension name. if (!is.array(var_ordered)) { var_ordered <- as.array(var_ordered) names(dim(var_ordered)) <- inner_dim @@ -3030,9 +2859,11 @@ Start <- function(..., # dim = indices/selectors, if (debug) { if (inner_dim %in% dims_to_check) { print("-> FIRST INDEX:") - print(first_index) +# print(first_index) + print("NOTE: Check function generate_sub_array_of_fri() in zzz.R") print("-> LAST INDEX:") - print(last_index) +# print(last_index) + print("NOTE: Check function generate_sub_array_of_fri() in zzz.R") print("-> STRUCTURE OF FIRST ROUND INDICES:") print(str(sub_array_of_fri)) print("-> STRUCTURE OF SECOND ROUND INDICES:") @@ -3072,30 +2903,11 @@ Start <- function(..., # dim = indices/selectors, ### sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position), list(value = sub_array_of_sri))) - } else { - if (goes_across_prime_meridian) { - #NOTE: The potential problem here is, if it is global longitude, - # and the indices overlap (e.g., lon = [0, 359.723] and - # CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). - # Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll - # get two 649. - # The fix below may not be the best solution, but it works for - # the example above. - - if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) { - # global longitude - sub_array_of_fri <- c(1:n) - } else { - # the common case, i.e., non-global - sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)), - max(unlist(sub_array_of_indices)):n) - } - - } else if (is.list(sub_array_of_indices)) { - sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] - } else { - sub_array_of_fri <- sub_array_of_indices - } + + } else { # !with_transform + sub_array_of_fri <- generate_sub_array_of_fri( + with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta, + is_circular_dim) } if (!is.null(var_unorder_indices)) { if (is.null(ordered_fri)) { @@ -3111,7 +2923,7 @@ Start <- function(..., # dim = indices/selectors, taken_chunks <- TRUE } } - } else { + } else { #???????????? if (debug) { if (inner_dim %in% dims_to_check) { print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") @@ -3478,95 +3290,37 @@ Start <- function(..., # dim = indices/selectors, # final_dims will be used for collocation of data, whereas final_dims_fake # will be used for shaping the final array to be returned to the user. 
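As a sketch of that reshaping with hypothetical dimension sizes: merging an inner dimension that goes across a file dimension collapses the two adjacent dimensions into one, using the dims_merge() helper added in zzz.R further below.

inner_dims_across_files <- list(file_date = 'time')   # 'time' goes across 'file_date'
final_dims_fake <- c(dat = 1, var = 1, time = 31, file_date = 12, lat = 256, lon = 512)
dims_merge(inner_dims_across_files, final_dims_fake)
# returns c(dat = 1, var = 1, time = 372, lat = 256, lon = 512)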
final_dims_fake <- final_dims - if (merge_across_dims) { - if (!is.null(inner_dims_across_files)) { - for (file_dim_across in names(inner_dims_across_files)) { - inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]]) - new_dims <- c() - if (inner_dim_pos > 1) { - new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)]) - } - new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), - inner_dims_across_files[[file_dim_across]])) - if (inner_dim_pos + 1 < length(final_dims_fake)) { - new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)]) - } - final_dims_fake <- new_dims - } - } + final_dims_fake <- dims_merge(inner_dims_across_files, final_dims_fake) } #========================================================================= # Find the dimension to split if split_multiselected_dims = TRUE. # If there is no dimension able to be split, change split_multiselected_dims to FALSE. all_split_dims <- NULL if (split_multiselected_dims) { - for (dim_param in 1:length(dim_params)) { - if (!is.null(dim(dim_params[[dim_param]]))) { - if (length(dim(dim_params[[dim_param]])) > 1) { - split_dims <- dim(dim_params[[dim_param]]) - all_split_dims <- c(all_split_dims, setNames(list(split_dims), - names(dim_params)[dim_param])) - if (is.null(names(split_dims))) { - names(split_dims) <- paste0(names(dim_params)[dim_param], - 1:length(split_dims)) - } - old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param]) + tmp <- dims_split(dim_params, final_dims_fake) + final_dims_fake <- tmp[[1]] + # all_split_dims is a list containing all the split dims + all_split_dims <- tmp[[2]] - # If merge_across_dims and split_multiselected_dims are both used, - # on one file dim, and this file dim is multi-dim, it doesn't work. - if (identical(old_dim_pos, integer(0))) { - stop(paste0("The dimension '", names(dim_params)[dim_param], - "' to be split cannot be found after 'merge_across_dims' ", - "is used. Check if the reshape parameters are used appropriately.")) - } - - # NOTE: Three steps to create new dims. - # 1st: Put in the dims before split_dim. - # 2nd: Replace the old_dim with split_dims. - # 3rd: Put in the dims after split_dim. - new_dims <- c() - if (old_dim_pos > 1) { - new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)]) - } - new_dims <- c(new_dims, split_dims) - if (old_dim_pos < length(final_dims_fake)) { - new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)]) - } - final_dims_fake <- new_dims - } - } - } - if (is.null(all_split_dims)) { - split_multiselected_dims <- FALSE - .warning(paste0("Not found any dimensions able to be split. The parameter ", - "'split_multiselected_dims' is changed to FALSE.")) - } + if (is.null(all_split_dims)) { + split_multiselected_dims <- FALSE + .warning(paste0("Not found any dimensions able to be split. The parameter ", + "'split_multiselected_dims' is changed to FALSE.")) + } } #====================================================================== + # If only merge_across_dims and merge_across_dims_narm and no split_multiselected_dims, + # the length of inner across dim (e.g., time) needs to be adjusted. Sum up the actual length + # without potential NAs. if (merge_across_dims) { - # only merge_across_dims -> the 'time' dim length needs to be adjusted + # Prepare the arguments for later use across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? 
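A sketch of that length adjustment with hypothetical sizes, using the merge_narm_dims() helper added in zzz.R further below: four files contribute 31, 28, 31 and 30 time steps, but after the merge the 'time' dimension was padded to 4 x 31 = 124, so it is shrunk to the actual total of 120.

length_inner_across_dim <- list(31, 28, 31, 30)
final_dims_fake <- c(dat = 1, var = 1, time = 124, lat = 256, lon = 512)
merge_narm_dims(final_dims_fake, 'time', length_inner_across_dim)
# returns c(dat = 1, var = 1, time = 120, lat = 256, lon = 512)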
# Get the length of each inner_dim ('time') along each file_dim ('file_date') length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length) - if (merge_across_dims_narm) { - across_file_dim <- names(inner_dims_across_files) #TODO: more than one? - - if (!split_multiselected_dims) { - final_dims_fake_name <- names(final_dims_fake) - pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim) - new_length_inner_dim <- sum(unlist(length_inner_across_dim)) - if (pos_across_inner_dim != length(final_dims_fake)) { - final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)], - new_length_inner_dim, - final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)]) - } else { - final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)], - new_length_inner_dim) - } - names(final_dims_fake) <- final_dims_fake_name - } + if (merge_across_dims_narm & !split_multiselected_dims) { + final_dims_fake <- merge_narm_dims(final_dims_fake, across_inner_dim, length_inner_across_dim) } } @@ -3594,7 +3348,7 @@ Start <- function(..., # dim = indices/selectors, # NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed. # The inner_dim needs to be the first dim among split dims. - # Cannot control the rest dims are in the same order or not... + # TODO: Cannot control the rest dims are in the same order or not... # Suppose users put the same order of across inner and file dims. if (split_multiselected_dims & merge_across_dims) { # TODO: More than one split? @@ -3602,24 +3356,15 @@ Start <- function(..., # dim = indices/selectors, # if inner_dim is not the first, change! if (inner_dim_pos_in_split_dims != 1) { - split_dims <- c(split_dims[inner_dim_pos_in_split_dims], - split_dims[1:length(split_dims)][-inner_dim_pos_in_split_dims]) - split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(split_dims)))) - # Save the current final_dims_fake for later reorder back + # Save the current final_dims_fake for reordering it back later final_dims_fake_output <- final_dims_fake - new_dims <- c() - if (split_dims_pos[1] != 1) { - new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)]) - } - new_dims <- c(new_dims, split_dims) - if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { - new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) - } - final_dims_fake <- new_dims + tmp <- reorder_split_dims(all_split_dims[[1]], inner_dim_pos_in_split_dims, final_dims_fake) + final_dims_fake <- tmp[[1]] + all_split_dims[[1]] <- tmp[[2]] } } - # The following several lines will only be run if retrieve = TRUE + # The following several lines will only run if retrieve = TRUE if (retrieve) { ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ########### @@ -3668,136 +3413,22 @@ Start <- function(..., # dim = indices/selectors, work_pieces <- list() for (i in 1:length(dat)) { if (dataset_has_files[i]) { - selectors <- dat[[i]][['selectors']] - file_dims <- found_file_dims[[i]] - inner_dims <- expected_inner_dims[[i]] - sub_array_dims <- final_dims[file_dims] - sub_array_dims[found_pattern_dim] <- 1 - sub_array_of_files_to_load <- array(1:prod(sub_array_dims), - dim = sub_array_dims) - names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims) - # Detect which of the dimensions of the dataset go across files. 
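For instance (hypothetical inner dimension names), when only 'time' goes across the file dimension 'file_date', the mapping built below reduces to:

# inner dims that do not go across any file dimension map to NULL
file_dim_across_files <- list(time = 'file_date', latitude = NULL, longitude = NULL)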
- file_dim_across_files <- lapply(inner_dims, - function(x) { - dim_across <- sapply(inner_dims_across_files, function(y) x %in% y) - if (any(dim_across)) { - names(inner_dims_across_files)[which(dim_across)[1]] - } else { - NULL - } - }) - names(file_dim_across_files) <- inner_dims - j <- 1 - while (j <= prod(sub_array_dims)) { - # Work out file path. - file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] - names(file_to_load_sub_indices) <- names(sub_array_dims) - file_to_load_sub_indices[found_pattern_dim] <- i - big_dims <- rep(1, length(dim(array_of_files_to_load))) - names(big_dims) <- names(dim(array_of_files_to_load)) - file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]] - file_to_load <- do.call('[[', c(list(array_of_files_to_load), - as.list(file_to_load_indices))) - not_found_file <- do.call('[[', c(list(array_of_not_found_files), - as.list(file_to_load_indices))) - load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), - as.list(file_to_load_indices))) - if (load_file_metadata) { - metadata_file_counter <- metadata_file_counter + 1 - } - if (!is.na(file_to_load) && !not_found_file) { - # Work out indices to take - first_round_indices <- lapply(inner_dims, - function (x) { - if (is.null(file_dim_across_files[[x]])) { - x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names") - if (!is.null(x_dim_name)) { - which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['fri']][[which_chunk]] - } else { - selectors[[x]][['fri']][[1]] - } - } else { - which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] - selectors[[x]][['fri']][[which_chunk]] - } - }) - names(first_round_indices) <- inner_dims - second_round_indices <- lapply(inner_dims, - function (x) { - if (is.null(file_dim_across_files[[x]])) { - x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names") - if (!is.null(x_dim_name)) { - which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['sri']][[which_chunk]] - } else { - selectors[[x]][['sri']][[1]] - } - } else { - which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] - selectors[[x]][['sri']][[which_chunk]] - } - }) - if (debug) { - print("-> BUILDING A WORK PIECE") - #print(str(selectors)) - } - names(second_round_indices) <- inner_dims - if (!any(sapply(first_round_indices, length) == 0)) { - work_piece <- list() - work_piece[['first_round_indices']] <- first_round_indices - work_piece[['second_round_indices']] <- second_round_indices - work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices - work_piece[['file_path']] <- file_to_load - work_piece[['store_dims']] <- final_dims - # Work out store position - store_position <- final_dims - store_position[names(file_to_load_indices)] <- file_to_load_indices - store_position[inner_dims] <- rep(1, length(inner_dims)) - work_piece[['store_position']] <- store_position - # Work out file selectors - file_selectors <- sapply(file_dims, - function (x) { - vector_to_pick <- 1 - if (x %in% names(depending_file_dims)) { - vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] - } - selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] - }) - names(file_selectors) <- file_dims - work_piece[['file_selectors']] <- file_selectors - # Send variables for transformation - if (!is.null(transform) && (length(transform_vars) > 0)) { - vars_to_transform <- NULL - picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) - if 
(length(picked_vars_to_transform) > 0) { - picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] - vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform]) - if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) { - picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))] - vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform] - } - } - picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) - if (length(picked_common_vars_to_transform) > 0) { - picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] - vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform]) - if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) { - picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))] - vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform] - } - } - work_piece[['vars_to_transform']] <- vars_to_transform - } - # Send flag to load metadata - if (load_file_metadata) { - work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter) - } - work_pieces <- c(work_pieces, list(work_piece)) - } - } - j <- j + 1 - } + # metadata_file_counter may be changed by the following function + work_pieces <- build_work_pieces( + work_pieces = work_pieces, i = i, selectors = dat[[i]][['selectors']], + file_dims = found_file_dims[[i]], + inner_dims = expected_inner_dims[[i]], final_dims = final_dims, + found_pattern_dim = found_pattern_dim, + inner_dims_across_files = inner_dims_across_files, + array_of_files_to_load = array_of_files_to_load, + array_of_not_found_files = array_of_not_found_files, + array_of_metadata_flags = array_of_metadata_flags, + metadata_file_counter = metadata_file_counter, + depending_file_dims = depending_file_dims, transform = transform, + transform_vars = transform_vars, picked_vars = picked_vars[[i]], + picked_vars_ordered = picked_vars_ordered[[i]], + picked_common_vars = picked_common_vars, + metadata_folder = metadata_folder, debug = debug) } } #print("N") @@ -3807,65 +3438,7 @@ Start <- function(..., # dim = indices/selectors, # Calculate the progress %s that will be displayed and assign them to # the appropriate work pieces. 
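A worked example of that bookkeeping with hypothetical counts: with 48 work pieces and 4 processes, 48 / 4 >= 10, so ten steps of 10% are attached to ten roughly evenly spaced pieces.

n_pieces <- 48; num_procs <- 4
amount <- 10; reps <- 10             # branch taken when n_pieces / num_procs >= 10
progress_steps <- paste0(' + ', round(rep(amount, reps), 2), '%')
selected_pieces <- round(seq(1, n_pieces, length.out = reps + 1))[-1]
# each piece listed in selected_pieces prints one ' + 10%' step once it has been read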
- if (length(work_pieces) / num_procs >= 2 && !silent) { - if (length(work_pieces) / num_procs < 10) { - amount <- 100 / ceiling(length(work_pieces) / num_procs) - reps <- ceiling(length(work_pieces) / num_procs) - } else { - amount <- 10 - reps <- 10 - } - progress_steps <- rep(amount, reps) - if (length(work_pieces) < (reps + 1)) { - selected_pieces <- length(work_pieces) - progress_steps <- c(sum(head(progress_steps, reps)), - tail(progress_steps, reps)) - } else { - selected_pieces <- round(seq(1, length(work_pieces), - length.out = reps + 1))[-1] - } - progress_steps <- paste0(' + ', round(progress_steps, 2), '%') - progress_message <- 'Progress: 0%' - } else { - progress_message <- '' - selected_pieces <- NULL - } - piece_counter <- 1 - step_counter <- 1 - work_pieces <- lapply(work_pieces, - function (x) { - if (piece_counter %in% selected_pieces) { - wp <- c(x, list(progress_amount = progress_steps[step_counter])) - step_counter <<- step_counter + 1 - } else { - wp <- x - } - piece_counter <<- piece_counter + 1 - wp - }) - if (!silent) { - .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.") - .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.") - .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:")) - if (length(work_pieces) < 30) { - lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2)) - } else { - .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5) - } - } - - # Build the cluster of processes that will do the work and dispatch work pieces. - # The function .LoadDataFile is applied to each work piece. This function will - # open the data file, regrid if needed, subset, apply the mask, - # compute and apply the weights if needed, - # disable extreme values and store in the shared memory matrix. - #print("O") - if (!silent) { - .message("Loading... This may take several minutes...") - if (progress_message != '') { - .message(progress_message, appendLF = FALSE) - } - } + work_pieces <- retrieve_progress_message(work_pieces, num_procs, silent) # NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here, # the path name is created in work_pieces but the path hasn't been built yet. 
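For reference, each work piece handed to .LoadDataFile() is a plain named list. A minimal hypothetical instance with the fields assembled above (all values invented for illustration; second_round_indices is left empty as when no transformation is requested) looks like:

work_piece <- list(
  first_round_indices = list(time = 1:31, latitude = 1:256, longitude = 1:512),
  second_round_indices = list(time = NULL, latitude = NULL, longitude = NULL),
  file_indices_in_array_of_files = c(dat = 1, var = 1, sdate = 2),
  file_path = '/hypothetical/path/tas_19940101.nc',
  store_dims = c(dat = 1, var = 1, sdate = 4, time = 31, latitude = 256, longitude = 512),
  store_position = c(dat = 1, var = 1, sdate = 2, time = 1, latitude = 1, longitude = 1),
  file_selectors = c(dat = 'dat1', var = 'tas', sdate = '19940101'),
  save_metadata_in = 'metadata_folder/1'  # only set when the metadata flag is TRUE for this file
)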
@@ -3893,7 +3466,8 @@ Start <- function(..., # dim = indices/selectors, } if (!silent) { - if (progress_message != '') { + # if (progress_message != '') + if (length(work_pieces) / num_procs >= 2 && !silent) { .message("\n", tag = '') } } @@ -3907,38 +3481,9 @@ Start <- function(..., # dim = indices/selectors, if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) } else { - # Get the length of these two dimensions in final_dims - length_inner_across_store_dims <- final_dims[across_inner_dim] - length_file_across_store_dims <- final_dims[across_file_dim] - - # Create a logical array for merge_across_dims - logi_array <- array(rep(FALSE, - length_file_across_store_dims * length_inner_across_store_dims), - dim = c(length_inner_across_store_dims, length_file_across_store_dims)) - for (i in 1:length_file_across_store_dims) { #1:4 - logi_array[1:length_inner_across_dim[[i]], i] <- TRUE - } - - # First, get the data array with final_dims dimension - data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims) - - # Change the NA derived from additional spaces to -9999, then remove these -9999 - func_remove_blank <- function(data_array, logi_array) { - # dim(data_array) = [time, file_date] - # dim(logi_array) = [time, file_date] - # Change the blank spaces from NA to -9999 - data_array[which(!logi_array)] <- -9999 - return(data_array) - } - data_array_final_dims <- multiApply::Apply(data_array_final_dims, - target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') - output_dims = c(across_inner_dim, across_file_dim), - fun = func_remove_blank, - logi_array = logi_array)$output1 - ## reorder back to the correct dim - tmp <- match(names(final_dims), names(dim(data_array_final_dims))) - data_array_final_dims <- .aperm2(data_array_final_dims, tmp) - data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector + data_array_tmp <- remove_additional_na_from_merge( + inner_dims_across_files, final_dims, across_inner_dim, + length_inner_across_dim, data_array) } if (length(data_array_tmp) != prod(final_dims_fake)) { @@ -3949,70 +3494,9 @@ Start <- function(..., # dim = indices/selectors, #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. 
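A condensed sketch of that clean-up with hypothetical sizes (the code above uses a -9999 placeholder and multiApply::Apply() to keep the dimension order; a plain logical mask is enough to show the idea):

length_inner_across_dim <- list(31, 28, 31, 30)            # actual 'time' length per file
data_merged <- array(rnorm(31 * 4), dim = c(time = 31, file_date = 4))
data_merged[29:31, 2] <- NA                                 # padding added by the merge
data_merged[31, 4] <- NA
keep <- array(FALSE, dim = c(31, 4))
for (k in 1:4) keep[seq_len(length_inner_across_dim[[k]]), k] <- TRUE
data_vector <- data_merged[keep]                            # length 120, padding removed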
if (split_multiselected_dims) { - - # generate the correct order list from indices_chunk - final_order_list <- list() - i <- 1 - j <- 1 - a <- indices_chunk[i] - while (i <= length(indices_chunk)) { - while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) { - a <- c(a, indices_chunk[i+1]) - i <- i + 1 - } - final_order_list[[j]] <- a - a <- indices_chunk[i+1] - i <- i + 1 - j <- j + 1 - } - names(final_order_list) <- sapply(final_order_list, '[[', 1) - final_order_list <- lapply(final_order_list, length) - - if (!all(diff(as.numeric(names(final_order_list))) > 0)) { - # shape the vector into the array without split_dims - split_dims_pos <- match(split_dims, final_dims_fake) - new_dims <- c() - if (split_dims_pos[1] > 1) { - new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)]) - } - new_dims <- c(new_dims, prod(split_dims)) - names(new_dims)[split_dims_pos[1]] <- across_inner_dim - if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { - new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) - } - final_dims_fake_no_split <- new_dims - data_array_no_split <- array(data_array_tmp, dim = final_dims_fake_no_split) - # seperate 'time' dim into each work_piece length - data_array_seperate <- list() - tmp <- cumsum(unlist(length_inner_across_dim)) - tmp <- c(0, tmp) - for (i in 1:length(length_inner_across_dim)) { - data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim, - (tmp[i] + 1):tmp[i + 1]) - } - - # re-build the array: chunk - which_chunk <- as.numeric(names(final_order_list)) - how_many_indices <- unlist(final_order_list) - array_piece <- list() - ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) - for (i in 1:length(final_order_list)) { - array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]], - across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) - ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - } - - # re-build the array: paste - data_array_tmp <- array_piece[[1]] - along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) - if (length(array_piece) > 1) { - for (i in 2:length(array_piece)) { - data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], - along = along_pos) - } - } - } + data_array_tmp <- rebuild_array_merge_split( + data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, + across_inner_dim, length_inner_across_dim) } data_array <- array(data_array_tmp, dim = final_dims_fake) @@ -4044,45 +3528,10 @@ Start <- function(..., # dim = indices/selectors, unlink(metadata_folder, recursive = TRUE) -#NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat -# for $common, it is a list of metadata length. For $dat, it is a list of dat length, -# and each sublist has the metadata for each dat. - dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims] - if (!any(names(dim_of_metadata) == pattern_dims) | - (any(names(dim_of_metadata) == pattern_dims) & - dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code - return_metadata <- vector('list', - length = prod(dim(array_of_metadata_flags)[metadata_dims])) - return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata - dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) - - } else { # put under $dat. 
metadata_dims has 'dat' and dat length > 1 - return_metadata <- vector('list', - length = dim_of_metadata[pattern_dims]) - names(return_metadata) <- dat_names - for (kk in 1:length(return_metadata)) { - return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat - } - loaded_metadata_count <- 1 - for (kk in 1:length(return_metadata)) { - for (jj in 1:length(return_metadata[[kk]])) { - - if (dataset_has_files[kk]) { - if (loaded_metadata_count %in% loaded_metadata_files) { - return_metadata[[kk]][jj] <- loaded_metadata[[loaded_metadata_count]] - names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[loaded_metadata_count]]) - } else { - return_metadata[[kk]][jj] <- NULL - } - loaded_metadata_count <- loaded_metadata_count + 1 - } else { - return_metadata[[kk]][jj] <- NULL - } - - } - } - } - attr(data_array, 'Variables') <- return_metadata + # Create a list of metadata of the variable (e.g., tas) + return_metadata <- create_metadata_list(array_of_metadata_flags, metadata_dims, pattern_dims, + loaded_metadata_files, loaded_metadata, dat_names, + dataset_has_files) # TODO: Try to infer data type from loaded_metadata # as.integer(data_array) } @@ -4141,65 +3590,30 @@ Start <- function(..., # dim = indices/selectors, for (i in 1:length(dat)) { file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] } + if (retrieve) { if (!silent) { .message("Successfully retrieved data.") } - if (all(sapply(attr(data_array, 'Variables'), is.null))) { - var_backup <- NULL + if (all(sapply(return_metadata, is.null))) { + # We don't have metadata of the variable (e.g., tas). The returned metadata list only + # contains those are specified in argument "return_vars". + Variables_list <- c(list(common = picked_common_vars), picked_vars) .warning(paste0("Metadata cannot be retrieved. The reason may be the ", "non-existence of the first file. Use parameter 'metadata_dims'", " to assign to file dimensions along which to return metadata, ", "or check the existence of the first file.")) } else { - -#NOTE: The metadata of variables can be saved in one of the two different structures. -# (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1): -# put under $common -# (2) (metadata_dims == 'dat' & length(dat) > 1): -# put under $dat1, $dat2, .... Put it in picked_vars list -#TODO: The current (2) uses the inefficient method. Should define the list structure first -# then fill the list, rather than expand it in the for loop. 
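The two shapes described in the NOTE above, outlined with hypothetical variable names (placeholder strings stand in for the actual metadata objects):

# (1) metadata_dims != 'dat', or metadata_dims == 'dat' with a single dataset:
#     the variable metadata goes under $common.
Variables <- list(common = list(time = '<time values>', tas = '<tas metadata>'),
                  dat1   = list(longitude = '<longitude values>'))
# (2) metadata_dims == 'dat' and more than one dataset:
#     each dataset carries its own copy of the variable metadata.
Variables <- list(common = list(time = '<time values>'),
                  dat1   = list(longitude = '<longitude values>', tas = '<tas metadata of dat1>'),
                  dat2   = list(longitude = '<longitude values>', tas = '<tas metadata of dat2>'))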
- if (any(metadata_dims == pattern_dims) & length(dat) > 1) { # (2) - var_backup <- attr(data_array, 'Variables') - for (kk in 1:length(var_backup)) { - sublist_names <- lapply(var_backup, names)[[kk]] - if (!is.null(sublist_names)) { - for (jj in 1:length(sublist_names)) { - picked_vars[[kk]][[sublist_names[jj]]] <- var_backup[[kk]][[jj]] - } - } - } - var_backup <- NULL - - } else { #(1) - var_backup <- attr(data_array, 'Variables') - len <- unlist(lapply(var_backup, length)) - len <- sum(len) + length(which(len == 0)) #0 means NULL - name_list <- lapply(var_backup, names) - new_list <- vector('list', length = len) - count <- 1 - - for (kk in 1:length(var_backup)) { - if (length(var_backup[[kk]]) == 0) { #NULL - count <- count + 1 - } else { - for (jj in 1:length(var_backup[[kk]])) { - new_list[[count]] <- var_backup[[kk]][[jj]] - names(new_list)[count] <- name_list[[kk]][jj] - count <- count + 1 - } - } - } - var_backup <- new_list + # Add the metadata of the variable (e.g., tas) into the list of picked_vars or + # picked_common_vars. + Variables_list <- combine_metadata_picked_vars( + return_metadata, picked_vars, picked_common_vars, + metadata_dims, pattern_dims, length(dat)) } -} - attr(data_array, 'Variables') <- NULL attributes(data_array) <- c(attributes(data_array), - list(Variables = c(list(common = c(picked_common_vars, var_backup)), - picked_vars), + list(Variables = Variables_list, Files = array_of_files_to_load, NotFoundFiles = array_of_not_found_files, FileSelectors = file_selectors, @@ -4208,7 +3622,8 @@ Start <- function(..., # dim = indices/selectors, ) attr(data_array, 'class') <- c('startR_array', attr(data_array, 'class')) data_array - } else { + + } else { # retrieve = FALSE if (!silent) { .message("Successfully discovered data dimensions.") } diff --git a/R/zzz.R b/R/zzz.R index 9d607d2..a775b47 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -420,3 +420,755 @@ find_ufd_value <- function(undefined_file_dims, dat, i, replace_values, return(dat_selectors) } + +# Adjust the argument 'return_vars' if users don't assign them properly. +# Force return_vars = (time = NULL) to (time = 'sdate') if one of the situations: +# (1) selector = [sdate = 2, time = 4], or +# (2) time_across = 'sdate'. +correct_return_vars <- function(inner_dim, inner_dims_across_files, found_pattern_dim, + file_dim_as_selector_array_dim) { + # inner_dim is not in return_vars or is NULL + if (is.character(file_dim_as_selector_array_dim)) { #(1) + if (file_dim_as_selector_array_dim %in% found_pattern_dim) { + stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '", + found_pattern_dim, + "', which is not allowed. To assign the dependency on the pattern dim, ", + "use 'return_vars = list(", inner_dim, " = 'dat')' instead.")) + } else { + corrected_value <- file_dim_as_selector_array_dim + } + } else if (inner_dim %in% inner_dims_across_files) { #(2) + file_dim_name <- names(which(inner_dim == inner_dims_across_files)) + if (file_dim_name %in% found_pattern_dim) { + stop(paste0("Found '", inner_dim, "' has across dependency on the pattern dim '", + found_pattern_dim, "', which is not allowed.")) + } else { + corrected_value <- file_dim_name + } + } + .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value, + "', but '", inner_dim, "' is not in return_vars list or is NULL. 
", + "To provide the correct metadata, the value of ", inner_dim, + " in 'return_vars' is specified as '", corrected_value, "'.")) + return(corrected_value) +} + +# The time classes that are needed to adjust time zone back to UTC. +time_special_types <- function() { + list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, 'Date' = as.Date) +} + +# Replace the dim names read from netCDF file with the user-specified synonims. +replace_with_synonmins <- function(read_dims, synonims) { + corrected_dim_name <- sapply(names(read_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + return(corrected_dim_name) +} + + +# Prepare vars_to_read for this dataset (i loop) and this file (j loop) +generate_vars_to_read <- function(return_vars, changed_dims, first_found_file, common_return_vars, + common_first_found_file, i) { + vars_to_read <- NULL + if (length(return_vars) > 0) { + #NOTE: because return_vars has changed 'dat' to character(0) above (line 1775), + # 'dat' won't be included in vars_to_read here. + vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))] + } + if (!is.null(first_found_file)) { + if (any(!first_found_file)) { + vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)])) + } + } + if ((i == 1) && (length(common_return_vars) > 0)) { + vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))]) + } + if (!is.null(common_first_found_file)) { + if (any(!common_first_found_file)) { + vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)])) + } + } + return(vars_to_read) +} + +# Find the largest dims length within one dataset. +find_largest_dims_length <- function(selectors_total_list, array_of_files_to_load, + selector_indices_save, dat, expected_inner_dims, + synonims, file_dim_reader) { + # Open and get all the dims from all the files + data_dims_all_files <- vector('list', length = length(selectors_total_list)) + + for (selectors_kk in 1:length(data_dims_all_files)) { + file_to_open <- do.call("[", c(list(array_of_files_to_load), + as.list(selector_indices_save[[selectors_kk]]))) + data_dims_all_files[[selectors_kk]] <- try( + file_dim_reader(file_to_open, NULL, selectors_total_list[[selectors_kk]], + lapply(dat[['selectors']][expected_inner_dims], '[[', 1), + synonims), silent = TRUE) + + } + + # Remove the missing files (i.e., fail try above) + if (!identical(which(substr(data_dims_all_files, 1, 5) == 'Error'), integer(0))) { + tmp <- which(substr(data_dims_all_files, 1, 5) == 'Error') + data_dims_all_files <- data_dims_all_files[-tmp] + } + + # Find the longest dimensions from all the files + largest_data_dims <- rep(0, length(data_dims_all_files[[1]])) + + # The inner dim order may differ among files. Need to align them before + # find out the largest dim length. 
+ dim_names_first_file <- names(data_dims_all_files[[1]]) + same_dim_order <-lapply(lapply(data_dims_all_files, names), + identical, dim_names_first_file) + for (to_fix in which(!unlist(same_dim_order))) { + data_dims_all_files[[to_fix]] <- data_dims_all_files[[to_fix]][match(dim_names_first_file, + names(data_dims_all_files[[to_fix]]))] + } + + for (kk in 1:length(data_dims_all_files[[1]])) { + largest_data_dims[kk] <- max(sapply(data_dims_all_files, '[', kk)) + } + names(largest_data_dims) <- names(data_dims_all_files[[1]]) + return(largest_data_dims) +} + +# Gererate vars_to_transform from picked_vars[[i]] and picked_common_vars +generate_vars_to_transform <- function(vars_to_transform, picked_vars, transform_vars, + picked_vars_ordered) { + # In Start(), picked_vars can be picked_vars[[i]] or picked_common_vars + picked_vars_to_transform <- which(names(picked_vars) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars)[picked_vars_to_transform] + new_vars_to_transform <- picked_vars[picked_vars_to_transform] + which_are_ordered <- which(!sapply(picked_vars_ordered[picked_vars_to_transform], is.null)) + + if (length(which_are_ordered) > 0) { + tmp <- which(!is.na(match(names(picked_vars_ordered), names(which_are_ordered)))) + new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[tmp] + } + vars_to_transform <- c(vars_to_transform, new_vars_to_transform) + } + return(vars_to_transform) +} + +# Out-of-range warning +show_out_of_range_warning <- function(inner_dim, range, bound) { + # bound: 'lower' or 'upper' + .warning(paste0("The ", bound, " boundary of selector of ", inner_dim, + " is out of range [", min(range), ", ", max(range), "]. ", + "Check if the desired range is all included.")) +} + + +# Generate sub_array_of_fri +generate_sub_array_of_fri <- function(with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta, + is_circular_dim) { + print_warning <- FALSE + if (goes_across_prime_meridian) { + #NOTE: The potential problem here is, if it is global longitude, + # and the indices overlap (e.g., lon = [0, 359.723] and + # CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). + # Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll get two 649. + # The fix below may not be the best solution, but it works for the example above. + + if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) { + # global longitude + sub_array_of_fri <- 1:n # n = prod(dim(var_with_selectors)) + + if (with_transform & beta != 0) { + # Warning if transform_extra_cell != 0 + print_warning <- TRUE + } + + } else { + # normal case, i.e., not global + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + if (with_transform) { + gap_width <- last_index - first_index - 1 + actual_beta <- min(gap_width, beta) + sub_array_of_fri <- c(1:(first_index + actual_beta), + (last_index - actual_beta):n) + if (actual_beta != beta) { + print_warning <- TRUE + } + } else { + sub_array_of_fri <- c(1:first_index, last_index:n) + } + } + + } else { + #NOTE: This if seems redundant. 
+# if (is.list(sub_array_of_indices)) { +# sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] +# } + #NOTE: sub_array_of_indices may be vector or list + if (with_transform) { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + start_padding <- min(beta, first_index - 1) + end_padding <- min(beta, n - last_index) + + if (!is_circular_dim) { #latitude or when _reorder is not used + sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) + if (start_padding != beta | end_padding != beta) { + print_warning <- TRUE + } + } else { #longitude + # if (((last_index + beta) - (first_index - beta) + 1) >= n) { + if (start_padding <= beta & end_padding <= beta) { + sub_array_of_fri <- 1:n + } else if (start_padding < beta) { # left side too close to border, need to go to right side + sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n) + } else if (end_padding < beta) { # right side too close to border, need to go to left side + sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding)) + } else { #normal + sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) + } + } + + } else { + if (is.list(sub_array_of_indices)) { + sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } else { + sub_array_of_fri <- sub_array_of_indices + } + } + } + if (print_warning) { + .warning(paste0("Adding parameter transform_extra_cells = ", beta, + " to the transformed index excesses ", + "the border. The border index is used for transformation.")) + } + + return(sub_array_of_fri) +} + +# This function merges two dimensions (e.g., time and sdate if "time_across = 'sdate'") into one. +# The two dimensions have to be next to each other. In Start(), it is used to reshape +# final_dims_fake if merge_across_dims = TRUE +dims_merge <- function(inner_dims_across_files, final_dims_fake) { + # inner_dims_across_files would be like: $sdate: "time" + for (file_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]]) + new_dims <- c() + # part 1: Put the dims before 'time' in new_dims + if (inner_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)]) + } + # part 2: Merge time and sdate together, and name this dim as 'time' + # The cross and being crossed dims are next to each other, e.g., [time, sdate] + new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), + inner_dims_across_files[[file_dim_across]])) + # part 3: Put the dimes after 'sdate' in new_dims + if (inner_dim_pos + 1 < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + return(final_dims_fake) +} + +# This function splits one dimension into two. In Start(), it is used to reshape final_dims_fake +# if split_multiselected_dims = TRUE. 
+dims_split <- function(dim_params, final_dims_fake) { + all_split_dims <- NULL + for (dim_param in 1:length(dim_params)) { + split_dims <- dim(dim_params[[dim_param]]) + if (!is.null(split_dims)) { + if (length(split_dims) > 1) { + all_split_dims <- c(all_split_dims, setNames(list(split_dims), + names(dim_params)[dim_param])) + if (is.null(names(split_dims))) { + names(split_dims) <- paste0(names(dim_params)[dim_param], + 1:length(split_dims)) + } + old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param]) + + # If merge_across_dims and split_multiselected_dims are both used, + # on one file dim, and this file dim is multi-dim, it doesn't work. + if (identical(old_dim_pos, integer(0))) { + stop(paste0("The dimension '", names(dim_params)[dim_param], + "' to be split cannot be found after 'merge_across_dims' ", + "is used. Check if the reshape parameters are used appropriately.")) + } + + # NOTE: Three steps to create new dims. + # 1st: Put in the dims before split_dim. + # 2nd: Replace the old_dim with split_dims. + # 3rd: Put in the dims after split_dim. + new_dims <- c() + if (old_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)]) + } + new_dims <- c(new_dims, split_dims) + if (old_dim_pos < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + return(list(final_dims_fake, all_split_dims)) +} + + +# This function sums up the length of all the inner across dim (e.g., time: list(31, 29, 31, 30)) +# and use it to replace the value of that inner dim. That is, it returns the actual length of +# time rather than using the one including NAs. In Start(), it is used to reshape final_dims_fake +# if merge_across_dims = TRUE, merge_across_dims_narm = TRUE, and split_multiselected_dims = FALSE. +merge_narm_dims <- function(final_dims_fake, across_inner_dim, length_inner_across_dim) { + final_dims_fake_name <- names(final_dims_fake) + pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim) + new_length_inner_dim <- sum(unlist(length_inner_across_dim)) + if (pos_across_inner_dim != length(final_dims_fake)) { + final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)], + new_length_inner_dim, + final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)]) + } else { + final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)], + new_length_inner_dim) + } + names(final_dims_fake) <- final_dims_fake_name + return(final_dims_fake) +} + + + +# Adjust the dim order. If split_multiselected_dims + merge_across_dims, the dim order may +# need to be changed. The inner_dim needs to be the first dim among split dims. +reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, final_dims_fake) { + all_split_dims <- c(all_split_dims[inner_dim_pos_in_split_dims], + all_split_dims[1:length(all_split_dims)][-inner_dim_pos_in_split_dims]) + split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(all_split_dims)))) + new_dims <- c() + if (split_dims_pos[1] != 1) { + new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)]) + } + new_dims <- c(new_dims, all_split_dims) + if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + + return(list(final_dims_fake, all_split_dims)) +} + + +# Build the work pieces. 
+build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims, + found_pattern_dim, inner_dims_across_files, array_of_files_to_load, + array_of_not_found_files, array_of_metadata_flags, + metadata_file_counter, depending_file_dims, transform, + transform_vars, picked_vars, picked_vars_ordered, picked_common_vars, + metadata_folder, debug = debug) { + sub_array_dims <- final_dims[file_dims] + sub_array_dims[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(sub_array_dims), + dim = sub_array_dims) + names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims) + # Detect which of the dimensions of the dataset go across files. + file_dim_across_files <- lapply(inner_dims, + function(x) { + dim_across <- sapply(inner_dims_across_files, function(y) x %in% y) + if (any(dim_across)) { + names(inner_dims_across_files)[which(dim_across)[1]] + } else { + NULL + } + }) + names(file_dim_across_files) <- inner_dims + j <- 1 + while (j <= prod(sub_array_dims)) { + # Work out file path. + file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(file_to_load_sub_indices) <- names(sub_array_dims) + file_to_load_sub_indices[found_pattern_dim] <- i + big_dims <- rep(1, length(dim(array_of_files_to_load))) + names(big_dims) <- names(dim(array_of_files_to_load)) + file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]] + file_to_load <- do.call('[[', c(list(array_of_files_to_load), + as.list(file_to_load_indices))) + not_found_file <- do.call('[[', c(list(array_of_not_found_files), + as.list(file_to_load_indices))) + load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), + as.list(file_to_load_indices))) + if (load_file_metadata) { + metadata_file_counter <- metadata_file_counter + 1 + assign('metadata_file_counter', metadata_file_counter, envir = parent.frame()) + } + if (!is.na(file_to_load) && !not_found_file) { + # Work out indices to take + first_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names") + if (!is.null(x_dim_name)) { + which_chunk <- file_to_load_sub_indices[x_dim_name] + selectors[[x]][['fri']][[which_chunk]] + } else { + selectors[[x]][['fri']][[1]] + } + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['fri']][[which_chunk]] + } + }) + names(first_round_indices) <- inner_dims + second_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names") + if (!is.null(x_dim_name)) { + which_chunk <- file_to_load_sub_indices[x_dim_name] + selectors[[x]][['sri']][[which_chunk]] + } else { + selectors[[x]][['sri']][[1]] + } + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['sri']][[which_chunk]] + } + }) + if (debug) { + print("-> BUILDING A WORK PIECE") + #print(str(selectors)) + } + names(second_round_indices) <- inner_dims + if (!any(sapply(first_round_indices, length) == 0)) { + work_piece <- list() + work_piece[['first_round_indices']] <- first_round_indices + work_piece[['second_round_indices']] <- second_round_indices + work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices + work_piece[['file_path']] <- file_to_load + work_piece[['store_dims']] <- final_dims + # Work out store position + store_position <- final_dims + 
store_position[names(file_to_load_indices)] <- file_to_load_indices + store_position[inner_dims] <- rep(1, length(inner_dims)) + work_piece[['store_position']] <- store_position + # Work out file selectors + file_selectors <- sapply(file_dims, + function (x) { + vector_to_pick <- 1 + if (x %in% names(depending_file_dims)) { + vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] + } + selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + }) + names(file_selectors) <- file_dims + work_piece[['file_selectors']] <- file_selectors + # Send variables for transformation + if (!is.null(transform) && (length(transform_vars) > 0)) { + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars)[picked_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_vars[picked_vars_to_transform]) + if (any(picked_vars_to_transform %in% names(picked_vars_ordered))) { + picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered))] + vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[picked_vars_ordered_to_transform] + } + } + picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform]) + if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) { + picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))] + vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform] + } + } + work_piece[['vars_to_transform']] <- vars_to_transform + } + # Send flag to load metadata + if (load_file_metadata) { + work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter) + } + work_pieces <- c(work_pieces, list(work_piece)) + } + } + j <- j + 1 + } + + return(work_pieces) +} + +# Calculate the progress %s that will be displayed and assign them to the appropriate work pieces. 
+retrieve_progress_message <- function(work_pieces, num_procs, silent) { + if (length(work_pieces) / num_procs >= 2 && !silent) { + if (length(work_pieces) / num_procs < 10) { + amount <- 100 / ceiling(length(work_pieces) / num_procs) + reps <- ceiling(length(work_pieces) / num_procs) + } else { + amount <- 10 + reps <- 10 + } + progress_steps <- rep(amount, reps) + if (length(work_pieces) < (reps + 1)) { + selected_pieces <- length(work_pieces) + progress_steps <- c(sum(head(progress_steps, reps)), + tail(progress_steps, reps)) + } else { + selected_pieces <- round(seq(1, length(work_pieces), + length.out = reps + 1))[-1] + } + progress_steps <- paste0(' + ', round(progress_steps, 2), '%') + progress_message <- 'Progress: 0%' + } else { + progress_message <- '' + selected_pieces <- NULL + } + piece_counter <- 1 + step_counter <- 1 + work_pieces <- lapply(work_pieces, + function (x) { + if (piece_counter %in% selected_pieces) { + wp <- c(x, list(progress_amount = progress_steps[step_counter])) + step_counter <<- step_counter + 1 + } else { + wp <- x + } + piece_counter <<- piece_counter + 1 + wp + }) + if (!silent) { + .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.") + .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.") + .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:")) + if (length(work_pieces) < 30) { + lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2)) + } else { + .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5) + } + } + + # Build the cluster of processes that will do the work and dispatch work pieces. + # The function .LoadDataFile is applied to each work piece. This function will + # open the data file, regrid if needed, subset, apply the mask, + # compute and apply the weights if needed, + # disable extreme values and store in the shared memory matrix. + #print("O") + if (!silent) { + .message("Loading... This may take several minutes...") + if (progress_message != '') { + .message(progress_message, appendLF = FALSE) + } + } + return(work_pieces) +} + +# If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs +# due to unequal inner_dim ('time') length across file_dim ('sdate'). +remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim, + length_inner_across_dim, data_array) { + across_file_dim <- names(inner_dims_across_files) #TODO: more than one? 
+ # Get the length of these two dimensions in final_dims + length_inner_across_store_dims <- final_dims[across_inner_dim] + length_file_across_store_dims <- final_dims[across_file_dim] + + # Create a logical array for merge_across_dims + logi_array <- array(rep(FALSE, + length_file_across_store_dims * length_inner_across_store_dims), + dim = c(length_inner_across_store_dims, length_file_across_store_dims)) + for (i in 1:length_file_across_store_dims) { #1:4 + logi_array[1:length_inner_across_dim[[i]], i] <- TRUE + } + + # First, get the data array with final_dims dimension + data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims) + + # Change the NA derived from additional spaces to -9999, then remove these -9999 + func_remove_blank <- function(data_array, logi_array) { + # dim(data_array) = [time, file_date] + # dim(logi_array) = [time, file_date] + # Change the blank spaces from NA to -9999 + data_array[which(!logi_array)] <- -9999 + return(data_array) + } + data_array_final_dims <- multiApply::Apply(data_array_final_dims, + target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + ## reorder back to the correct dim + tmp <- match(names(final_dims), names(dim(data_array_final_dims))) + data_array_final_dims <- .aperm2(data_array_final_dims, tmp) + data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector + + return(data_array_tmp) +} + + + +# When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks +# (i.e., work_piece) is necessary if one file contains values for discrete dimensions +rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, + final_dims_fake, across_inner_dim, length_inner_across_dim) { + # generate the correct order list from indices_chunk + final_order_list <- list() + i <- 1 + j <- 1 + a <- indices_chunk[i] + while (i <= length(indices_chunk)) { + while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) { + a <- c(a, indices_chunk[i+1]) + i <- i + 1 + } + final_order_list[[j]] <- a + a <- indices_chunk[i+1] + i <- i + 1 + j <- j + 1 + } + names(final_order_list) <- sapply(final_order_list, '[[', 1) + final_order_list <- lapply(final_order_list, length) + + if (!all(diff(as.numeric(names(final_order_list))) > 0)) { + # shape the vector into the array without split_dims + split_dims_pos <- match(all_split_dims[[1]], final_dims_fake) + new_dims <- c() + if (split_dims_pos[1] > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)]) + } + new_dims <- c(new_dims, prod(all_split_dims[[1]])) + names(new_dims)[split_dims_pos[1]] <- across_inner_dim + if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) + } + data_array_no_split <- array(data_array_tmp, dim = new_dims) + # seperate 'time' dim into each work_piece length + data_array_seperate <- list() + tmp <- cumsum(unlist(length_inner_across_dim)) + tmp <- c(0, tmp) + for (i in 1:length(length_inner_across_dim)) { + data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) + } + # re-build the array: chunk + which_chunk <- as.numeric(names(final_order_list)) + how_many_indices <- unlist(final_order_list) + array_piece <- list() + ind_in_array_seperate <- 
as.list(rep(1, length(data_array_seperate))) + for (i in 1:length(final_order_list)) { + array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]], + across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] + } + + # re-build the array: paste + data_array_tmp <- array_piece[[1]] + along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) + if (length(array_piece) > 1) { + for (i in 2:length(array_piece)) { + data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], + along = along_pos) + } + } + } + + return(data_array_tmp) +} + + +# Create a list of metadata of the variable (e.g., tas) +create_metadata_list <- function(array_of_metadata_flags, metadata_dims, pattern_dims, + loaded_metadata_files, loaded_metadata, dat_names, + dataset_has_files) { + #NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat + # for $common, it is a list of metadata length. For $dat, it is a list of dat length, + # and each sublist has the metadata for each dat. + dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims] + if (!any(names(dim_of_metadata) == pattern_dims) | + (any(names(dim_of_metadata) == pattern_dims) & + dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code + return_metadata <- vector('list', + length = prod(dim_of_metadata)) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim_of_metadata + + } else { # put under $dat. metadata_dims has 'dat' and dat length > 1 + return_metadata <- vector('list', + length = dim_of_metadata[pattern_dims]) + names(return_metadata) <- dat_names + for (kk in 1:length(return_metadata)) { + return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat + } + loaded_metadata_count <- 1 + for (kk in 1:length(return_metadata)) { + for (jj in 1:length(return_metadata[[kk]])) { + + if (dataset_has_files[kk]) { + if (loaded_metadata_count %in% loaded_metadata_files) { + return_metadata[[kk]][jj] <- loaded_metadata[[loaded_metadata_count]] + names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[loaded_metadata_count]]) + } else { + return_metadata[[kk]][jj] <- NULL + } + loaded_metadata_count <- loaded_metadata_count + 1 + } else { + return_metadata[[kk]][jj] <- NULL + } + + } + } + } + + return(return_metadata) +} + +# This function adds the metadata of the variable (e.g., tas) into the list of picked_vars or +# picked_common_vars. The metadata is only retrieved when 'retrieve = TRUE'. +combine_metadata_picked_vars <- function(return_metadata, picked_vars, picked_common_vars, + metadata_dims, pattern_dims, length_dat) { +#NOTE: The metadata of variables can be saved in one of the two different structures. +# (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1): +# put under $common +# (2) (metadata_dims == 'dat' & length(dat) > 1): +# put under $dat1, $dat2, .... Put it in picked_vars list +#TODO: The current (2) uses the inefficient method. Should define the list structure first +# then fill the list, rather than expand it in the for loop. 
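+# Illustrative result (dataset names are examples): in case (1) the variable metadata ends
+# up under $common, e.g. Variables_list$common$tas; in case (2) it ends up under each
+# dataset, e.g. Variables_list$system4_m1$tas and Variables_list$system5_m1$tas.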
+ + if (any(metadata_dims == pattern_dims) & length_dat > 1) { # (2) + for (kk in 1:length(return_metadata)) { + sublist_names <- lapply(return_metadata, names)[[kk]] + if (!is.null(sublist_names)) { + for (jj in 1:length(sublist_names)) { + picked_vars[[kk]][[sublist_names[jj]]] <- return_metadata[[kk]][[jj]] + } + } + } + Variables_list <- c(list(common = picked_common_vars), picked_vars) + + } else { #(1) + len <- unlist(lapply(return_metadata, length)) + len <- sum(len) + length(which(len == 0)) #0 means NULL + name_list <- lapply(return_metadata, names) + new_list <- vector('list', length = len) + count <- 1 + + for (kk in 1:length(return_metadata)) { + if (length(return_metadata[[kk]]) == 0) { #NULL + count <- count + 1 + } else { + for (jj in 1:length(return_metadata[[kk]])) { + new_list[[count]] <- return_metadata[[kk]][[jj]] + names(new_list)[count] <- name_list[[kk]][jj] + count <- count + 1 + } + } + } + Variables_list <- c(list(common = c(picked_common_vars, new_list)), picked_vars) + } + + return(Variables_list) +} -- GitLab From 6e7523c429be9ea9a5e20343b65bdebbf6c57c63 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 4 Aug 2021 16:05:44 +0200 Subject: [PATCH 06/10] Add data value check --- tests/testthat/test-Start-metadata_dims.R | 70 ++++++++++++++++++----- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index b514df6..841786d 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -8,8 +8,8 @@ suppressWarnings( sdate = '20170101', ensemble = indices(1), time = indices(1), - lat = indices(1:10), - lon = indices(1:10), + lat = indices(1:2), + lon = indices(2:3), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(time = 'sdate', @@ -39,6 +39,11 @@ suppressWarnings( length(attr(data, 'Variables')$common$tas), 12 ) + expect_equal( + data[1, 1, 1, 1, 1, , 1], + c(248.5012, 248.7815), + tolerance = 0.0001 + ) }) @@ -53,8 +58,8 @@ suppressWarnings( sdate = '20170101', ensemble = indices(1), time = indices(1), - lat = indices(1:10), - lon = indices(1:10), + lat = indices(1:2), + lon = indices(2:3), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(time = 'sdate', @@ -92,7 +97,16 @@ suppressWarnings( length(attr(data, 'Variables')$system4_m1$tas), 11 ) - + expect_equal( + data[, 1, 1, 1, 1, 1, 1], + c(247.2570, 248.5012), + tolerance = 0.0001 + ) + expect_equal( + data[, 1, 1, 1, 1, 1, 2], + c(247.2570, 248.5016), + tolerance = 0.0001 + ) }) test_that("3. One data set, two vars", { @@ -100,13 +114,12 @@ test_that("3. One data set, two vars", { 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') var <- c('tas', 'clt') - sdate <- '20170101' suppressWarnings( data <- Start(dat = repos, var = var, time = indices(1), - lat = indices(1:10), - lon = indices(10:19), + lat = indices(9:10), + lon = indices(10:11), return_vars = list(lat = NULL, lon = NULL), metadata_dims = 'var', synonims = list(lat = c('lat', 'latitude'), @@ -138,7 +151,16 @@ suppressWarnings( length(attr(data, 'Variables')$common$clt), 16 ) - + expect_equal( + data[1, , 1, 1, 1], + c(249.42436, 32.45226), + tolerance = 0.0001 + ) + expect_equal( + data[1, , 1, 2, 1], + c(250.00110, 25.04345), + tolerance = 0.0001 + ) }) test_that("4. 
Two data sets, two vars", { @@ -151,8 +173,8 @@ suppressWarnings( sdate = '20170101', ensemble = indices(1), time = indices(1), - lat = indices(1:10), - lon = indices(1:10), + lat = indices(1:2), + lon = indices(1:2), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(time = 'sdate', @@ -190,7 +212,18 @@ suppressWarnings( length(attr(data, 'Variables')$system4_m1$tas), 11 ) + expect_equal( + data[1, , 1, 1, 1, 2, 2], + c(247.227219, 6.370782), + tolerance = 0.0001 + ) + expect_equal( + data[2, , 1, 1, 1, 2, 2], + c(248.781540, 5.794801), + tolerance = 0.0001 + ) +#------------------------------------------------------------- suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -198,8 +231,8 @@ suppressWarnings( sdate = '20170101', ensemble = indices(1), time = indices(1), - lat = indices(1:10), - lon = indices(1:10), + lat = indices(1:2), + lon = indices(1:2), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(time = 'sdate', @@ -246,7 +279,16 @@ suppressWarnings( length(attr(data, 'Variables')$system4_m1$sfcWind), 11 ) - + expect_equal( + data[1, , 1, 1, 1, 2, 2], + c(247.227219, 6.370782), + tolerance = 0.0001 + ) + expect_equal( + data[2, , 1, 1, 1, 2, 2], + c(248.781540, 5.794801), + tolerance = 0.0001 + ) }) test_that("5. Specify metadata_dims with another file dimension", { -- GitLab From 87c0f579be4aa897f57172db8cfae2419f4643c3 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 4 Aug 2021 18:13:16 +0200 Subject: [PATCH 07/10] Revise ex1_8 and its corresponding unit test --- inst/doc/usecase/ex1_8_tasandtos.R | 137 +++++++++++++++++---------- tests/testthat/test-Start-two_dats.R | 61 +++++++----- 2 files changed, 125 insertions(+), 73 deletions(-) diff --git a/inst/doc/usecase/ex1_8_tasandtos.R b/inst/doc/usecase/ex1_8_tasandtos.R index a384368..2c9e75e 100644 --- a/inst/doc/usecase/ex1_8_tasandtos.R +++ b/inst/doc/usecase/ex1_8_tasandtos.R @@ -1,6 +1,7 @@ # ----------------------------------------------------- # Loading tas and tos for EC-Earth decadal predictions: # Authors: Carlos Delgado and Núria Pérez-Zanón +# Revised by An-Chi Ho on 4th Aug. 2021 # ------------------------------------------------------ # Three ways to load the same data are provided: @@ -9,21 +10,35 @@ # 3) two Start call (one for each path and variable) -# Case 1) returns dimensions 'dataset' and 'var' with length 2 , but only the positions of the diagonal are filled: -# tas is stored in {dataset = 1, var = 1} -# tos is stored in {dataset = 2, var = 2} -# NOTE!!! check {datastet = 1, var = 2} because an issue in ESMValTool:https://earth.bsc.es/gitlab/es/auto-ecearth3/issues/1258 +# Case 1: +# Return dimensions 'dataset' and 'var' with length 2, but only the positions of the diagonal +# are filled: +# = Amon/tas is stored in {dataset = 1, var = 1} +# = Omon/tos is stored in {dataset = 2, var = 2} +# We choose an ocean region (lon = 150:170; lat = 10:20) so 'tos' will have values. + +## NOTE!!! [dataset = 1, var = 2] has values because an issue in ESMValTool:https://earth.bsc.es/gitlab/es/auto-ecearth3/issues/1258. +## However, the file seems incorrect. tos shouldn't have values on land. But it is a file issue +## rather than Start()'s problem. 
library(startR) -paths = list(list(path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'), - list(path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc')) -data1 <- Start(dataset = paths, +path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', + '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', + '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') + +data1 <- Start(dataset = list(list(path = path_tas), + list(path = path_tos)), var = c('tas', 'tos'), sdate = paste0(1960:1962), fmonth = 1, - lat = values(list(0, 10)), - lon = values(list(0, 10)), + lat = values(list(10, 20)), + lat_reorder = Sort(), + lon = values(list(150, 170)), + lon_reorder = CircularSort(0, 360), fyear = 'all', member = indices(1), fyear_depends = 'sdate', @@ -32,75 +47,101 @@ data1 <- Start(dataset = paths, synonims = list(fmonth = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - metadata_dims = 'var', + metadata_dims = c('dataset', 'var'), return_vars = list(lat = 'dataset', lon = 'dataset'), num_procs = 1, retrieve = TRUE) dim(data1) #dataset var sdate fmonth lat lon member -# 2 2 3 12 14 15 1 +# 2 2 3 1 15 28 1 # Check empty and filled dimensions: -sum(is.na(data1[1,1,,,,,])) == (3*12*14*15) -#[1] FALSE -sum(is.na(data1[1,2,,,,,])) == (3*12*14*15) -#[1] TRUE -sum(is.na(data1[2,2,,,,,])) == (3*12*14*15) -#[1] FALSE -sum(is.na(data1[2,1,,,,,])) == (3*12*14*15) -#[1] TRUE +sum(is.na(data1[1, 1, , , , , ])) +#[1] 0 +sum(is.na(data1[1, 2, , , , , ])) # It should be 1260 if Amon/tos doesn't exist +#[1] 0 +sum(is.na(data1[2, 1, , , , , ])) +#[1] 1260 +sum(is.na(data1[2, 2, , , , , ])) +#[1] 0 lat1 <- as.vector(attributes(data1)$Variables$dat1$lat) lon1 <- as.vector(attributes(data1)$Variables$dat1$lon) +# Check metadata. 'dat1' has 'tas' and 'tos'; 'dat2' has 'tos' +names(attr(data1, 'Variables')$common) +#[1] "fmonth" +names(attr(data1, 'Variables')$dat1) +#[1] "lat" "lon" "tas" "tos" +names(attr(data1, 'Variables')$dat2) +#[1] "lat" "lon" "tos" + # --------------------------------------------------------------- -# Case 2) using a single path, {dataset = 1, var = 2, type = 1} -# 'type' dimension is necessary to distinguish between 'Amon' and 'Omon'. +# Case 2: +# Use a single path, {dataset = 1, var = 2, type = 1}. +# 'type' dimension is necessary to distinguish between 'Amon' and 'Omon', and the dependency +# needs to be specified. 
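+# That is, with type = list('tas' = 'Amon', 'tos' = 'Omon') and type_depends = 'var',
+# the $type$ tag in the path is replaced by 'Amon' when reading 'tas' and by 'Omon' when
+# reading 'tos'.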
library(startR) -path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/$type$/$var$/gr/v20190713/$var$_$type$_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/$type$/$var$/gr/v20190713/', + '$var$_$type$_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') data2 <- Start(dataset = path, var = c('tas', 'tos'), - type = 'all', + type = list('tas' = 'Amon', 'tos' = 'Omon'), type_depends = 'var', sdate = paste0(1960:1962), fmonth = 1, - lat = values(list(0, 10)), - lon = values(list(0, 10)), + lat = values(list(10, 20)), + lat_reorder = Sort(), + lon = values(list(150, 170)), + lon_reorder = CircularSort(0, 360), fyear = indices(1), member = indices(1), fyear_depends = 'sdate', fmonth_across = 'fyear', merge_across_dims = TRUE, - synonims = list(fmonth = c('fmonth','time'), + synonims = list(fmonth = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), + metadata_dims = 'var', return_vars = list(lat = 'dataset', lon = 'dataset'), num_procs = 1, retrieve = TRUE) dim(data2) -# dataset var type sdate fmonth lat lon member -# 1 2 1 3 1 14 15 1 +#dataset var type sdate fmonth lat lon member +# 1 2 1 3 1 15 28 1 +# Compare data1 and data2 +identical(as.vector(data1[1, 1, , 1, , , 1]), as.vector(data2[1, 1, 1, , 1, , , 1])) +#[1] TRUE +identical(as.vector(data1[2, 2, , 1, , , 1]), as.vector(data2[1, 2, 1, , 1, , , 1])) +#[1] TRUE # --------------------------------------------------------------- -# Case 3) Two different Start calls can save data_tas and data_tos both with {dataset = 1 and var = 1} dimensions and avoiding extra dimensions like 'type'. +# Case 3: +# Two different Start calls can save data_tas and data_tos both with {dataset = 1 and var = 1} +# dimensions and avoid extra dimensions like 'type'. 
-path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', + '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') data_tas <- Start(dataset = path, var = 'tas', sdate = paste0(1960:1962), fmonth = 1, - lat = values(list(0, 10)), - lon = values(list(0, 10)), + lat = values(list(10, 20)), + lat_reorder = Sort(), + lon = values(list(150, 170)), + lon_reorder = CircularSort(0, 360), fyear = indices(1), member = indices(1), fyear_depends = 'sdate', fmonth_across = 'fyear', merge_across_dims = TRUE, - synonims = list(fmonth = c('fmonth','time'), + synonims = list(fmonth = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dataset', lon = 'dataset'), @@ -108,21 +149,25 @@ data_tas <- Start(dataset = path, dim(data_tas) #dataset var sdate fmonth lat lon member -# 1 1 3 1 14 15 1 +# 1 1 3 1 15 28 1 -path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', + 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', + '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') data_tos <- Start(dataset = path, var = 'tos', sdate = paste0(1960:1962), fmonth = 1, - lat = values(list(0, 10)), - lon = values(list(0, 10)), + lat = values(list(10, 20)), + lat_reorder = Sort(), + lon = values(list(150, 170)), + lon_reorder = CircularSort(0, 360), fyear = indices(1), member = indices(1), fyear_depends = 'sdate', fmonth_across = 'fyear', merge_across_dims = TRUE, - synonims = list(fmonth = c('fmonth','time'), + synonims = list(fmonth = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dataset', lon = 'dataset'), @@ -130,15 +175,11 @@ data_tos <- Start(dataset = path, dim(data_tos) #dataset var sdate fmonth lat lon member -# 1 1 3 1 14 15 1 - -# --------------------------------------------------------------------- +# 1 1 3 1 15 28 1 -# Comparison cases 1) to 3): -#---------------------------------------------------------------------- - -all(data1[1, 1, , , , , ] == data_tas[1, 1, , , , , ]) -all((data1[2, 2, , , , , ]) == data_tos[1, 1, , , , , ], na.rm = TRUE) -all(data2[1, 1, 1, , , , , ] == data_tas[1, 1, , , , ,]) -all((data2[1, 2, 1, , , , , ]) == data_tos[1, 1, , , , , ], na.rm = TRUE) +# Compare with previous results +identical(as.vector(data2[1, 1, 1, , 1, , , 1]), as.vector(data_tas)) +#[1] TRUE +identical(as.vector(data2[1, 2, 1, , 1, , , 1]), as.vector(data_tos)) +#[1] TRUE diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 74738d0..4fa8642 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,7 +1,7 @@ # ex1_8 -context("Start() two dats in one call") +context("Start() two dats and two vars in one call") -test_that("1. ex1_8", { +test_that("1. 
ex1_8, case 1", { path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', @@ -12,53 +12,49 @@ path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consorti suppressWarnings( data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), var = c('tas', 'tos'), - sdate = paste0(1960:1962), - fmonth = 1, - lat = values(list(8, 10)), - lon = values(list(8, 10)), + sdate = paste0(1960), + time = 1, + lat = values(list(10, 20)), + lat_reorder = Sort(), + lon = values(list(150, 170)), + lon_reorder = CircularSort(0, 360), fyear = 'all', member = indices(1), fyear_depends = 'sdate', - fmonth_across = 'fyear', + time_across = 'fyear', merge_across_dims = TRUE, - synonims = list(fmonth = c('fmonth', 'time'), - lon = c('lon', 'longitude'), + synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - metadata_dims = 'var', + metadata_dims = c('dataset', 'var'), return_vars = list(lat = 'dataset', lon = 'dataset'), retrieve = TRUE) ) expect_equal( dim(data), -c(dataset = 2, var = 2, sdate = 3, fmonth = 1, lat = 3, lon = 3, member = 1) +c(dataset = 2, var = 2, sdate = 1, time = 1, lat = 15, lon = 28, member = 1) ) expect_equal( sum(data, na.rm = T), -15396.7, +264710, tolerance = 0.0001 ) expect_equal( -length(data[is.na(data)]), -54 -) -expect_equal( sum(is.na(data[1,1,,,,,])), 0 ) -# Amon also has tos +# Amon also has tos (though the file is incorrect) expect_equal( sum(is.na(data[1,2,,,,,])), 0 ) expect_equal( sum(is.na(data[2,1,,,,,])), -27 +420 ) -# WRONG!!!! Omon should have tos. The value should be 0 expect_equal( sum(is.na(data[2,2,,,,,])), -27 +0 ) expect_equal( @@ -67,23 +63,38 @@ c("common", "dat1", "dat2") ) expect_equal( names(attr(data, 'Variables')$common), -c("fmonth", "tas", "tos") +c("time") ) expect_equal( names(attr(data, 'Variables')$dat1), -c("lat", "lon") +c("lat", "lon", "tas", "tos") ) expect_equal( names(attr(data, 'Variables')$dat2), -c("lat", "lon") +c("lat", "lon", "tos") ) expect_equal( -length(attr(data, 'Variables')$common$tas), +length(attr(data, 'Variables')$dat1$tas), 17 ) expect_equal( -length(attr(data, 'Variables')$common$tos), +length(attr(data, 'Variables')$dat1$tos), 16 ) +expect_equal( +length(attr(data, 'Variables')$dat2$tos), +16 +) + +expect_equal( +data[1, , 1, 1, 1, 1, 1], +c(299.9199, 302.5184), +tolerance = 0.0001 +) +expect_equal( +data[2, , 1, 1, 1, 1, 1], +c(NA, 29.3684), +tolerance = 0.0001 +) }) -- GitLab From 93ac5f8c4b076c3ec94e343e8a56f8792fc8cc4a Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 4 Aug 2021 18:15:12 +0200 Subject: [PATCH 08/10] Bugfix for metadata file count --- R/Start.R | 4 ++-- R/zzz.R | 64 +++++++++++++++++++++++++++---------------------------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/R/Start.R b/R/Start.R index 6f602a5..5f4a295 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3440,8 +3440,8 @@ Start <- function(..., # dim = indices/selectors, # the appropriate work pieces. work_pieces <- retrieve_progress_message(work_pieces, num_procs, silent) -# NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here, -# the path name is created in work_pieces but the path hasn't been built yet. + # NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here, + # the path name is created in work_pieces but the path hasn't been built yet. 
if (num_procs == 1) { found_files <- lapply(work_pieces, .LoadDataFile, shared_matrix_pointer = shared_matrix_pointer, diff --git a/R/zzz.R b/R/zzz.R index a775b47..d5429d1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1087,41 +1087,41 @@ create_metadata_list <- function(array_of_metadata_flags, metadata_dims, pattern #NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat # for $common, it is a list of metadata length. For $dat, it is a list of dat length, # and each sublist has the metadata for each dat. - dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims] - if (!any(names(dim_of_metadata) == pattern_dims) | - (any(names(dim_of_metadata) == pattern_dims) & - dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code - return_metadata <- vector('list', - length = prod(dim_of_metadata)) - return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata - dim(return_metadata) <- dim_of_metadata - - } else { # put under $dat. metadata_dims has 'dat' and dat length > 1 - return_metadata <- vector('list', - length = dim_of_metadata[pattern_dims]) - names(return_metadata) <- dat_names - for (kk in 1:length(return_metadata)) { - return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat - } - loaded_metadata_count <- 1 - for (kk in 1:length(return_metadata)) { - for (jj in 1:length(return_metadata[[kk]])) { - - if (dataset_has_files[kk]) { - if (loaded_metadata_count %in% loaded_metadata_files) { - return_metadata[[kk]][jj] <- loaded_metadata[[loaded_metadata_count]] - names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[loaded_metadata_count]]) - } else { - return_metadata[[kk]][jj] <- NULL - } - loaded_metadata_count <- loaded_metadata_count + 1 - } else { - return_metadata[[kk]][jj] <- NULL - } - + dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims] + if (!any(names(dim_of_metadata) == pattern_dims) | + (any(names(dim_of_metadata) == pattern_dims) & + dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code + return_metadata <- vector('list', + length = prod(dim_of_metadata)) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim_of_metadata + + } else { # put under $dat. 
metadata_dims has 'dat' and dat length > 1 + return_metadata <- vector('list', + length = dim_of_metadata[pattern_dims]) + names(return_metadata) <- dat_names + for (kk in 1:length(return_metadata)) { + return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat + } + loaded_metadata_count <- 1 + for (kk in 1:length(return_metadata)) { + for (jj in 1:length(return_metadata[[kk]])) { + if (dataset_has_files[kk]) { + if (loaded_metadata_count %in% loaded_metadata_files) { + return_metadata[[kk]][jj] <- loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]] + names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]]) + + } else { + return_metadata[[kk]][jj] <- NULL } + loaded_metadata_count <- loaded_metadata_count + 1 + } else { + return_metadata[[kk]][jj] <- NULL } + } + } + } return(return_metadata) } -- GitLab From 5834f9315400f9a541463b808c72b2e1babf210b Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 16 Aug 2021 16:48:13 +0200 Subject: [PATCH 09/10] Fix the warning and note from check --- DESCRIPTION | 3 ++- R/Start.R | 8 +++++--- R/zzz.R | 8 ++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 27d4a77..6ad8a64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: PCICt Suggests: stats, - utils + utils, + testthat License: LGPL-3 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues diff --git a/R/Start.R b/R/Start.R index 5f4a295..5571dae 100644 --- a/R/Start.R +++ b/R/Start.R @@ -950,7 +950,7 @@ Start <- function(..., # dim = indices/selectors, # Check if pattern_dims is the first item in metadata_dims if ((pattern_dims %in% metadata_dims) & metadata_dims[1] != pattern_dims) { - metadata_dims <- c(pattern_dims, metadata_dims[-which(metadata_dim == pattern_dims)]) + metadata_dims <- c(pattern_dims, metadata_dims[-which(metadata_dims == pattern_dims)]) } # Check if metadata_dims has more than 2 elements if ((metadata_dims[1] == pattern_dims & length(metadata_dims) > 2)) { @@ -974,7 +974,8 @@ Start <- function(..., # dim = indices/selectors, dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] dat <- dim_params[[found_pattern_dim]] #NOTE: This function creates the object 'dat_names' - dat <- mount_dat(dat, pattern_dim, found_pattern_dim) + dat_names <- c() + dat <- mount_dat(dat, pattern_dims, found_pattern_dim, dat_names) dim_params[[found_pattern_dim]] <- dat_names @@ -3427,7 +3428,8 @@ Start <- function(..., # dim = indices/selectors, depending_file_dims = depending_file_dims, transform = transform, transform_vars = transform_vars, picked_vars = picked_vars[[i]], picked_vars_ordered = picked_vars_ordered[[i]], - picked_common_vars = picked_common_vars, + picked_common_vars = picked_common_vars, + picked_common_vars_ordered = picked_common_vars_ordered, metadata_folder = metadata_folder, debug = debug) } } diff --git a/R/zzz.R b/R/zzz.R index d5429d1..b045e85 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -232,11 +232,11 @@ found_pattern_dims <- function(pattern_dims, dim_names, var_params, # The variable 'dat' is mounted with the information (name, path) of each dataset. # NOTE: This function creates the object 'dat_names' in the parent env. 
-mount_dat <- function(dat, pattern_dim, found_pattern_dim) { +mount_dat <- function(dat, pattern_dims, found_pattern_dim, dat_names) { # dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') dat_to_fetch <- c() - dat_names <- c() +# dat_names <- c() if (!is.list(dat)) { dat <- as.list(dat) } else { @@ -252,7 +252,7 @@ mount_dat <- function(dat, pattern_dim, found_pattern_dim) { dat[[i]] <- list(name = dat[[i]]) } } else if (!is.list(dat[[i]])) { - stop(paste0("Parameter '", pattern_dim, + stop(paste0("Parameter '", pattern_dims, "' is incorrect. It must be a list of lists or character strings.")) } #if (!(all(names(dat[[i]]) %in% dat_info_names))) { @@ -770,7 +770,7 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, array_of_not_found_files, array_of_metadata_flags, metadata_file_counter, depending_file_dims, transform, transform_vars, picked_vars, picked_vars_ordered, picked_common_vars, - metadata_folder, debug = debug) { + picked_common_vars_ordered, metadata_folder, debug = debug) { sub_array_dims <- final_dims[file_dims] sub_array_dims[found_pattern_dim] <- 1 sub_array_of_files_to_load <- array(1:prod(sub_array_dims), -- GitLab From e054a9814ebc106e4701fbbe5727fc6397356701 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 16 Aug 2021 17:37:41 +0200 Subject: [PATCH 10/10] Remove lubridate dependency in tests --- tests/testthat.R | 4 ++++ .../test-Start-implicit_dependency_by_selector.R | 13 +++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/testthat.R b/tests/testthat.R index d424073..5073b5e 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,5 +1,9 @@ library(testthat) library(startR) +library(SpecsVerification) +library(dplyr) +library(plyr) +library(s2dv) test_check("startR") diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index bcc5ac1..5c2f050 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -79,19 +79,16 @@ data2[1, 1, 1, 1, ] test_that("2. time depends on sdate", { -library(lubridate) - repos <- '/esarchive/exp/ecmwf/system4_m1/daily_mean/$var$_f24h/$var$_$sdate$.nc' - -sdates <- ymd("20010501") + rep(years(0:2), each = 1) -times <- array(ymd("20010501") + days(0:30) + rep(years(0:2), each = 31), - dim = c(time = 31, sdate = 3)) -times <- as.POSIXct(times * 86400, tz = 'UTC', origin = '1970-01-01') +sdates <- paste0(2001:2003, '0501') +tmp <- as.POSIXct(sapply(2001:2003, function(x) paste0(x, '-05-', sprintf('%02d', 1:31))), tz = 'UTC') +tmp <- array(tmp, dim = c(time = 31, sdate = 3)) +times <- as.POSIXct(tmp, tz = 'UTC', origin = '1970-01-01') suppressWarnings( exp <- Start(dat = repos, var = 'tos', - sdate = format(sdates, "%Y%m%d"), + sdate = sdates, time = times, #dim: [time = 31, sdate = 3]. time is corresponding to each sdate ensemble = indices(1:2), lat = indices(1:3), -- GitLab