Start.R 238 KB
Newer Older
aho's avatar
aho committed
      if (!(inner_dims_across_files[[i]] %in% names(new_idaf))) {
        new_idaf[[inner_dims_across_files[[i]]]] <- i
      } else {
        new_idaf[[inner_dims_across_files[[i]]]] <- c(new_idaf[[inner_dims_across_files[[i]]]], i)
      }
    }
    inner_dims_across_files <- new_idaf
  }
  
  # Check return_vars
  if (is.null(return_vars)) {
    return_vars <- list()
    #    if (length(var_params) > 0) {
    #      return_vars <- as.list(paste0(names(var_params), '_var'))
    #    } else {
    #      return_vars <- list()
    #    }
  }
  if (!is.list(return_vars)) {
    stop("Parameter 'return_vars' must be a list or NULL.")
  }
  if (length(return_vars) > 0 && is.null(names(return_vars))) {
    #    names(return_vars) <- rep('', length(return_vars))
    stop("Parameter 'return_vars' must be a named list.")
  }
  i <- 1
  while (i <= length(return_vars)) {
    #    if (names(return_vars)[i] == '') {
    #      if (!(is.character(return_vars[[i]]) && (length(return_vars[[i]]) == 1))) {
    #        stop("The ", i, "th specification in 'return_vars' is malformed.")
    #      } 
    #      if (!grepl('_var$', return_vars[[i]])) {
    #        stop("The ", i, "th specification in 'return_vars' is malformed.")
    #      }
    #      dim_name <- strsplit(return_vars[[i]], '_var$')[[1]][1]
    #      if (!(dim_name %in% names(var_params))) {
    #        stop("'", dim_name, "_var' requested in 'return_vars' but ",
    #             "no '", dim_name, "_var' specified in the .Load call.")
    #      }
    #      names(return_vars)[i] <- var_params[[dim_name]]
    #      return_vars[[i]] <- found_pattern_dim
    #    } else
    if (length(return_vars[[i]]) > 0) { 
      if (!is.character(return_vars[[i]])) {
        stop("The ", i, "th specification in 'return_vars' is malformed. It ",
             "must be a vector of character strings of valid file dimension ",
             "names.")
      }
    }
    i <- i + 1
  }
  
  # Check synonims
  if (!is.null(synonims)) {
    error <- FALSE
    if (!is.list(synonims)) {
      error <- TRUE
    }
    for (synonim_entry in names(synonims)) {
      if (!(synonim_entry %in% names(dim_params)) &&
          !(synonim_entry %in% names(return_vars))) {
        error <- TRUE
      }
      if (!is.character(synonims[[synonim_entry]]) ||
          length(synonims[[synonim_entry]]) < 1) {
        error <- TRUE
      }
    }
    if (error) {
      stop("Parameter 'synonims' must be a named list, where the names are ",
           "a name of a requested dimension or variable and the values are ",
           "vectors of character strings with at least one alternative name ",
           " for each dimension or variable in 'synonims'.")
    }
  }
  if (length(unique(names(synonims))) < length(names(synonims))) {
    stop("There must not be repeated entries in 'synonims'.")
  }
  if (length(unique(unlist(synonims))) < length(unlist(synonims))) {
    stop("There must not be repeated values in 'synonims'.")
  }
  # Make that all dims and vars have an entry in synonims, even if only dim_name = dim_name
  dim_entries_to_add <- which(!(names(dim_params) %in% names(synonims)))
  if (length(dim_entries_to_add) > 0) {
    synonims[names(dim_params)[dim_entries_to_add]] <- as.list(names(dim_params)[dim_entries_to_add])
  }
  var_entries_to_add <- which(!(names(var_params) %in% names(synonims)))
  if (length(var_entries_to_add) > 0) {
    synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add])
  }
aho's avatar
aho committed
 
  # Check if return_vars name is inner dim name. If it is synonim, change back to inner dim name
  # and return a warning.
  use_syn_names <- which(names(return_vars) %in% unlist(synonims) & 
                         !names(return_vars) %in% names(synonims))
  if (!identical(use_syn_names, integer(0))) {
    for (use_syn_name in use_syn_names) {
      wrong_name <- names(return_vars)[use_syn_name]
      names(return_vars)[use_syn_name] <- names(unlist(
                                            lapply(lapply(synonims, '%in%',
                                                   names(return_vars)[use_syn_name]),
                                            which)))
      .warning(paste0("The name '", wrong_name, "' in parameter 'return_vars' in synonim. ",
                      "Change it back to the inner dimension name, '",
                      names(return_vars)[use_syn_name], "'."))
    }
  }
 
aho's avatar
aho committed
  # Check selector_checker
  if (is.null(selector_checker) || !is.function(selector_checker)) {
    stop("Parameter 'selector_checker' must be a function.")
  }
  
  # Check file_opener
  if (is.null(file_opener) || !is.function(file_opener)) {
    stop("Parameter 'file_opener' must be a function.")
  }
  
  # Check file_var_reader
  if (!is.null(file_var_reader) && !is.function(file_var_reader)) {
    stop("Parameter 'file_var_reader' must be a function.")
  }
  
  # Check file_dim_reader
  if (!is.null(file_dim_reader) && !is.function(file_dim_reader)) {
    stop("Parameter 'file_dim_reader' must be a function.")
  }
  
  # Check file_data_reader
  if (is.null(file_data_reader) || !is.function(file_data_reader)) {
    stop("Parameter 'file_data_reader' must be a function.")
  }
  
  # Check file_closer
  if (is.null(file_closer) || !is.function(file_closer)) {
    stop("Parameter 'file_closer' must be a function.")
  }
  
  # Check transform
  if (!is.null(transform)) {
    if (!is.function(transform)) {
      stop("Parameter 'transform' must be a function.")
    }
  }
  
  # Check transform_params
  if (!is.null(transform_params)) {
    if (!is.list(transform_params)) {
      stop("Parameter 'transform_params' must be a list.")
    }
    if (is.null(names(transform_params))) {
      stop("Parameter 'transform_params' must be a named list.")
    }
  }
  
  # Check transform_vars
  if (!is.null(transform_vars)) {
    if (!is.character(transform_vars)) {
      stop("Parameter 'transform_vars' must be a vector of character strings.")
    }
  }
  if (any(!(transform_vars %in% names(return_vars)))) {
    stop("All the variables specified in 'transform_vars' must also be specified in 'return_vars'.")
  }
  
  # Check apply_indices_after_transform
  if (!is.logical(apply_indices_after_transform)) {
    stop("Parameter 'apply_indices_after_transform' must be either TRUE or FALSE.")
  }
  aiat <- apply_indices_after_transform
  
  # Check transform_extra_cells
  if (!is.numeric(transform_extra_cells)) {
    stop("Parameter 'transform_extra_cells' must be numeric.")
  }
  transform_extra_cells <- round(transform_extra_cells)
  
  # Check split_multiselected_dims
  if (!is.logical(split_multiselected_dims)) {
    stop("Parameter 'split_multiselected_dims' must be TRUE or FALSE.")
  }
  
  # Check path_glob_permissive
  if (!is.numeric(path_glob_permissive) && !is.logical(path_glob_permissive)) {
    stop("Parameter 'path_glob_permissive' must be TRUE, FALSE or an integer.")
  }
  if (length(path_glob_permissive) != 1) {
    stop("Parameter 'path_glob_permissive' must be of length 1.")
  }
  
  # Check largest_dims_length
  if (!is.numeric(largest_dims_length) && !is.logical(largest_dims_length)) {
    stop("Parameter 'largest_dims_length' must be TRUE, FALSE or a named integer vector.")
  }
  if (is.numeric(largest_dims_length)) {
    if (any(largest_dims_length %% 1 != 0) | any(largest_dims_length < 0) | is.null(names(largest_dims_length))) {
      stop("Parameter 'largest_dims_length' must be TRUE, FALSE or a named integer vector.")
    }
  }
  if (is.logical(largest_dims_length) && length(largest_dims_length) != 1) {
    stop("Parameter 'path_glob_permissive' must be TRUE, FALSE or a named integer vector.")
  }

aho's avatar
aho committed
  # Check retrieve
  if (!is.logical(retrieve)) {
    stop("Parameter 'retrieve' must be TRUE or FALSE.")
  }
  
  # Check num_procs
  if (!is.null(num_procs)) {
    if (!is.numeric(num_procs)) {
      stop("Parameter 'num_procs' must be numeric.")
    } else {
      num_procs <- round(num_procs)
    }
  }
  
  # Check silent
  if (!is.logical(silent)) {
    stop("Parameter 'silent' must be logical.")
  }
  
  if (!silent) {
    .message(paste0("Exploring files... This will take a variable amount ",
                    "of time depending on the issued request and the ",
                    "performance of the file server..."))
  }
  
  if (!is.character(debug)) {
    dims_to_check <- c('time')
  } else {
    dims_to_check <- debug
    debug <- TRUE
  }
aho's avatar
aho committed
  ############################## READING FILE DIMS ############################
  # Check that no unrecognized variables are present in the path patterns
  # and also that no file dimensions are requested to THREDDs catalogs.
  # And in the mean time, build all the work pieces and look for the 
  # first available file of each dataset.
  array_of_files_to_load <- NULL
  array_of_not_found_files <- NULL
  indices_of_first_files_with_data <- vector('list', length(dat))
  selectors_of_first_files_with_data <- vector('list', length(dat))
  dataset_has_files <- rep(FALSE, length(dat))
  found_file_dims <- vector('list', length(dat))
  expected_inner_dims <- vector('list', length(dat))
  
  #print("A")
  for (i in 1:length(dat)) {
    #print("B")
    dat_selectors <- dim_params
    dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i]
    dim_vars <- paste0('$', dim_names, '$')
    file_dims <- which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE))
    if (length(file_dims) > 0) {
      file_dims <- dim_names[file_dims]
    }
    file_dims <- unique(c(pattern_dims, file_dims))
    found_file_dims[[i]] <- file_dims
    expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% file_dims))]
    # (Check the depending_file_dims).
    if (any(c(names(depending_file_dims), unlist(depending_file_dims)) %in% 
            expected_inner_dims[[i]])) {
      stop(paste0("The dimension dependancies specified in ",
                  "'depending_file_dims' can only be between file ",
                  "dimensions, but some inner dimensions found in ",
                  "dependancies for '", dat[[i]][['name']], "', which ",
                  "has the following file dimensions: ", 
                  paste(paste0("'", file_dims, "'"), collapse = ', '), ".")) 
    } else {
      a <- names(depending_file_dims) %in% file_dims
      b <- unlist(depending_file_dims) %in% file_dims
      ab <- a & b
      if (any(!ab)) {
        .warning(paste0("Detected some dependancies in 'depending_file_dims' with ",
                        "non-existing dimension names. These will be disregarded."))
        depending_file_dims <- depending_file_dims[-which(!ab)]
      }
      if (any(names(depending_file_dims) == unlist(depending_file_dims))) {
        depending_file_dims <- depending_file_dims[-which(names(depending_file_dims) == unlist(depending_file_dims))]
      }
    }
    # (Check the inner_dims_across_files).
    if (any(!(names(inner_dims_across_files) %in% file_dims)) ||
        any(!(unlist(inner_dims_across_files) %in% expected_inner_dims[[i]]))) {
      stop(paste0("All relationships specified in ",
                  "'_across' parameters must be between a inner ",
                  "dimension and a file dimension. Found wrong ",
                  "specification for '", dat[[i]][['name']], "', which ",
                  "has the following file dimensions: ", 
                  paste(paste0("'", file_dims, "'"), collapse = ', '),
                  ", and the following inner dimensions: ", 
                  paste(paste0("'", expected_inner_dims[[i]], "'"), 
                        collapse = ', '), "."))
    }
    # (Check the return_vars).
    j <- 1
    while (j <= length(return_vars)) {
      if (any(!(return_vars[[j]] %in% file_dims))) {
        if (any(return_vars[[j]] %in% expected_inner_dims[[i]])) {
          stop("Found variables in 'return_vars' requested ",
               "for some inner dimensions (for dataset '",
               dat[[i]][['name']], "'), but variables can only be ",
               "requested for file dimensions.")
        } else {
          stop("Found variables in 'return_vars' requested ",
               "for non-existing dimensions.")
        }
      }
      j <- j + 1
    }
    # (Check the metadata_dims).
    if (!is.null(metadata_dims)) {
      if (any(!(metadata_dims %in% file_dims))) {
        stop("All dimensions in 'metadata_dims' must be file dimensions.")
      }
    }
aho's avatar
aho committed

    # Add attributes indicating whether this dimension selector is value or indice
    tmp <- lapply(dat_selectors[which(dim_names != pattern_dims)], add_value_indices_flag)
    dat_selectors <- c(dat_selectors[pattern_dims], tmp)

aho's avatar
aho committed
    ## Look for _var params that should be requested automatically.
aho's avatar
aho committed
    for (dim_name in dim_names[-which(dim_names == pattern_dims)]) {
      ## The following code 'rewrites' var_params for all datasets. If providing different
      ## path pattern repositories with different file/inner dimensions, var_params might
      ## have to be handled for each dataset separately.
      if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) &&
          !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims))  {
        if (dim_name %in% c('var', 'variable')) {
          var_params <- c(var_params, setNames(list('var_names'), dim_name))
          .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", 
aho's avatar
aho committed
                          dim_name, "_var' provided. ", '"', dim_name, "_var = '", 
aho's avatar
aho committed
                          'var_names', "'", '"', " has been automatically added to ",
                          "the Start call."))
        } else {
          var_params <- c(var_params, setNames(list(dim_name), dim_name))
          .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", 
                          dim_name, "_var' requested. ", '"', dim_name, "_var = '", 
                          dim_name, "'", '"', " has been automatically added to ",
                          "the Start call."))
aho's avatar
aho committed
        }
      }
aho's avatar
aho committed

      if (attr(dat_selectors[[dim_name]], 'indices') & !(dim_name %in% names(var_params))) {
        if (dim_name %in% transform_vars) {
          var_params <- c(var_params, setNames(list(dim_name), dim_name))
          .warning(paste0("Found dimension '", dim_name, "' is required to transform but no '",
aho's avatar
aho committed
                          dim_name, "_var' provided. ", '"', dim_name, "_var = '",
                          dim_name, "'", '"', " has been automatically added to ",
                          "the Start call."))
aho's avatar
aho committed
        } else if (dim_name %in% names(dim_reorder_params)) {
          var_params <- c(var_params, setNames(list(dim_name), dim_name))
          .warning(paste0("Found dimension '", dim_name, "' is required to reorder but no '",
aho's avatar
aho committed
                          dim_name, "_var' provided. ", '"', dim_name, "_var = '",
                          dim_name, "'", '"', " has been automatically added to ",
                          "the Start call."))
aho's avatar
aho committed
      }
aho's avatar
aho committed
    }
aho's avatar
aho committed
    ## (Check the *_var parameters).
    if (any(!(unlist(var_params) %in% names(return_vars)))) {
      vars_to_add <- which(!(unlist(var_params) %in% names(return_vars)))
      new_return_vars <- vector('list', length(vars_to_add))
      names(new_return_vars) <- unlist(var_params)[vars_to_add]
      return_vars <- c(return_vars, new_return_vars)
      .warning(paste0("All '*_var' params must associate a dimension to one of the ",
                      "requested variables in 'return_vars'. The following variables",
                      " have been added to 'return_vars': ", 
                      paste(paste0("'", unlist(var_params), "'"), collapse = ', ')))
    }
    
aho's avatar
aho committed
    # Examine the selectors of file dim and create 'replace_values', which uses the first 
    # explicit selector (i.e., character) for all file dimensions.
aho's avatar
aho committed
    replace_values <- vector('list', length = length(file_dims))
    names(replace_values) <- file_dims
    for (file_dim in file_dims) {
      if (file_dim %in% names(var_params)) {
        .warning(paste0("The '", file_dim, "_var' param will be ignored since '", 
                        file_dim, "' is a file dimension (for the dataset with pattern ", 
                        dat[[i]][['path']], ")."))
      }
aho's avatar
aho committed
      # If the selector is a vector or a list of 2 without names (represent the value range)
aho's avatar
aho committed
      if (!is.list(dat_selectors[[file_dim]]) || 
          (is.list(dat_selectors[[file_dim]]) && 
           length(dat_selectors[[file_dim]]) == 2 &&
           is.null(names(dat_selectors[[file_dim]])))) {
        dat_selectors[[file_dim]] <- list(dat_selectors[[file_dim]])
      }
      first_class <- class(dat_selectors[[file_dim]][[1]])
      first_length <- length(dat_selectors[[file_dim]][[1]])

      # Length will be > 1 if it is list since beginning, e.g., depending dim is a list with 
      # names as depended dim.
aho's avatar
aho committed
      for (j in 1:length(dat_selectors[[file_dim]])) {
        sv <- selector_vector <- dat_selectors[[file_dim]][[j]]
aho's avatar
aho committed
        if (!inherits(sv, first_class) ||
aho's avatar
aho committed
            !identical(first_length, length(sv))) {
          stop("All provided selectors for depending dimensions must ",
               "be vectors of the same length and of the same class.")
        }
        if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) {
          #NOTE: ???? It doesn't make any changes.
aho's avatar
aho committed
          dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv,
                                                             return_indices = FALSE)
          # Take chunk if needed (only defined dim; undefined dims will be chunked later in 
          # find_ufd_value().
          if (chunks[[file_dim]]['n_chunks'] > 1) {
            desired_chunk_indices <- get_chunk_indices(
                                       length(dat_selectors[[file_dim]][[j]]),
                                       chunks[[file_dim]]['chunk'],
                                       chunks[[file_dim]]['n_chunks'],
                                       file_dim)
            dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][desired_chunk_indices]
            # chunk the depending dim as well
            if (file_dim %in% depending_file_dims) {
              depending_dim_name <- names(which(file_dim == depending_file_dims))
              # Chunk it only if it is defined dim (i.e., list of character with names of depended dim)
              if (!(length(dat_selectors[[depending_dim_name]]) == 1 &&
                    dat_selectors[[depending_dim_name]] %in% c('all', 'first', 'last'))) {
                if (any(sapply(dat_selectors[[depending_dim_name]], is.character))) {
                  dat_selectors[[depending_dim_name]] <-
                    dat_selectors[[depending_dim_name]][desired_chunk_indices]
                }
aho's avatar
aho committed
        } else if (!(is.numeric(sv) || 
                     (is.character(sv) && (length(sv) == 1) && (sv %in% c('all', 'first', 'last'))) || 
                     (is.list(sv) && (length(sv) == 2) && (all(sapply(sv, is.character)) || 
                                                           all(sapply(sv, is.numeric)))))) {
          stop("All explicitly provided selectors for file dimensions must be character strings.")
        } 
      }
      sv <- dat_selectors[[file_dim]][[1]]
aho's avatar
aho committed
      # 'replace_values' has the first selector (if it's character) or NULL (if it's not explicitly 
      # defined) for each file dimension.
aho's avatar
aho committed
      if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) {
aho's avatar
aho committed
        replace_values[[file_dim]] <- sv[1]
aho's avatar
aho committed
      }
    }
    #print("C")
    # Now we know which dimensions whose selectors are provided non-explicitly.
    undefined_file_dims <- file_dims[which(sapply(replace_values, is.null))]
    defined_file_dims <- file_dims[which(!(file_dims %in% undefined_file_dims))]
aho's avatar
aho committed
    # Quickly check if the depending dimensions are provided properly. The check is only for 
    # if the depending and depended file dims are both explicited defined.
aho's avatar
aho committed
    for (file_dim in file_dims) {
      if (file_dim %in% names(depending_file_dims)) {

        # Return error if depended dim is a list of values while depending dim is not 
        # defined (i.e., indices or 'all')
        if (file_dim %in% defined_file_dims &
            !(depending_file_dims[[file_dim]] %in% defined_file_dims)) {
          stop(paste0("The depended dimension, ", file_dim, ", is explictly defined ",
                      "by a list of values, while the depending dimension, ",
                       depending_file_dims[[file_dim]], ", is not explictly defined. ",
                      "Specify ", depending_file_dims[[file_dim]], " by characters."))
        }

aho's avatar
aho committed
        ## TODO: Detect multi-dependancies and forbid.
        #NOTE: The if statement below is tricky. It tries to distinguish if the depending dim
        #      has the depended dim as the names of the list. However, if the depending dim
        #      doesn't have list names and its length is 2 (i.e., list( , )), Start() thinks
        #      it means the range, just like `lat = values(list(10, 20))`. And because of this,
        #      we won't enter the following if statement, and the error will occur later in
        #      SelectorChecker(). Need to find a way to distinguish if list( , ) means range or
        #      just the values.
aho's avatar
aho committed
        if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) {
          if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) {
            stop(paste0("If providing selectors for the depending ",
                        "dimension '", file_dim, "', a ",
                        "vector of selectors must be provided for ",
                        "each selector of the dimension it depends on, '",
                        depending_file_dims[[file_dim]], "'."))
          } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) {
            stop(paste0("If providing selectors for the depending ",
                        "dimension '", file_dim, "', the name of the ",
                        "provided vectors of selectors must match ",
                        "exactly the selectors of the dimension it ",
                        "depends on, '", depending_file_dims[[file_dim]], "'."))
          } else if (is.null(names(dat_selectors[[file_dim]]))) {
            .warning(paste0("The selectors for the depending dimension '", file_dim, "' do not ",
                            "have list names. Assume that the order of the selectors matches the ",
                            "depended dimensions '", depending_file_dims[[file_dim]], "''s order."))
aho's avatar
aho committed
          }
        }
      }
    }
aho's avatar
aho committed
    # Find the possible values for the selectors that are provided as
    # indices. If the requested file is on server, impossible operation.
    if (length(grep("^http", dat[[i]][['path']])) > 0) {
      if (length(undefined_file_dims) > 0) {
        stop(paste0("All selectors for the file dimensions must be ",
                    "character strings if requesting data to a remote ",
                    "server. Found invalid selectors for the file dimensions ",
                    paste(paste0("'", undefined_file_dims, "'"), collapse = ', '), "."))
      }
      dataset_has_files[i] <- TRUE
    } else {
      dat[[i]][['path']] <- path.expand(dat[[i]][['path']])
      # Iterate over the known dimensions to find the first existing file.
      # The path to the first existing file will be used to find the 
      # values for the non explicitly defined selectors.
      first_file <- NULL
      first_file_selectors <- NULL
      if (length(undefined_file_dims) > 0) {
        replace_values[undefined_file_dims] <- '*'
      }
      ## TODO: What if length of defined_file_dims is 0? code might crash (in practice it worked for an example case)
      files_to_check <- sapply(dat_selectors[defined_file_dims], function(x) length(x[[1]]))
      sub_array_of_files_to_check <- array(1:prod(files_to_check), dim = files_to_check)
      j <- 1
      #print("D")
      while (j <= prod(files_to_check) && is.null(first_file)) {
        selector_indices <- which(sub_array_of_files_to_check == j, arr.ind = TRUE)[1, ]
        selectors <- sapply(1:length(defined_file_dims), 
                            function (x) {
                              vector_to_pick <- 1
                              if (defined_file_dims[x] %in% names(depending_file_dims)) {
                                vector_to_pick <- selector_indices[which(defined_file_dims == depending_file_dims[[defined_file_dims[x]]])]
                              }
                              dat_selectors[defined_file_dims][[x]][[vector_to_pick]][selector_indices[x]]
                            })
        replace_values[defined_file_dims] <- selectors
        file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values)
        file_path <- Sys.glob(file_path)
        if (length(file_path) > 0) {
          first_file <- file_path[1]
          first_file_selectors <- selectors
        }
        j <- j + 1
      }
      #print("E")
      # Start looking for values for the non-explicitly defined selectors.
      if (is.null(first_file)) {
        .warning(paste0("No found files for the datset '", dat[[i]][['name']], 
                        "'. Provide existing selectors for the file dimensions ",
                        " or check and correct its path pattern: ", dat[[i]][['path']]))
      } else {
        dataset_has_files[i] <- TRUE
        ## TODO: Improve message here if no variable found:
        if (length(undefined_file_dims) > 0) {
aho's avatar
aho committed
          # Note: "dat[[i]][['path']]" is changed by the function below.
          dat_selectors <- find_ufd_value(undefined_file_dims, dat, i, replace_values,
                                          first_file, file_dims, path_glob_permissive,             
                                          depending_file_dims, dat_selectors, selector_checker,
                                          chunks)
aho's avatar
aho committed
          #print("I")
        } else {
aho's avatar
aho committed
          #NOTE: If there is no non-explicitly defined dim, use the first found file
          #      to modify. Problem: '*' doesn't catch all the possible file. Only use
          #      the first file.
aho's avatar
aho committed
          dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, 
                                                        defined_file_dims, dat[[i]][['name']], path_glob_permissive)
        }
      }
    }
    dat[[i]][['selectors']] <- dat_selectors

aho's avatar
aho committed
    # Now fetch for the first available file
    if (dataset_has_files[i]) {
      known_dims <- file_dims
    } else {
      known_dims <- defined_file_dims
    }
    replace_values <- vector('list', length = length(known_dims))
    names(replace_values) <- known_dims
    files_to_load <- sapply(dat_selectors[known_dims], function(x) length(x[[1]]))
    files_to_load[found_pattern_dim] <- 1
    sub_array_of_files_to_load <- array(1:prod(files_to_load), 
                                        dim = files_to_load)
    names(dim(sub_array_of_files_to_load)) <- known_dims
    sub_array_of_not_found_files <- array(!dataset_has_files[i], 
                                          dim = files_to_load)
    names(dim(sub_array_of_not_found_files)) <- known_dims
    
    if (largest_dims_length) {
      if (!exists('selector_indices_save')) {
        selector_indices_save <- vector('list', length = length(dat))
      }
      if (!exists('selectors_total_list')) {
        selectors_total_list <- vector('list', length = length(dat))
      }
      selector_indices_save[[i]] <- vector('list', length = prod(files_to_load))
      selectors_total_list[[i]] <- vector('list', length = prod(files_to_load))
aho's avatar
aho committed
    }
    j <- 1
    # NOTE: This while loop has these objects that are used afterward: 'sub_array_of_files_to_load',
    #       'sub_array_of_not_found_files', 'indices_of_first_files_with_data', 'selectors_of_first_files_with_data';
    #       'selector_indices_save' and 'selectors_total_list' are used if 'largest_dims_length = T'.
aho's avatar
aho committed
    while (j <= prod(files_to_load)) {
      selector_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
      names(selector_indices) <- known_dims
      if (largest_dims_length) {
        tmp <- selector_indices
        tmp[which(known_dims == found_pattern_dim)] <- i
        selector_indices_save[[i]][[j]] <- tmp
      }
      # This 'selectors' is only used in this while loop
aho's avatar
aho committed
      selectors <- sapply(1:length(known_dims), 
                          function (x) {
                            vector_to_pick <- 1
                            if (known_dims[x] %in% names(depending_file_dims)) {
                              vector_to_pick <- selector_indices[which(known_dims == depending_file_dims[[known_dims[x]]])]
                            }
                            dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]]
                          })
      names(selectors) <- known_dims
      if (largest_dims_length) {
        selectors_total_list[[i]][[j]] <- selectors
        names(selectors_total_list[[i]][[j]]) <- known_dims
      }

      # 'replace_values' and 'file_path' are only used in this while loop
aho's avatar
aho committed
      replace_values[known_dims] <- selectors
      if (!dataset_has_files[i]) {
        if (any(is.na(selectors))) {
          replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))]
        }
        file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE)
        sub_array_of_files_to_load[j] <- file_path
        #sub_array_of_not_found_files[j] <- TRUE???
      } else {
        if (any(is.na(selectors))) {
          replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))]
          file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE)
          sub_array_of_files_to_load[j] <- file_path
          sub_array_of_not_found_files[j] <- TRUE
        } else {
          file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values)
aho's avatar
aho committed

          #NOTE: After replacing tags, there is still * if path_glob_permissive is not FALSE.
          #      Find the possible value to substitute *.
aho's avatar
aho committed
          if (grepl('\\*', file_path)) {
            found_files <- Sys.glob(file_path)
            file_path <- found_files[1]   # choose only the first file.
            #NOTE: Above line chooses only the first found file. Because * is not tags, which means
            #      it is not a dimension. So it cannot store more than one item. If use * to define
            #      the path, that * should only represent one possibility.
aho's avatar
aho committed
            if (length(found_files) > 1) {
              .warning("Using glob expression * to define the path, but more ",
                       "than one match is found. Choose the first match only.")
            }
          }

aho's avatar
aho committed
          if (!(length(grep("^http", file_path)) > 0)) {
            if (grepl(file_path, '*', fixed = TRUE)) {
              file_path_full <- Sys.glob(file_path)[1]
              if (nchar(file_path_full) > 0) {
                file_path <- file_path_full
              }
            }
          }
          sub_array_of_files_to_load[j] <- file_path
          if (is.null(indices_of_first_files_with_data[[i]])) {
            if (!(length(grep("^http", file_path)) > 0)) {
              if (!file.exists(file_path)) {
                file_path <- NULL
              }
            }
            if (!is.null(file_path)) {
              test_file <- NULL
              ## TODO: suppress error messages
              test_file <- file_opener(file_path)
              if (!is.null(test_file)) {
                selector_indices[which(known_dims == found_pattern_dim)] <- i
                indices_of_first_files_with_data[[i]] <- selector_indices
                selectors_of_first_files_with_data[[i]] <- selectors
                file_closer(test_file)
              }
            }
          }
        }
      }
      j <- j + 1
    }
    # Extend array as needed progressively
    if (is.null(array_of_files_to_load)) {
      array_of_files_to_load <- sub_array_of_files_to_load
      array_of_not_found_files <- sub_array_of_not_found_files
    } else {
      array_of_files_to_load <- .MergeArrays(array_of_files_to_load, sub_array_of_files_to_load,
                                             along = found_pattern_dim)
      ## TODO: file_dims, and variables like that.. are still ok now? I don't think so
      array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files,
                                               along = found_pattern_dim)
    }    
  }
  if (all(sapply(indices_of_first_files_with_data, is.null))) {
    stop("No data files found for any of the specified datasets.")
  }
  
  ########################### READING INNER DIMS. #############################
  #print("J")
  ## TODO: To be run in parallel (local multi-core)
  # Now time to work out the inner file dimensions.
  # First pick the requested variables.

#//// This part is moved below the new code////
# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to below can save some work 
#       and get the revised common_return_vars if it is changed.
#  dims_to_iterate <- NULL
#  for (return_var in names(return_vars)) {
#    dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]]))
#  }
#  if (found_pattern_dim %in% dims_to_iterate) {
#    dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)]
#  }
#//////////////////////////////////////////////

  # Separate 'return_vars' into 'common_return_vars' and 'return_vars' (those = 'dat').
aho's avatar
aho committed
  common_return_vars <- NULL
  common_first_found_file <- NULL
  common_return_vars_pos <- NULL
  if (length(return_vars) > 0) {
    common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% x)))
  }
  if (length(common_return_vars_pos) > 0) {
    common_return_vars <- return_vars[common_return_vars_pos]
    return_vars <- return_vars[-common_return_vars_pos]
    common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0)))
    names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)])
  }

#!!!!!!!Check here. return_vars has removed the common ones, and here remove 'dat' value????
#It seems like it does some benefits to later parts
aho's avatar
aho committed
  return_vars <- lapply(return_vars, 
                        function(x) {
                          if (found_pattern_dim %in% x) {
                            x[-which(x == found_pattern_dim)]
                          } else {
                            x
                          }
                        })
#////////////////////////////////////////////
  # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or
  # (2) time_across = 'sdate'.
aho's avatar
aho committed
  # NOTE: Not sure if the loop over dat is needed here. In theory, all the dat
  #       should have the same dimensions (?) so expected_inner_dims and
  #       found_file_dims are the same. The selector_array may possible be
  #       different, but then the attribute will be correct? If it's different,
  #       it should depend on 'dat' (but here we only consider common_return_vars)
  for (i in 1:length(dat)) {
    for (inner_dim in expected_inner_dims[[i]]) {
      # The selectors for the inner dimension are taken.
      selector_array <- dat[[i]][['selectors']][[inner_dim]]
      file_dim_as_selector_array_dim <- 1

      if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
        file_dim_as_selector_array_dim <- 
          found_file_dims[[i]][which(found_file_dims[[i]] %in% names(dim(selector_array)))]
      if (inner_dim %in% inner_dims_across_files | 
          is.character(file_dim_as_selector_array_dim)) {  #(2) or (1)
        # inner_dim is not in return_vars or is NULL
        need_correct <- FALSE
        if (((!inner_dim %in% names(common_return_vars)) &
             (!inner_dim %in% names(return_vars))) |
            (inner_dim %in% names(common_return_vars) & 
             is.null(common_return_vars[[inner_dim]]))) {
          need_correct <- TRUE
        } else if (inner_dim %in% names(common_return_vars) &
                   (inner_dim %in% inner_dims_across_files) &
                   !is.null(names(inner_dims_across_files))) { #(2)
          if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) need_correct <- TRUE
          
        } else if (inner_dim %in% names(common_return_vars) &
                   is.character(file_dim_as_selector_array_dim)) { #(1)
          if (!all(file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])) {
            need_correct <- TRUE
            file_dim_as_selector_array_dim <- file_dim_as_selector_array_dim[which(!file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])]
          }
        }
        if (need_correct) {
          common_return_vars[[inner_dim]] <-
            c(common_return_vars[[inner_dim]],
              correct_return_vars(inner_dim, inner_dims_across_files,
                                  found_pattern_dim, file_dim_as_selector_array_dim))

  # Return info about return_vars when dat > 1
  if (length(dat) > 1 & length(common_return_vars) > 0) {
    .message("\n", "[ATTENTION]", 
             paste0("According to parameter 'return_vars', the inner dimensions: ",
                    paste(names(common_return_vars), collapse = ', '),
                    ", are common among all the datasets. Please be sure that ",
                    "this is expected to avoid potential wrong results, and ",
                    "verify the outputs carefully."),
             "\n", indent = 1)
   }

#////////////////////////////////////////////

# This part was above where return_vars is seperated into return_vars and common_return_vars
# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to here can save some work 
#       and get the revised common_return_vars if it is changed in the part right above.
  dims_to_iterate <- NULL
  for (common_return_var in names(common_return_vars)) {
    dims_to_iterate <- unique(c(dims_to_iterate, common_return_vars[[common_return_var]]))
  }
#////////////////////////////////////////////

  # Change the structure of 'dat'. If the selector is not list or it is list of 2 that represents
  # range, make it as list. The dimensions that go across files will later be extended to have
  # lists of lists/vectors of selectors.
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      for (inner_dim in expected_inner_dims[[i]]) {
        if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || # not list, or
            (is.list(dat[[i]][['selectors']][[inner_dim]]) &&  # list of 2 that represents range
             length(dat[[i]][['selectors']][[inner_dim]]) == 2 &&
             is.null(names(dat[[i]][['selectors']][[inner_dim]])))) {
          dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]])
        }
      }
    }
  }


  # Use 'common_return_vars' and 'return_vars' to generate the initial picked(_common)_vars,
  # picked(_common)_vars_ordered, and picked(_common)_vars_unorder_indices.
  ## Create 'picked_common_vars'
aho's avatar
aho committed
  if (length(common_return_vars) > 0) {
    picked_common_vars <- vector('list', length = length(common_return_vars))
    names(picked_common_vars) <- names(common_return_vars)
  } else {
    picked_common_vars <- NULL
  }
  picked_common_vars_ordered <- picked_common_vars
  picked_common_vars_unorder_indices <- picked_common_vars
aho's avatar
aho committed
  picked_vars <- vector('list', length = length(dat))
  names(picked_vars) <- dat_names
  if (length(return_vars) > 0) {
    picked_vars <- lapply(picked_vars, function(x) {
                                         x <- vector('list', length = length(return_vars))} )
    picked_vars <- lapply(picked_vars, setNames, names(return_vars))
  }
aho's avatar
aho committed
  picked_vars_ordered <- picked_vars
aho's avatar
aho committed
  picked_vars_unorder_indices <- picked_vars
aho's avatar
aho committed
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]])
      array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]]))
      names(array_file_dims) <- found_file_dims[[i]]
      if (length(dims_to_iterate) > 0) {
        indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], function(x) 1:x)
      }
      array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE)))
      array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files))
      array_of_not_found_var_files <- do.call('[', c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE)))
      # Create previous_indices. The initial value is -1 because there is no 'previous' before the
      # 1st current_indices.
aho's avatar
aho committed
      previous_indices <- rep(-1, length(indices_of_first_file))
      names(previous_indices) <- names(indices_of_first_file)
      # Create first_found_file for vars_to_read defining. It is for the dim value in return_vars
      # that is NULL or character(0). Because these dims only need to be read once, so 
      # first_found_file indicates if these dims have been read or not. 
      # If read, it turns to TRUE and won't be included in vars_to_read again in the next 
      # 'for j loop'. 
aho's avatar
aho committed
      first_found_file <- NULL
      if (length(return_vars) > 0) {
        first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0)))
        names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)])
      }
aho's avatar
aho committed
      for (j in 1:length(array_of_var_files)) {
        current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ]
        names(current_indices) <- names(indices_of_first_file)
        if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) {
          changed_dims <- which(current_indices != previous_indices)
          # Prepare vars_to_read for this dataset (i loop) and this file (j loop)
          vars_to_read <- generate_vars_to_read(return_vars, changed_dims, first_found_file, 
                                                common_return_vars, common_first_found_file, i)

aho's avatar
aho committed
          file_object <- file_opener(array_of_var_files[j])
          if (!is.null(file_object)) {
            for (var_to_read in vars_to_read) {
              if (var_to_read %in% unlist(var_params)) {
                associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
              }
              var_name_to_reader <- var_to_read
              names(var_name_to_reader) <- 'var'
              var_dims <- file_dim_reader(NULL, file_object, var_name_to_reader, NULL,
                                          synonims)
              # file_dim_reader returns dimension names as found in the file.
              # Need to translate accoridng to synonims:
              names(var_dims) <- replace_with_synonmins(var_dims, synonims)
aho's avatar
aho committed
              if (!is.null(var_dims)) {
aho's avatar
aho committed
                if (var_to_read %in% names(common_return_vars)) {
                  var_to_check <- common_return_vars[[var_to_read]]
                  list_picked_var_of_read <- generate_picked_var_of_read(
                    var_to_read, var_to_check, array_of_files_to_load, var_dims,
                    array_of_var_files = array_of_var_files[j], file_var_reader,
                    file_object, synonims, associated_dim_name, dim_reorder_params,
                    aiat, current_indices, var_params,
                    either_picked_vars = picked_common_vars[[var_to_read]],
                    either_picked_vars_ordered = picked_common_vars_ordered[[var_to_read]],
                    either_picked_vars_unorder_indices = picked_common_vars_unorder_indices[[var_to_read]]
                  )
                  picked_common_vars[[var_to_read]] <- list_picked_var_of_read$either_picked_vars
                  picked_common_vars_ordered[[var_to_read]] <-
                    list_picked_var_of_read$either_picked_vars_ordered
                  picked_common_vars_unorder_indices[[var_to_read]] <-
                    list_picked_var_of_read$either_picked_vars_unorder_indices

                ## (2) return_vars
aho's avatar
aho committed
                } else {
                  var_to_check <- return_vars[[var_to_read]]
                  list_picked_var_of_read <- generate_picked_var_of_read(
                    var_to_read, var_to_check, array_of_files_to_load, var_dims,
                    array_of_var_files = array_of_var_files[j], file_var_reader,
                    file_object, synonims, associated_dim_name, dim_reorder_params,
                    aiat, current_indices, var_params,
                    either_picked_vars = picked_vars[[i]][[var_to_read]],
                    either_picked_vars_ordered = picked_vars_ordered[[i]][[var_to_read]],
                    either_picked_vars_unorder_indices = picked_vars_unorder_indices[[i]][[var_to_read]]
                  )
                  picked_vars[[i]][[var_to_read]] <- list_picked_var_of_read$either_picked_vars
                  picked_vars_ordered[[i]][[var_to_read]] <-
                    list_picked_var_of_read$either_picked_vars_ordered
                  picked_vars_unorder_indices[[i]][[var_to_read]] <-
                    list_picked_var_of_read$either_picked_vars_unorder_indices
aho's avatar
aho committed
                }
                if (var_to_read %in% names(first_found_file)) {
                  first_found_file[var_to_read] <- TRUE
                }
                if (var_to_read %in% names(common_first_found_file)) {
                  common_first_found_file[var_to_read] <- TRUE
                }
              } else {
                stop("Could not find variable '", var_to_read,
aho's avatar
aho committed
                     "' in the file ", array_of_var_files[j])
              }
            }
            file_closer(file_object)
          }
        }
        previous_indices <- current_indices
      }
    }
  }
  # Once we have the variable values, we can work out the indices
  # for the implicitly defined selectors.
aho's avatar
aho committed
  beta <- transform_extra_cells
  dims_to_crop <- vector('list')
  transformed_vars <- vector('list', length = length(dat))
  names(transformed_vars) <- dat_names
  transformed_vars_ordered <- transformed_vars
  transformed_vars_unorder_indices <- transformed_vars
  transformed_common_vars <- NULL
  transformed_common_vars_ordered <- NULL
  transformed_common_vars_unorder_indices <- NULL
  transform_crop_domain <- NULL

aho's avatar
aho committed
  # store warning messages from transform 
  warnings1 <- NULL
  warnings2 <- NULL

aho's avatar
aho committed
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      indices <- indices_of_first_files_with_data[[i]]
      if (!is.null(indices)) {
        #//////////////////////////////////////////////////
        # Find data_dims
        ## If largest_dims_length is a number & !merge_across_dims,
        ## directly assign this dim as the number;
        ## If largest_dims_length is a number & this dim is across files, find the dim length of each file
        find_largest_dims_length_by_files <- FALSE
        if (is.numeric(largest_dims_length)) {
          if (names(largest_dims_length) %in% inner_dims_across_files) {
            find_largest_dims_length_by_files <- TRUE
          }
        } else if (largest_dims_length) {
          find_largest_dims_length_by_files <- TRUE
        } 

        if (!find_largest_dims_length_by_files) { # old code
          file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]])))
          # The following 5 lines should go several lines below, but were moved
          # here for better performance.
          # If any of the dimensions comes without defining variable, then we read
          # the data dimensions.