Start.R 201 KB
Newer Older
aho's avatar
aho committed
    #    if (length(var_params) > 0) {
    #      return_vars <- as.list(paste0(names(var_params), '_var'))
    #    } else {
    #      return_vars <- list()
    #    }
  }
  if (!is.list(return_vars)) {
    stop("Parameter 'return_vars' must be a list or NULL.")
  }
  if (length(return_vars) > 0 && is.null(names(return_vars))) {
    #    names(return_vars) <- rep('', length(return_vars))
    stop("Parameter 'return_vars' must be a named list.")
  }
  i <- 1
  while (i <= length(return_vars)) {
    #    if (names(return_vars)[i] == '') {
    #      if (!(is.character(return_vars[[i]]) && (length(return_vars[[i]]) == 1))) {
    #        stop("The ", i, "th specification in 'return_vars' is malformed.")
    #      } 
    #      if (!grepl('_var$', return_vars[[i]])) {
    #        stop("The ", i, "th specification in 'return_vars' is malformed.")
    #      }
    #      dim_name <- strsplit(return_vars[[i]], '_var$')[[1]][1]
    #      if (!(dim_name %in% names(var_params))) {
    #        stop("'", dim_name, "_var' requested in 'return_vars' but ",
    #             "no '", dim_name, "_var' specified in the .Load call.")
    #      }
    #      names(return_vars)[i] <- var_params[[dim_name]]
    #      return_vars[[i]] <- found_pattern_dim
    #    } else
    if (length(return_vars[[i]]) > 0) { 
      if (!is.character(return_vars[[i]])) {
        stop("The ", i, "th specification in 'return_vars' is malformed. It ",
             "must be a vector of character strings of valid file dimension ",
             "names.")
      }
    }
    i <- i + 1
  }
  
  # Check synonims
  if (!is.null(synonims)) {
    error <- FALSE
    if (!is.list(synonims)) {
      error <- TRUE
    }
    for (synonim_entry in names(synonims)) {
      if (!(synonim_entry %in% names(dim_params)) &&
          !(synonim_entry %in% names(return_vars))) {
        error <- TRUE
      }
      if (!is.character(synonims[[synonim_entry]]) ||
          length(synonims[[synonim_entry]]) < 1) {
        error <- TRUE
      }
    }
    if (error) {
      stop("Parameter 'synonims' must be a named list, where the names are ",
           "a name of a requested dimension or variable and the values are ",
           "vectors of character strings with at least one alternative name ",
           " for each dimension or variable in 'synonims'.")
    }
  }
  if (length(unique(names(synonims))) < length(names(synonims))) {
    stop("There must not be repeated entries in 'synonims'.")
  }
  if (length(unique(unlist(synonims))) < length(unlist(synonims))) {
    stop("There must not be repeated values in 'synonims'.")
  }
  # Make that all dims and vars have an entry in synonims, even if only dim_name = dim_name
  dim_entries_to_add <- which(!(names(dim_params) %in% names(synonims)))
  if (length(dim_entries_to_add) > 0) {
    synonims[names(dim_params)[dim_entries_to_add]] <- as.list(names(dim_params)[dim_entries_to_add])
  }
  var_entries_to_add <- which(!(names(var_params) %in% names(synonims)))
  if (length(var_entries_to_add) > 0) {
    synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add])
  }
aho's avatar
aho committed
 
  # Check if return_vars name is inner dim name. If it is synonim, change back to inner dim name
  # and return a warning.
  use_syn_names <- which(names(return_vars) %in% unlist(synonims) & 
                         !names(return_vars) %in% names(synonims))
  if (!identical(use_syn_names, integer(0))) {
    for (use_syn_name in use_syn_names) {
      wrong_name <- names(return_vars)[use_syn_name]
      names(return_vars)[use_syn_name] <- names(unlist(
                                            lapply(lapply(synonims, '%in%',
                                                   names(return_vars)[use_syn_name]),
                                            which)))
      .warning(paste0("The name '", wrong_name, "' in parameter 'return_vars' in synonim. ",
                      "Change it back to the inner dimension name, '",
                      names(return_vars)[use_syn_name], "'."))
    }
  }
 
aho's avatar
aho committed
  # Check selector_checker
  if (is.null(selector_checker) || !is.function(selector_checker)) {
    stop("Parameter 'selector_checker' must be a function.")
  }
  
  # Check file_opener
  if (is.null(file_opener) || !is.function(file_opener)) {
    stop("Parameter 'file_opener' must be a function.")
  }
  
  # Check file_var_reader
  if (!is.null(file_var_reader) && !is.function(file_var_reader)) {
    stop("Parameter 'file_var_reader' must be a function.")
  }
  
  # Check file_dim_reader
  if (!is.null(file_dim_reader) && !is.function(file_dim_reader)) {
    stop("Parameter 'file_dim_reader' must be a function.")
  }
  
  # Check file_data_reader
  if (is.null(file_data_reader) || !is.function(file_data_reader)) {
    stop("Parameter 'file_data_reader' must be a function.")
  }
  
  # Check file_closer
  if (is.null(file_closer) || !is.function(file_closer)) {
    stop("Parameter 'file_closer' must be a function.")
  }
  
  # Check transform
  if (!is.null(transform)) {
    if (!is.function(transform)) {
      stop("Parameter 'transform' must be a function.")
    }
  }
  
  # Check transform_params
  if (!is.null(transform_params)) {
    if (!is.list(transform_params)) {
      stop("Parameter 'transform_params' must be a list.")
    }
    if (is.null(names(transform_params))) {
      stop("Parameter 'transform_params' must be a named list.")
    }
  }
  
  # Check transform_vars
  if (!is.null(transform_vars)) {
    if (!is.character(transform_vars)) {
      stop("Parameter 'transform_vars' must be a vector of character strings.")
    }
  }
  if (any(!(transform_vars %in% names(return_vars)))) {
    stop("All the variables specified in 'transform_vars' must also be specified in 'return_vars'.")
  }
  
  # Check apply_indices_after_transform
  if (!is.logical(apply_indices_after_transform)) {
    stop("Parameter 'apply_indices_after_transform' must be either TRUE or FALSE.")
  }
  aiat <- apply_indices_after_transform
  
  # Check transform_extra_cells
  if (!is.numeric(transform_extra_cells)) {
    stop("Parameter 'transform_extra_cells' must be numeric.")
  }
  transform_extra_cells <- round(transform_extra_cells)
  
  # Check split_multiselected_dims
  if (!is.logical(split_multiselected_dims)) {
    stop("Parameter 'split_multiselected_dims' must be TRUE or FALSE.")
  }
  
  # Check path_glob_permissive
  if (!is.numeric(path_glob_permissive) && !is.logical(path_glob_permissive)) {
    stop("Parameter 'path_glob_permissive' must be TRUE, FALSE or an integer.")
  }
  if (length(path_glob_permissive) != 1) {
    stop("Parameter 'path_glob_permissive' must be of length 1.")
  }
  
  # Check largest_dims_length
  if (!is.numeric(largest_dims_length) && !is.logical(largest_dims_length)) {
    stop("Parameter 'largest_dims_length' must be TRUE, FALSE or a named integer vector.")
  }
  if (is.numeric(largest_dims_length)) {
    if (any(largest_dims_length %% 1 != 0) | any(largest_dims_length < 0) | is.null(names(largest_dims_length))) {
      stop("Parameter 'largest_dims_length' must be TRUE, FALSE or a named integer vector.")
    }
  }
  if (is.logical(largest_dims_length) && length(largest_dims_length) != 1) {
    stop("Parameter 'path_glob_permissive' must be TRUE, FALSE or a named integer vector.")
  }

aho's avatar
aho committed
  # Check retrieve
  if (!is.logical(retrieve)) {
    stop("Parameter 'retrieve' must be TRUE or FALSE.")
  }
  
  # Check num_procs
  if (!is.null(num_procs)) {
    if (!is.numeric(num_procs)) {
      stop("Parameter 'num_procs' must be numeric.")
    } else {
      num_procs <- round(num_procs)
    }
  }
  
  # Check silent
  if (!is.logical(silent)) {
    stop("Parameter 'silent' must be logical.")
  }
  
  if (!silent) {
    .message(paste0("Exploring files... This will take a variable amount ",
                    "of time depending on the issued request and the ",
                    "performance of the file server..."))
  }
  
  if (!is.character(debug)) {
    dims_to_check <- c('time')
  } else {
    dims_to_check <- debug
    debug <- TRUE
  }
  
  ############################## READING FILE DIMS ############################
  # Check that no unrecognized variables are present in the path patterns
  # and also that no file dimensions are requested to THREDDs catalogs.
  # And in the mean time, build all the work pieces and look for the 
  # first available file of each dataset.
  array_of_files_to_load <- NULL
  array_of_not_found_files <- NULL
  indices_of_first_files_with_data <- vector('list', length(dat))
  selectors_of_first_files_with_data <- vector('list', length(dat))
  dataset_has_files <- rep(FALSE, length(dat))
  found_file_dims <- vector('list', length(dat))
  expected_inner_dims <- vector('list', length(dat))
  
  #print("A")
  for (i in 1:length(dat)) {
    #print("B")
    dat_selectors <- dim_params
    dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i]
    dim_vars <- paste0('$', dim_names, '$')
    file_dims <- which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE))
    if (length(file_dims) > 0) {
      file_dims <- dim_names[file_dims]
    }
    file_dims <- unique(c(pattern_dims, file_dims))
    found_file_dims[[i]] <- file_dims
    expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% file_dims))]
    # (Check the depending_file_dims).
    if (any(c(names(depending_file_dims), unlist(depending_file_dims)) %in% 
            expected_inner_dims[[i]])) {
      stop(paste0("The dimension dependancies specified in ",
                  "'depending_file_dims' can only be between file ",
                  "dimensions, but some inner dimensions found in ",
                  "dependancies for '", dat[[i]][['name']], "', which ",
                  "has the following file dimensions: ", 
                  paste(paste0("'", file_dims, "'"), collapse = ', '), ".")) 
    } else {
      a <- names(depending_file_dims) %in% file_dims
      b <- unlist(depending_file_dims) %in% file_dims
      ab <- a & b
      if (any(!ab)) {
        .warning(paste0("Detected some dependancies in 'depending_file_dims' with ",
                        "non-existing dimension names. These will be disregarded."))
        depending_file_dims <- depending_file_dims[-which(!ab)]
      }
      if (any(names(depending_file_dims) == unlist(depending_file_dims))) {
        depending_file_dims <- depending_file_dims[-which(names(depending_file_dims) == unlist(depending_file_dims))]
      }
    }
    # (Check the inner_dims_across_files).
    if (any(!(names(inner_dims_across_files) %in% file_dims)) ||
        any(!(unlist(inner_dims_across_files) %in% expected_inner_dims[[i]]))) {
      stop(paste0("All relationships specified in ",
                  "'_across' parameters must be between a inner ",
                  "dimension and a file dimension. Found wrong ",
                  "specification for '", dat[[i]][['name']], "', which ",
                  "has the following file dimensions: ", 
                  paste(paste0("'", file_dims, "'"), collapse = ', '),
                  ", and the following inner dimensions: ", 
                  paste(paste0("'", expected_inner_dims[[i]], "'"), 
                        collapse = ', '), "."))
    }
    # (Check the return_vars).
    j <- 1
    while (j <= length(return_vars)) {
      if (any(!(return_vars[[j]] %in% file_dims))) {
        if (any(return_vars[[j]] %in% expected_inner_dims[[i]])) {
          stop("Found variables in 'return_vars' requested ",
               "for some inner dimensions (for dataset '",
               dat[[i]][['name']], "'), but variables can only be ",
               "requested for file dimensions.")
        } else {
          stop("Found variables in 'return_vars' requested ",
               "for non-existing dimensions.")
        }
      }
      j <- j + 1
    }
    # (Check the metadata_dims).
    if (!is.null(metadata_dims)) {
      if (any(!(metadata_dims %in% file_dims))) {
        stop("All dimensions in 'metadata_dims' must be file dimensions.")
      }
    }
aho's avatar
aho committed

    # Add attributes indicating whether this dimension selector is value or indice
    tmp <- lapply(dat_selectors[which(dim_names != pattern_dims)], add_value_indices_flag)
    dat_selectors <- c(dat_selectors[pattern_dims], tmp)

aho's avatar
aho committed
    ## Look for _var params that should be requested automatically.
aho's avatar
aho committed
    for (dim_name in dim_names[-which(dim_names == pattern_dims)]) {
      ## The following code 'rewrites' var_params for all datasets. If providing different
      ## path pattern repositories with different file/inner dimensions, var_params might
      ## have to be handled for each dataset separately.
      if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) &&
          !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims))  {
        if (dim_name %in% c('var', 'variable')) {
          var_params <- c(var_params, setNames(list('var_names'), dim_name))
          .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", 
                          dim_name, "_var' requested. ", '"', dim_name, "_var = '", 
                          'var_names', "'", '"', " has been automatically added to ",
                          "the Start call."))
        } else {
          var_params <- c(var_params, setNames(list(dim_name), dim_name))
          .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", 
                          dim_name, "_var' requested. ", '"', dim_name, "_var = '", 
                          dim_name, "'", '"', " has been automatically added to ",
                          "the Start call."))
aho's avatar
aho committed
        }
      }
    }
aho's avatar
aho committed
    ## (Check the *_var parameters).
    if (any(!(unlist(var_params) %in% names(return_vars)))) {
      vars_to_add <- which(!(unlist(var_params) %in% names(return_vars)))
      new_return_vars <- vector('list', length(vars_to_add))
      names(new_return_vars) <- unlist(var_params)[vars_to_add]
      return_vars <- c(return_vars, new_return_vars)
      .warning(paste0("All '*_var' params must associate a dimension to one of the ",
                      "requested variables in 'return_vars'. The following variables",
                      " have been added to 'return_vars': ", 
                      paste(paste0("'", unlist(var_params), "'"), collapse = ', ')))
    }
    
aho's avatar
aho committed
    # Examine the selectors of file dim and create 'replace_values', which uses the first 
    # explicit selector (i.e., character) for all file dimensions.
aho's avatar
aho committed
    replace_values <- vector('list', length = length(file_dims))
    names(replace_values) <- file_dims
    for (file_dim in file_dims) {
      if (file_dim %in% names(var_params)) {
        .warning(paste0("The '", file_dim, "_var' param will be ignored since '", 
                        file_dim, "' is a file dimension (for the dataset with pattern ", 
                        dat[[i]][['path']], ")."))
      }
aho's avatar
aho committed
      # If the selector is a vector or a list of 2 without names (represent the value range)
aho's avatar
aho committed
      if (!is.list(dat_selectors[[file_dim]]) || 
          (is.list(dat_selectors[[file_dim]]) && 
           length(dat_selectors[[file_dim]]) == 2 &&
           is.null(names(dat_selectors[[file_dim]])))) {
        dat_selectors[[file_dim]] <- list(dat_selectors[[file_dim]])
      }
      first_class <- class(dat_selectors[[file_dim]][[1]])
      first_length <- length(dat_selectors[[file_dim]][[1]])
      for (j in 1:length(dat_selectors[[file_dim]])) {
        sv <- selector_vector <- dat_selectors[[file_dim]][[j]]
        if (!identical(first_class, class(sv)) ||
            !identical(first_length, length(sv))) {
          stop("All provided selectors for depending dimensions must ",
               "be vectors of the same length and of the same class.")
        }
        if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) {
          dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv,
                                                             return_indices = FALSE)
          # Take chunk if needed
          if (chunks[[file_dim]]['n_chunks'] > 1) {
            dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][chunk_indices(
                                                length(dat_selectors[[file_dim]][[j]]),
                                                chunks[[file_dim]]['chunk'],
                                                chunks[[file_dim]]['n_chunks'],
                                                file_dim)]
            # chunk the depending dim as well
            if (file_dim %in% depending_file_dims) {
              depending_dim_name <- names(which(file_dim == depending_file_dims))
              #TODO: If j is more than 1? What will it be like?
              # Should version (depending dim) has list = 1 (j) above?
              if (!is.null(names(dat_selectors[[depending_dim_name]]))) {
                dat_selectors[[depending_dim_name]] <-
                  dat_selectors[[depending_dim_name]][dat_selectors[[file_dim]][[j]]]
              }
            }
          }
aho's avatar
aho committed
        } else if (!(is.numeric(sv) || 
                     (is.character(sv) && (length(sv) == 1) && (sv %in% c('all', 'first', 'last'))) || 
                     (is.list(sv) && (length(sv) == 2) && (all(sapply(sv, is.character)) || 
                                                           all(sapply(sv, is.numeric)))))) {
          stop("All explicitly provided selectors for file dimensions must be character strings.")
        } 
      }
      sv <- dat_selectors[[file_dim]][[1]]
aho's avatar
aho committed
      # 'replace_values' has the first selector (if it's character) or NULL (if it's not explicitly 
      # defined) for each file dimension.
aho's avatar
aho committed
      if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) {
aho's avatar
aho committed
        replace_values[[file_dim]] <- sv[1]
aho's avatar
aho committed
      }
    }
    #print("C")
    # Now we know which dimensions whose selectors are provided non-explicitly.
    undefined_file_dims <- file_dims[which(sapply(replace_values, is.null))]
    defined_file_dims <- file_dims[which(!(file_dims %in% undefined_file_dims))]
aho's avatar
aho committed
    # Quickly check if the depending dimensions are provided properly. The check is only for 
    # if the depending and depended file dims are both explicited defined.
aho's avatar
aho committed
    for (file_dim in file_dims) {
      if (file_dim %in% names(depending_file_dims)) {
        ## TODO: Detect multi-dependancies and forbid.
        #NOTE: The if statement below is tricky. It tries to distinguish if the depending dim
        #      has the depended dim as the names of the list. However, if the depending dim
        #      doesn't have list names and its length is 2 (i.e., list( , )), Start() thinks
        #      it means the range, just like `lat = values(list(10, 20))`. And because of this,
        #      we won't enter the following if statement, and the error will occur later in
        #      SelectorChecker(). Need to find a way to distinguish if list( , ) means range or
        #      just the values.
aho's avatar
aho committed
        if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) {
          if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) {
            stop(paste0("If providing selectors for the depending ",
                        "dimension '", file_dim, "', a ",
                        "vector of selectors must be provided for ",
                        "each selector of the dimension it depends on, '",
                        depending_file_dims[[file_dim]], "'."))
          } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) {
            stop(paste0("If providing selectors for the depending ",
                        "dimension '", file_dim, "', the name of the ",
                        "provided vectors of selectors must match ",
                        "exactly the selectors of the dimension it ",
                        "depends on, '", depending_file_dims[[file_dim]], "'."))
          } else if (is.null(names(dat_selectors[[file_dim]]))) {
            .warning(paste0("The selectors for the depending dimension '", file_dim, "' do not ",
                            "have list names. Assume that the order of the selectors matches the ",
                            "depended dimensions '", depending_file_dims[[file_dim]], "''s order."))
aho's avatar
aho committed
          }
        }
      }
    }
aho's avatar
aho committed
    # Find the possible values for the selectors that are provided as
    # indices. If the requested file is on server, impossible operation.
    if (length(grep("^http", dat[[i]][['path']])) > 0) {
      if (length(undefined_file_dims) > 0) {
        stop(paste0("All selectors for the file dimensions must be ",
                    "character strings if requesting data to a remote ",
                    "server. Found invalid selectors for the file dimensions ",
                    paste(paste0("'", undefined_file_dims, "'"), collapse = ', '), "."))
      }
      dataset_has_files[i] <- TRUE
    } else {
      dat[[i]][['path']] <- path.expand(dat[[i]][['path']])
      # Iterate over the known dimensions to find the first existing file.
      # The path to the first existing file will be used to find the 
      # values for the non explicitly defined selectors.
      first_file <- NULL
      first_file_selectors <- NULL
      if (length(undefined_file_dims) > 0) {
        replace_values[undefined_file_dims] <- '*'
      }
      ## TODO: What if length of defined_file_dims is 0? code might crash (in practice it worked for an example case)
      files_to_check <- sapply(dat_selectors[defined_file_dims], function(x) length(x[[1]]))
      sub_array_of_files_to_check <- array(1:prod(files_to_check), dim = files_to_check)
      j <- 1
      #print("D")
      while (j <= prod(files_to_check) && is.null(first_file)) {
        selector_indices <- which(sub_array_of_files_to_check == j, arr.ind = TRUE)[1, ]
        selectors <- sapply(1:length(defined_file_dims), 
                            function (x) {
                              vector_to_pick <- 1
                              if (defined_file_dims[x] %in% names(depending_file_dims)) {
                                vector_to_pick <- selector_indices[which(defined_file_dims == depending_file_dims[[defined_file_dims[x]]])]
                              }
                              dat_selectors[defined_file_dims][[x]][[vector_to_pick]][selector_indices[x]]
                            })
        replace_values[defined_file_dims] <- selectors
        file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values)
        file_path <- Sys.glob(file_path)
        if (length(file_path) > 0) {
          first_file <- file_path[1]
          first_file_selectors <- selectors
        }
        j <- j + 1
      }
      #print("E")
      # Start looking for values for the non-explicitly defined selectors.
      if (is.null(first_file)) {
        .warning(paste0("No found files for the datset '", dat[[i]][['name']], 
                        "'. Provide existing selectors for the file dimensions ",
                        " or check and correct its path pattern: ", dat[[i]][['path']]))
      } else {
        dataset_has_files[i] <- TRUE
        ## TODO: Improve message here if no variable found:
        if (length(undefined_file_dims) > 0) {
aho's avatar
aho committed
          # Note: "dat[[i]][['path']]" is changed by the function below.
          dat_selectors <- find_ufd_value(undefined_file_dims, dat, i, replace_values,
                                          first_file, file_dims, path_glob_permissive,             
                                          depending_file_dims, dat_selectors, selector_checker,
                                          chunks)
aho's avatar
aho committed
          #print("I")
        } else {
aho's avatar
aho committed
          #NOTE: If there is no non-explicitly defined dim, use the first found file
          #      to modify. Problem: '*' doesn't catch all the possible file. Only use
          #      the first file.
aho's avatar
aho committed
          dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, 
                                                        defined_file_dims, dat[[i]][['name']], path_glob_permissive)
        }
      }
    }
    dat[[i]][['selectors']] <- dat_selectors

aho's avatar
aho committed
    # Now fetch for the first available file
    if (dataset_has_files[i]) {
      known_dims <- file_dims
    } else {
      known_dims <- defined_file_dims
    }
    replace_values <- vector('list', length = length(known_dims))
    names(replace_values) <- known_dims
    files_to_load <- sapply(dat_selectors[known_dims], function(x) length(x[[1]]))
    files_to_load[found_pattern_dim] <- 1
    sub_array_of_files_to_load <- array(1:prod(files_to_load), 
                                        dim = files_to_load)
    names(dim(sub_array_of_files_to_load)) <- known_dims
    sub_array_of_not_found_files <- array(!dataset_has_files[i], 
                                          dim = files_to_load)
    names(dim(sub_array_of_not_found_files)) <- known_dims
    
    if (largest_dims_length) {
      if (!exists('selector_indices_save')) {
        selector_indices_save <- vector('list', length = length(dat))
      }
      if (!exists('selectors_total_list')) {
        selectors_total_list <- vector('list', length = length(dat))
      }
      selector_indices_save[[i]] <- vector('list', length = prod(files_to_load))
      selectors_total_list[[i]] <- vector('list', length = prod(files_to_load))
aho's avatar
aho committed
    }
    j <- 1
    # NOTE: This while loop has these objects that are used afterward: 'sub_array_of_files_to_load',
    #       'sub_array_of_not_found_files', 'indices_of_first_files_with_data', 'selectors_of_first_files_with_data';
    #       'selector_indices_save' and 'selectors_total_list' are used if 'largest_dims_length = T'.
aho's avatar
aho committed
    while (j <= prod(files_to_load)) {
      selector_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
      names(selector_indices) <- known_dims
      if (largest_dims_length) {
        tmp <- selector_indices
        tmp[which(known_dims == found_pattern_dim)] <- i
        selector_indices_save[[i]][[j]] <- tmp
      }
      # This 'selectors' is only used in this while loop
aho's avatar
aho committed
      selectors <- sapply(1:length(known_dims), 
                          function (x) {
                            vector_to_pick <- 1
                            if (known_dims[x] %in% names(depending_file_dims)) {
                              vector_to_pick <- selector_indices[which(known_dims == depending_file_dims[[known_dims[x]]])]
                            }
                            dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]]
                          })
      names(selectors) <- known_dims
      if (largest_dims_length) {
        selectors_total_list[[i]][[j]] <- selectors
        names(selectors_total_list[[i]][[j]]) <- known_dims
      }

      # 'replace_values' and 'file_path' are only used in this while loop
aho's avatar
aho committed
      replace_values[known_dims] <- selectors
      if (!dataset_has_files[i]) {
        if (any(is.na(selectors))) {
          replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))]
        }
        file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE)
        sub_array_of_files_to_load[j] <- file_path
        #sub_array_of_not_found_files[j] <- TRUE???
      } else {
        if (any(is.na(selectors))) {
          replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))]
          file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE)
          sub_array_of_files_to_load[j] <- file_path
          sub_array_of_not_found_files[j] <- TRUE
        } else {
          file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values)
aho's avatar
aho committed

          #NOTE: After replacing tags, there is still * if path_glob_permissive is not FALSE.
          #      Find the possible value to substitute *.
aho's avatar
aho committed
          if (grepl('\\*', file_path)) {
            found_files <- Sys.glob(file_path)
            file_path <- found_files[1]   # choose only the first file.
            #NOTE: Above line chooses only the first found file. Because * is not tags, which means
            #      it is not a dimension. So it cannot store more than one item. If use * to define
            #      the path, that * should only represent one possibility.
aho's avatar
aho committed
            if (length(found_files) > 1) {
              .warning("Using glob expression * to define the path, but more ",
                       "than one match is found. Choose the first match only.")
            }
          }

aho's avatar
aho committed
          if (!(length(grep("^http", file_path)) > 0)) {
            if (grepl(file_path, '*', fixed = TRUE)) {
              file_path_full <- Sys.glob(file_path)[1]
              if (nchar(file_path_full) > 0) {
                file_path <- file_path_full
              }
            }
          }
          sub_array_of_files_to_load[j] <- file_path
          if (is.null(indices_of_first_files_with_data[[i]])) {
            if (!(length(grep("^http", file_path)) > 0)) {
              if (!file.exists(file_path)) {
                file_path <- NULL
              }
            }
            if (!is.null(file_path)) {
              test_file <- NULL
              ## TODO: suppress error messages
              test_file <- file_opener(file_path)
              if (!is.null(test_file)) {
                selector_indices[which(known_dims == found_pattern_dim)] <- i
                indices_of_first_files_with_data[[i]] <- selector_indices
                selectors_of_first_files_with_data[[i]] <- selectors
                file_closer(test_file)
              }
            }
          }
        }
      }
      j <- j + 1
    }
    # Extend array as needed progressively
    if (is.null(array_of_files_to_load)) {
      array_of_files_to_load <- sub_array_of_files_to_load
      array_of_not_found_files <- sub_array_of_not_found_files
    } else {
      array_of_files_to_load <- .MergeArrays(array_of_files_to_load, sub_array_of_files_to_load,
                                             along = found_pattern_dim)
      ## TODO: file_dims, and variables like that.. are still ok now? I don't think so
      array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files,
                                               along = found_pattern_dim)
    }    
  }
  if (all(sapply(indices_of_first_files_with_data, is.null))) {
    stop("No data files found for any of the specified datasets.")
  }
  
  ########################### READING INNER DIMS. #############################
  #print("J")
  ## TODO: To be run in parallel (local multi-core)
  # Now time to work out the inner file dimensions.
  # First pick the requested variables.

#//// This part is moved below the new code////
# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to below can save some work 
#       and get the revised common_return_vars if it is changed.
#  dims_to_iterate <- NULL
#  for (return_var in names(return_vars)) {
#    dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]]))
#  }
#  if (found_pattern_dim %in% dims_to_iterate) {
#    dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)]
#  }
#//////////////////////////////////////////////

  # Separate 'return_vars' into 'common_return_vars' and 'return_vars' (those = 'dat').
aho's avatar
aho committed
  common_return_vars <- NULL
  common_first_found_file <- NULL
  common_return_vars_pos <- NULL
  if (length(return_vars) > 0) {
    common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% x)))
  }
  if (length(common_return_vars_pos) > 0) {
    common_return_vars <- return_vars[common_return_vars_pos]
    return_vars <- return_vars[-common_return_vars_pos]
    common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0)))
    names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)])
  }

#!!!!!!!Check here. return_vars has removed the common ones, and here remove 'dat' value????
#It seems like it does some benefits to later parts
aho's avatar
aho committed
  return_vars <- lapply(return_vars, 
                        function(x) {
                          if (found_pattern_dim %in% x) {
                            x[-which(x == found_pattern_dim)]
                          } else {
                            x
                          }
                        })
#////////////////////////////////////////////
  # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or
  # (2) time_across = 'sdate'.
  # NOTE: Here is not in for loop of dat[[i]]
  for (i in 1:length(dat)) {
    for (inner_dim in expected_inner_dims[[i]]) {
      # The selectors for the inner dimension are taken.
      selector_array <- dat[[i]][['selectors']][[inner_dim]]
      file_dim_as_selector_array_dim <- 1

      if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
        file_dim_as_selector_array_dim <- 
          found_file_dims[[i]][which(found_file_dims[[i]] %in% names(dim(selector_array)))]
      if (inner_dim %in% inner_dims_across_files | 
          is.character(file_dim_as_selector_array_dim)) {  #(2) or (1)
        # inner_dim is not in return_vars or is NULL
        if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) |
            (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) {
          common_return_vars[[inner_dim]] <- correct_return_vars(
                                               inner_dim, inner_dims_across_files, 
                                               found_pattern_dim, file_dim_as_selector_array_dim)
        }
      }
    }
  }
#////////////////////////////////////////////

# This part was above where return_vars is seperated into return_vars and common_return_vars
# NOTE: 'dims_to_iterate' only consider common_return_vars. Move to here can save some work 
#       and get the revised common_return_vars if it is changed in the part right above.
  dims_to_iterate <- NULL
  for (common_return_var in names(common_return_vars)) {
    dims_to_iterate <- unique(c(dims_to_iterate, common_return_vars[[common_return_var]]))
  }
#////////////////////////////////////////////

  # Create 'picked_common_vars'
aho's avatar
aho committed
  if (length(common_return_vars) > 0) {
    picked_common_vars <- vector('list', length = length(common_return_vars))
    names(picked_common_vars) <- names(common_return_vars)
  } else {
    picked_common_vars <- NULL
  }
  picked_common_vars_ordered <- picked_common_vars
  picked_common_vars_unorder_indices <- picked_common_vars

  # Create 'picked_vars'
aho's avatar
aho committed
  picked_vars <- vector('list', length = length(dat))
  names(picked_vars) <- dat_names
  picked_vars_ordered <- picked_vars
  picked_vars_unorder_indices <- picked_vars
aho's avatar
aho committed
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      # Put all selectors in a list of a single list/vector of selectors. 
      # The dimensions that go across files will later be extended to have 
      # lists of lists/vectors of selectors.
      for (inner_dim in expected_inner_dims[[i]]) {
        if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || # not list, or
            (is.list(dat[[i]][['selectors']][[inner_dim]]) &&  # list of 2 that represents range
aho's avatar
aho committed
             length(dat[[i]][['selectors']][[inner_dim]]) == 2 &&
             is.null(names(dat[[i]][['selectors']][[inner_dim]])))) {
          dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]])
        }
      }
aho's avatar
aho committed
      if (length(return_vars) > 0) {
        picked_vars[[i]] <- vector('list', length = length(return_vars))
        names(picked_vars[[i]]) <- names(return_vars)
        picked_vars_ordered[[i]] <- picked_vars[[i]]
        picked_vars_unorder_indices[[i]] <- picked_vars[[i]]
      }
aho's avatar
aho committed
      indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]])
      array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]]))
      names(array_file_dims) <- found_file_dims[[i]]
      if (length(dims_to_iterate) > 0) {
        indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], function(x) 1:x)
      }
      array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE)))
      array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files))
      array_of_not_found_var_files <- do.call('[', c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE)))
      # Create previous_indices. The initial value is -1 because there is no 'previous' before the
      # 1st current_indices.
aho's avatar
aho committed
      previous_indices <- rep(-1, length(indices_of_first_file))
      names(previous_indices) <- names(indices_of_first_file)
      # Create first_found_file for vars_to_read defining. It is for the dim value in return_vars
      # that is NULL or character(0). Because these dims only need to be read once, so 
      # first_found_file indicates if these dims have been read or not. 
      # If read, it turns to TRUE and won't be included in vars_to_read again in the next 
      # 'for j loop'. 
aho's avatar
aho committed
      first_found_file <- NULL
      if (length(return_vars) > 0) {
        first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0)))
        names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)])
      }
aho's avatar
aho committed
      for (j in 1:length(array_of_var_files)) {
        current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ]
        names(current_indices) <- names(indices_of_first_file)
        if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) {
          changed_dims <- which(current_indices != previous_indices)
          # Prepare vars_to_read for this dataset (i loop) and this file (j loop)
          vars_to_read <- generate_vars_to_read(return_vars, changed_dims, first_found_file, 
                                                common_return_vars, common_first_found_file, i)

aho's avatar
aho committed
          file_object <- file_opener(array_of_var_files[j])
          if (!is.null(file_object)) {
            for (var_to_read in vars_to_read) {
              if (var_to_read %in% unlist(var_params)) {
                associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
              }
              var_name_to_reader <- var_to_read
              names(var_name_to_reader) <- 'var'
              var_dims <- file_dim_reader(NULL, file_object, var_name_to_reader, NULL,
                                          synonims)
              # file_dim_reader returns dimension names as found in the file.
              # Need to translate accoridng to synonims:
              names(var_dims) <- replace_with_synonmins(var_dims, synonims)
aho's avatar
aho committed
              if (!is.null(var_dims)) {
                var_file_dims <- NULL
                if (var_to_read %in% names(common_return_vars)) {
                  var_to_check <- common_return_vars[[var_to_read]]
                } else {
                  var_to_check <- return_vars[[var_to_read]]
                }
                if (any(names(dim(array_of_files_to_load)) %in% var_to_check)) {
                  var_file_dims <- dim(array_of_files_to_load)[which(names(dim(array_of_files_to_load)) %in% 
                                                                       var_to_check)]
                }
                if (((var_to_read %in% names(common_return_vars)) && 
                     is.null(picked_common_vars[[var_to_read]])) ||
                    ((var_to_read %in% names(return_vars)) && 
                     is.null(picked_vars[[i]][[var_to_read]]))) {
                  if (any(names(var_file_dims) %in% names(var_dims))) {
                    stop("Found a requested var in 'return_var' requested for a ",
                         "file dimension which also appears in the dimensions of ",
                         "the variable inside the file.\n", array_of_var_files[j])
                  }
                  first_sample <- file_var_reader(NULL, file_object, NULL, 
                                                  var_to_read, synonims)
                  if (any(class(first_sample) %in% names(time_special_types()))) {
aho's avatar
aho committed
                    array_size <- prod(c(var_file_dims, var_dims))
                    new_array <- rep(time_special_types()[[class(first_sample)[1]]](NA), array_size)
aho's avatar
aho committed
                    dim(new_array) <- c(var_file_dims, var_dims)
                  } else {
                    new_array <- array(dim = c(var_file_dims, var_dims))
                  }
                  attr(new_array, 'variables') <- attr(first_sample, 'variables')
                  if (var_to_read %in% names(common_return_vars)) {
                    picked_common_vars[[var_to_read]] <- new_array
                    pick_ordered <- FALSE 
                    if (var_to_read %in% unlist(var_params)) {
                      if (associated_dim_name %in% names(dim_reorder_params) && !aiat) {
                        picked_common_vars_ordered[[var_to_read]] <- new_array
                        pick_ordered <- TRUE
                      }
                    }
                    if (!pick_ordered) {
                      picked_common_vars_ordered[[var_to_read]] <- NULL
                    }
                  } else {
                    picked_vars[[i]][[var_to_read]] <- new_array
                    pick_ordered <- FALSE
                    if (var_to_read %in% unlist(var_params)) {
                      if (associated_dim_name %in% names(dim_reorder_params) && !aiat) {
                        picked_vars_ordered[[i]][[var_to_read]] <- new_array
                        pick_ordered <- TRUE
                      }
                    }
                    if (!pick_ordered) {
                      picked_vars_ordered[[i]][[var_to_read]] <- NULL
                    }
                  }
                } else {
                  if (var_to_read %in% names(common_return_vars)) {
                    array_var_dims <- dim(picked_common_vars[[var_to_read]])
                  } else {
                    array_var_dims <- dim(picked_vars[[i]][[var_to_read]])
                  }
                  full_array_var_dims <- array_var_dims
                  if (any(names(array_var_dims) %in% names(var_file_dims))) {
                    array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))]
                  }
                  if (names(array_var_dims) != names(var_dims)) {
                    stop("Error while reading the variable '", var_to_read, "' from ",
                         "the file. Dimensions do not match.\nExpected ", 
                         paste(paste0("'", names(array_var_dims), "'"), 
                               collapse = ', '), " but found ",
                         paste(paste0("'", names(var_dims), "'"), 
                               collapse = ', '), ".\n", array_of_var_files[j])
                  }
                  if (any(var_dims > array_var_dims)) {
                    longer_dims <- which(var_dims > array_var_dims)
                    if (length(longer_dims) == 1) {
                      longer_dims_in_full_array <- longer_dims
                      if (any(names(full_array_var_dims) %in% names(var_file_dims))) {
                        candidates <- (1:length(full_array_var_dims))[-which(names(full_array_var_dims) %in% names(var_file_dims))]
                        longer_dims_in_full_array <- candidates[longer_dims]
                      }
                      padding_dims <- full_array_var_dims
                      padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - 
                        array_var_dims[longer_dims]
                      if (var_to_read %in% names(common_return_vars)) {
                        var_class <- class(picked_common_vars[[var_to_read]])
                      } else {
                        var_class <- class(picked_vars[[i]][[var_to_read]])
                      }
                      if (any(var_class %in% names(time_special_types()))) {
aho's avatar
aho committed
                        padding_size <- prod(padding_dims)
                        padding <- rep(time_special_types()[[var_class[1]]](NA), padding_size)
aho's avatar
aho committed
                        dim(padding) <- padding_dims
                      } else {
                        padding <- array(dim = padding_dims)
                      }
                      if (var_to_read %in% names(common_return_vars)) {
                        picked_common_vars[[var_to_read]] <- .abind2(
                          picked_common_vars[[var_to_read]],
                          padding,
                          names(full_array_var_dims)[longer_dims_in_full_array]
                        )
                      } else {
                        picked_vars[[i]][[var_to_read]] <- .abind2(
                          picked_vars[[i]][[var_to_read]],
                          padding,
                          names(full_array_var_dims)[longer_dims_in_full_array]
                        )
                      }
                    } else {
                      stop("Error while reading the variable '", var_to_read, "' from ",
                           "the file. Found size (", paste(var_dims, collapse = ' x '), 
                           ") is greater than expected maximum size (", 
                           array_var_dims, ").")
                    }
                  }
                }
                var_store_indices <- c(as.list(current_indices[names(var_file_dims)]), lapply(var_dims, function(x) 1:x))
                var_values <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims)
                if (var_to_read %in% unlist(var_params)) {
                  if ((associated_dim_name %in% names(dim_reorder_params)) && !aiat) {
                    ## Is this check really needed?
                    if (length(dim(var_values)) > 1) {
                      stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
                           "whose coordinate variable that has more than 1 dimension. This is ",
                           "not supported.")
                    }
                    ordered_var_values <- dim_reorder_params[[associated_dim_name]](var_values)
                    attr(ordered_var_values$x, 'variables') <- attr(var_values, 'variables')
                    if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
                      stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
                    }
                    # Save the indices to reorder back the ordered variable values.
                    # This will be used to define the first round indices.
                    unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
                    if (var_to_read %in% names(common_return_vars)) {
                      picked_common_vars_ordered[[var_to_read]] <- do.call('[<-', 
                                                                           c(list(x = picked_common_vars_ordered[[var_to_read]]), 
                                                                             var_store_indices, 
                                                                             list(value = ordered_var_values$x)))
                      picked_common_vars_unorder_indices[[var_to_read]] <- do.call('[<-', 
                                                                                   c(list(x = picked_common_vars_unorder_indices[[var_to_read]]), 
                                                                                     var_store_indices, 
                                                                                     list(value = unorder)))
                    } else {
                      picked_vars_ordered[[i]][[var_to_read]] <- do.call('[<-', 
                                                                         c(list(x = picked_vars_ordered[[i]][[var_to_read]]), 
                                                                           var_store_indices, 
                                                                           list(value = ordered_var_values$x)))
                      picked_vars_unorder_indices[[i]][[var_to_read]] <- do.call('[<-', 
                                                                                 c(list(x = picked_vars_unorder_indices[[i]][[var_to_read]]), 
                                                                                   var_store_indices, 
                                                                                   list(value = unorder)))
                    }
                  }
                }
                if (var_to_read %in% names(common_return_vars)) {
                  picked_common_vars[[var_to_read]] <- do.call('[<-', 
                                                               c(list(x = picked_common_vars[[var_to_read]]), 
                                                                 var_store_indices, 
                                                                 list(value = var_values)))
                  # Turn time zone back to UTC if this var_to_read is 'time'
                  if (all(class(picked_common_vars[[var_to_read]]) == names(time_special_types))) {
aho's avatar
aho committed
                    attr(picked_common_vars[[var_to_read]], "tzone") <- 'UTC'
                  }
                } else {
                  picked_vars[[i]][[var_to_read]] <- do.call('[<-', 
                                                             c(list(x = picked_vars[[i]][[var_to_read]]), 
                                                               var_store_indices, 
                                                               list(value = var_values)))
                  # Turn time zone back to UTC if this var_to_read is 'time'
                  if (all(class(picked_vars[[i]][[var_to_read]]) == names(time_special_types))) {
aho's avatar
aho committed
                    attr(picked_vars[[i]][[var_to_read]], "tzone") <- 'UTC'
                  }
                }
                if (var_to_read %in% names(first_found_file)) {
                  first_found_file[var_to_read] <- TRUE
                }
                if (var_to_read %in% names(common_first_found_file)) {
                  common_first_found_file[var_to_read] <- TRUE
                }
              } else {
                stop("Could not find variable '", var_to_read, 
                     "' in the file ", array_of_var_files[j])
              }