Start.R 171 KB
Newer Older
              if (nchar(file_path_full) > 0) {
                file_path <- file_path_full
              }
            }
          }
          sub_array_of_files_to_load[j] <- file_path
          if (is.null(indices_of_first_files_with_data[[i]])) {
            if (!(length(grep("^http", file_path)) > 0)) {
              if (!file.exists(file_path)) {
                file_path <- NULL
              }
            }
            if (!is.null(file_path)) {
              test_file <- NULL
              ## TODO: suppress error messages
              test_file <- file_opener(file_path)
              if (!is.null(test_file)) {
                selector_indices[which(known_dims == found_pattern_dim)] <- i
                indices_of_first_files_with_data[[i]] <- selector_indices
                selectors_of_first_files_with_data[[i]] <- selectors
                file_closer(test_file)
              }
            }
          }
        }
      }
      j <- j + 1
    }
    # Extend array as needed progressively
    if (is.null(array_of_files_to_load)) {
        array_of_files_to_load <- sub_array_of_files_to_load
        array_of_not_found_files <- sub_array_of_not_found_files
    } else {
      array_of_files_to_load <- .MergeArrays(array_of_files_to_load, sub_array_of_files_to_load,
                                             along = found_pattern_dim)
      ## TODO: file_dims, and variables like that.. are still ok now? I don't think so
      array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files,
                                               along = found_pattern_dim)
    }    
    dat[[i]][['selectors']] <- dat_selectors
  }
  if (all(sapply(indices_of_first_files_with_data, is.null))) {
    stop("No data files found for any of the specified datasets.")
  }

  ########################### READING INNER DIMS. #############################
#print("J")
  ## TODO: To be run in parallel (local multi-core)
  # Now time to work out the inner file dimensions.
  # First pick the requested variables.
  dims_to_iterate <- NULL
  for (return_var in names(return_vars)) {
    dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]]))
  }
  if (found_pattern_dim %in% dims_to_iterate) {
    dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)]
  }
  common_return_vars <- NULL
  common_first_found_file <- NULL
  common_return_vars_pos <- NULL
  if (length(return_vars) > 0) {
    common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% x)))
  }
  if (length(common_return_vars_pos) > 0) {
    common_return_vars <- return_vars[common_return_vars_pos]
    return_vars <- return_vars[-common_return_vars_pos]
    common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0)))
    names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)])
  }
  return_vars <- lapply(return_vars, 
    function(x) {
      if (found_pattern_dim %in% x) {
        x[-which(x == found_pattern_dim)]
      } else {
        x
      }
    })
  if (length(common_return_vars) > 0) {
    picked_common_vars <- vector('list', length = length(common_return_vars))
    names(picked_common_vars) <- names(common_return_vars)
  } else {
    picked_common_vars <- NULL
  }
  picked_common_vars_ordered <- picked_common_vars
  picked_common_vars_unorder_indices <- picked_common_vars
  picked_vars <- vector('list', length = length(dat))
  names(picked_vars) <- dat_names
  picked_vars_ordered <- picked_vars
  picked_vars_unorder_indices <- picked_vars
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      # Put all selectors in a list of a single list/vector of selectors. 
      # The dimensions that go across files will later be extended to have 
      # lists of lists/vectors of selectors.
      for (inner_dim in expected_inner_dims[[i]]) {
        if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || 
            (is.list(dat[[i]][['selectors']][[inner_dim]]) && 
             length(dat[[i]][['selectors']][[inner_dim]]) == 2 &&
             is.null(names(dat[[i]][['selectors']][[inner_dim]])))) {
          dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]])
        }
      }
      if (length(return_vars) > 0) {
        picked_vars[[i]] <- vector('list', length = length(return_vars))
        names(picked_vars[[i]]) <- names(return_vars)
        picked_vars_ordered[[i]] <- picked_vars[[i]]
        picked_vars_unorder_indices[[i]] <- picked_vars[[i]]
      }
      indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]])
      array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]]))
      names(array_file_dims) <- found_file_dims[[i]]
      if (length(dims_to_iterate) > 0) {
        indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], function(x) 1:x)
      }
      array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE)))
      array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files))
      array_of_not_found_var_files <- do.call('[', c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE)))
      previous_indices <- rep(-1, length(indices_of_first_file))
      names(previous_indices) <- names(indices_of_first_file)
      first_found_file <- NULL
      if (length(return_vars) > 0) {
        first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0)))
        names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)])
      }
      for (j in 1:length(array_of_var_files)) {
        current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ]
        names(current_indices) <- names(indices_of_first_file)
        if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) {
          changed_dims <- which(current_indices != previous_indices)
          vars_to_read <- NULL
          if (length(return_vars) > 0) {
            vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))]
          }
          if (!is.null(first_found_file)) {
            if (any(!first_found_file)) {
              vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)]))
            }
          }
          if ((i == 1) && (length(common_return_vars) > 0)) {
            vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))])
          }
          if (!is.null(common_first_found_file)) {
            if (any(!common_first_found_file)) {
              vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)]))
            }
          }
          file_object <- file_opener(array_of_var_files[j])
          if (!is.null(file_object)) {
            for (var_to_read in vars_to_read) {
              if (var_to_read %in% unlist(var_params)) {
                associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
              var_name_to_reader <- var_to_read
              names(var_name_to_reader) <- 'var'
              var_dims <- file_dim_reader(NULL, file_object, var_name_to_reader, NULL,
                                          synonims)
              # file_dim_reader returns dimension names as found in the file.
              # Need to translate accoridng to synonims:
              names(var_dims) <- sapply(names(var_dims), 
                function(x) {
                  which_entry <- which(sapply(synonims, function(y) x %in% y))
                  if (length(which_entry) > 0) {
                    names(synonims)[which_entry]
                  } else {
                    x
                  }
                })
              if (!is.null(var_dims)) {
                var_file_dims <- NULL
                if (var_to_read %in% names(common_return_vars)) {
                  var_to_check <- common_return_vars[[var_to_read]]
                  var_to_check <- return_vars[[var_to_read]]
                if (any(names(dim(array_of_files_to_load)) %in% var_to_check)) {
                  var_file_dims <- dim(array_of_files_to_load)[which(names(dim(array_of_files_to_load)) %in% 
                                                                     var_to_check)]
                }
                if (((var_to_read %in% names(common_return_vars)) && 
                     is.null(picked_common_vars[[var_to_read]])) ||
                    ((var_to_read %in% names(return_vars)) && 
                     is.null(picked_vars[[i]][[var_to_read]]))) {
                  if (any(names(var_file_dims) %in% names(var_dims))) {
                    stop("Found a requested var in 'return_var' requested for a ",
                         "file dimension which also appears in the dimensions of ",
                         "the variable inside the file.\n", array_of_var_files[j])
                  special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, 
                                        'Date' = as.Date)
                  first_sample <- file_var_reader(NULL, file_object, NULL, 
                                                  var_to_read, synonims)
                  if (any(class(first_sample) %in% names(special_types))) {
                    array_size <- prod(c(var_file_dims, var_dims))
                    new_array <- rep(special_types[[class(first_sample)[1]]](NA), array_size)
                    dim(new_array) <- c(var_file_dims, var_dims)
                  } else {
                    new_array <- array(dim = c(var_file_dims, var_dims))
                  attr(new_array, 'variables') <- attr(first_sample, 'variables')
                  if (var_to_read %in% names(common_return_vars)) {
                    picked_common_vars[[var_to_read]] <- new_array
                    pick_ordered <- FALSE 
                    if (var_to_read %in% unlist(var_params)) {
                      if (associated_dim_name %in% names(dim_reorder_param) && !aiat) {
                        picked_common_vars_ordered[[var_to_read]] <- new_array
                        pick_ordered <- TRUE
                      }
                    }
                    if (!pick_ordered) {
                      picked_common_vars_ordered[[var_to_read]] <- NULL
                    }
                  } else {
                    picked_vars[[i]][[var_to_read]] <- new_array
                    pick_ordered <- FALSE
                    if (var_to_read %in% unlist(var_params)) {
                      if (associated_dim_name %in% names(dim_reorder_params) && !aiat) {
                        picked_vars_ordered[[i]][[var_to_read]] <- new_array
                        pick_ordered <- TRUE
                      }
                    }
                    if (!pick_ordered) {
                      picked_vars_ordered[[i]][[var_to_read]] <- NULL
                } else {
                  if (var_to_read %in% names(common_return_vars)) {
                    array_var_dims <- dim(picked_common_vars[[var_to_read]])
                  } else {
                    array_var_dims <- dim(picked_vars[[i]][[var_to_read]])
                  }
                  full_array_var_dims <- array_var_dims
                  if (any(names(array_var_dims) %in% names(var_file_dims))) {
                    array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))]
                  }
                  if (names(array_var_dims) != names(var_dims)) {
                    stop("Error while reading the variable '", var_to_read, "' from ",
                         "the file. Dimensions do not match.\nExpected ", 
                         paste(paste0("'", names(array_var_dims), "'"), 
                               collapse = ', '), " but found ",
                         paste(paste0("'", names(var_dims), "'"), 
                               collapse = ', '), ".\n", array_of_var_files[j])
                  }
                  if (any(var_dims > array_var_dims)) {
                    longer_dims <- which(var_dims > array_var_dims)
                    if (length(longer_dims) == 1) {
                      longer_dims_in_full_array <- longer_dims
                      if (any(names(full_array_var_dims) %in% names(var_file_dims))) {
                        candidates <- (1:length(full_array_var_dims))[-which(names(full_array_var_dims) %in% names(var_file_dims))]
                        longer_dims_in_full_array <- candidates[longer_dims]
                      }
                      padding_dims <- full_array_var_dims
                      padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - 
                                                                array_var_dims[longer_dims]
                      special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt,
                                        'Date' = as.Date)
                      if (var_to_read %in% names(common_return_vars)) {
                        var_class <- class(picked_common_vars[[var_to_read]])
                      } else {
                        var_class <- class(picked_vars[[i]][[var_to_read]])
                      }
                      if (any(var_class %in% names(special_types))) {
                        padding_size <- prod(padding_dims)
                        padding <- rep(special_types[[var_class[1]]](NA), padding_size)
                        dim(padding) <- padding_dims
                      } else {
                        padding <- array(dim = padding_dims)
                      }
                      if (var_to_read %in% names(common_return_vars)) {
                        picked_common_vars[[var_to_read]] <- .abind2(
                          picked_common_vars[[var_to_read]],
                          padding,
                          names(full_array_var_dims)[longer_dims_in_full_array]
                        )
                      } else {
                        picked_vars[[i]][[var_to_read]] <- .abind2(
                          picked_vars[[i]][[var_to_read]],
                          padding,
                          names(full_array_var_dims)[longer_dims_in_full_array]
                        )
                      }
                    } else {
                      stop("Error while reading the variable '", var_to_read, "' from ",
                           "the file. Found size (", paste(var_dims, collapse = ' x '), 
                           ") is greater than expected maximum size (", 
                           array_var_dims, ").")
                    }
                  }
                }
                var_store_indices <- c(as.list(current_indices[names(var_file_dims)]), lapply(var_dims, function(x) 1:x))
                var_values <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims)
                if (var_to_read %in% unlist(var_params)) {
                  if ((associated_dim_name %in% names(dim_reorder_params)) && !aiat) {
                    ## Is this check really needed?
                    if (length(dim(var_values)) > 1) {
                      stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
                           "whose coordinate variable that has more than 1 dimension. This is ",
                           "not supported.")
                    }
                    ordered_var_values <- dim_reorder_params[[associated_dim_name]](var_values)
                    attr(ordered_var_values$x, 'variables') <- attr(var_values, 'variables')
                    if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
                      stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
                    }
                    # Save the indices to reorder back the ordered variable values.
                    # This will be used to define the first round indices.
                    unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
                    if (var_to_read %in% names(common_return_vars)) {
                       picked_common_vars_ordered[[var_to_read]] <- do.call('[<-', 
                         c(list(x = picked_common_vars_ordered[[var_to_read]]), 
                           var_store_indices, 
                           list(value = ordered_var_values$x)))
                       picked_common_vars_unorder_indices[[var_to_read]] <- do.call('[<-', 
                         c(list(x = picked_common_vars_unorder_indices[[var_to_read]]), 
                           var_store_indices, 
                           list(value = unorder)))
                    } else {
                       picked_vars_ordered[[i]][[var_to_read]] <- do.call('[<-', 
                         c(list(x = picked_vars_ordered[[i]][[var_to_read]]), 
                           var_store_indices, 
                           list(value = ordered_var_values$x)))
                       picked_vars_unorder_indices[[i]][[var_to_read]] <- do.call('[<-', 
                         c(list(x = picked_vars_unorder_indices[[i]][[var_to_read]]), 
                           var_store_indices, 
                           list(value = unorder)))
                    }
                  }
                }
                if (var_to_read %in% names(common_return_vars)) {
                   picked_common_vars[[var_to_read]] <- do.call('[<-', 
                     c(list(x = picked_common_vars[[var_to_read]]), 
                       var_store_indices, 
                       list(value = var_values)))
                   picked_vars[[i]][[var_to_read]] <- do.call('[<-', 
                     c(list(x = picked_vars[[i]][[var_to_read]]), 
                       var_store_indices, 
                       list(value = var_values)))
                if (var_to_read %in% names(first_found_file)) {
                  first_found_file[var_to_read] <- TRUE
                if (var_to_read %in% names(common_first_found_file)) {
                  common_first_found_file[var_to_read] <- TRUE
                stop("Could not find variable '", var_to_read, 
                     "' in the file ", array_of_var_files[j])
            file_closer(file_object)
          }
        }
        previous_indices <- current_indices
      }
    }
  }
  # Once we have the variable values, we can work out the indices
  # for the implicitly defined selectors.
  #
  # Trnasforms a vector of indices v expressed in a world of 
  # length N from 1 to N, into a world of length M, from
  # 1 to M. Repeated adjacent indices are collapsed.
  transform_indices <- function(v, n, m) { 
    #unique2 turns e.g. 1 1 2 2 2 3 3 1 1 1 into 1 2 3 1
    unique2 <- function(v) {
      if (length(v) < 2) {
        v
      } else {
        v[c(1, v[2:length(v)] - v[1:(length(v) - 1)]) != 0]
      }
    }
    unique2(round(((v - 1) / (n - 1)) * (m - 1))) + 1 # this rounding may generate 0s. what then?
  }
  beta <- transform_extra_cells
  dims_to_crop <- vector('list')
  transformed_vars <- vector('list', length = length(dat))
  names(transformed_vars) <- dat_names
  transformed_vars_ordered <- transformed_vars
  transformed_vars_unorder_indices <- transformed_vars
  transformed_common_vars <- NULL
  transformed_common_vars_ordered <- NULL
  transformed_common_vars_unorder_indices <- NULL
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed

  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      indices <- indices_of_first_files_with_data[[i]]
      if (!is.null(indices)) {
        file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]])))
        # The following 5 lines should go several lines below, but were moved
        # here for better performance.
        # If any of the dimensions comes without defining variable, then we read
        # the data dimensions.
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
        data_dims <- NULL
        if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) {
          file_to_open <- file_path
          data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]],
                                       lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1),
                                       synonims)
          # file_dim_reader returns dimension names as found in the file.
          # Need to translate accoridng to synonims:
          names(data_dims) <- sapply(names(data_dims),
            function(x) {
              which_entry <- which(sapply(synonims, function(y) x %in% y))
              if (length(which_entry) > 0) {
                names(synonims)[which_entry]
              } else {
                x
              }
            })
        }
        # Transform the variables if needed and keep them apart.
        if (!is.null(transform) && (length(transform_vars) > 0)) {
          if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) {
            stop("Could not find all the required variables in 'transform_vars' ",
                 "for the dataset '", dat[[i]][['name']], "'.")
          }
          vars_to_transform <- NULL
          picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars)
          if (length(picked_vars_to_transform) > 0) {
            picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform]
            new_vars_to_transform <- picked_vars[[i]][picked_vars_to_transform]
            which_are_ordered <- which(!sapply(picked_vars_ordered[[i]][picked_vars_to_transform], is.null))

##NOTE: The following 'if' replaces the original with reordering vector
            if (length(which_are_ordered) > 0) {
                      tmp <- which(!is.na(match(names(picked_vars_ordered[[i]]), names(which_are_ordered))))
                      new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[[i]][tmp]

            }
            vars_to_transform <- c(vars_to_transform, new_vars_to_transform)
          }

##NOTE: Above is non-common vars, here is common vars (ie, return_vars = NULL).
          picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars)
          if (length(picked_common_vars_to_transform) > 0) {
            picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform]

            new_vars_to_transform <- picked_common_vars[picked_common_vars_to_transform]
            which_are_ordered <- which(!sapply(picked_common_vars_ordered[picked_common_vars_to_transform], is.null))

            if (length(which_are_ordered) > 0) {

              tmp <- which(!is.na(match(names(picked_common_vars_ordered), names(which_are_ordered))))
              new_vars_to_transform[which_are_ordered] <- picked_common_vars_ordered[tmp]
            }
            vars_to_transform <- c(vars_to_transform, new_vars_to_transform)
          }
          # Transform the variables
          transformed_data <- do.call(transform, c(list(data_array = NULL,
                                                        variables = vars_to_transform,
                                                        file_selectors = selectors_of_first_files_with_data[[i]]),
                                                   transform_params))
          # Discard the common transformed variables if already transformed before
          if (!is.null(transformed_common_vars)) {
            common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables))
            if (length(common_ones) > 0) {
              transformed_data$variables <- transformed_data$variables[-common_ones]
            }
          }
          transformed_vars[[i]] <- list()
          transformed_vars_ordered[[i]] <- list()
          transformed_vars_unorder_indices[[i]] <- list()
          # Order the transformed variables if needed
          # 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above.
          for (var_to_read in names(transformed_data$variables)) {
            if (var_to_read %in% unlist(var_params)) {
              associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)]
Nicolau Manubens's avatar
Nicolau Manubens committed
              if ((associated_dim_name %in% names(dim_reorder_params)) && aiat) {
                ## Is this check really needed?
                if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) {
                  stop("Requested a '", associated_dim_name, "_reorder' for a dimension ",
                       "whose coordinate variable that has more than 1 dimension (after ",
                       "transform). This is not supported.")
                }
                ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]])
                attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables')
                if (!all(c('x', 'ix') %in% names(ordered_var_values))) {
                  stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.")
                }
                # Save the indices to reorder back the ordered variable values.
                # This will be used to define the first round indices.
                unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix
                if (var_to_read %in% names(picked_common_vars)) {
                  transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x
                  transformed_common_vars_unorder_indices[[var_to_read]] <- unorder
                } else {
                  transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x
                  transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder
                }
              }
            }
          }
          transformed_picked_vars <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables))
          if (length(transformed_picked_vars) > 0) {
            transformed_picked_vars <- names(picked_vars[[i]])[transformed_picked_vars]
            transformed_vars[[i]][transformed_picked_vars] <- transformed_data$variables[transformed_picked_vars]
          }
          if (is.null(transformed_common_vars)) {
            transformed_picked_common_vars <- which(names(picked_common_vars) %in% names(transformed_data$variables))
            if (length(transformed_picked_common_vars) > 0) {
              transformed_picked_common_vars <- names(picked_common_vars)[transformed_picked_common_vars]
              transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars]
            }
          }
        }
        # Once the variables are transformed, we compute the indices to be 
        # taken for each inner dimension.
        # In all cases, indices will have to be computed to know which data
        # values to take from the original data for each dimension (if a 
        # variable is specified for that dimension, it will be used to 
        # convert the provided selectors into indices). These indices are
        # referred to as 'first round of indices'.
        # The taken data will then be transformed if needed, together with
        # the dimension variable if specified, and, in that case, indices 
        # will have to be computed again to know which values to take from the 
        # transformed data. These are the 'second round of indices'. In the 
        # case there is no transformation, the second round of indices will
        # be all the available indices, i.e. from 1 to the number of taken
        # values with the first round of indices.
        for (inner_dim in expected_inner_dims[[i]]) {
if (debug) {
print("-> DEFINING INDICES FOR INNER DIMENSION:")
print(inner_dim)
}
Nicolau Manubens's avatar
Nicolau Manubens committed
          file_dim <- NULL
          if (inner_dim %in% unlist(inner_dims_across_files)) {
            file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]]
            chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]])
            names(chunk_amount) <- file_dim
          } else {
            chunk_amount <- 1
          }
Nicolau Manubens's avatar
Nicolau Manubens committed
          # In the special case that the selectors for a dimension are 'all', 'first', ...
          # and chunking (dividing in more than 1 chunk) is requested, the selectors are
          # replaced for equivalent indices.
          if ((dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last')) && 
              (chunks[[inner_dim]]['n_chunks'] != 1)) {
            selectors <- dat[[i]][['selectors']][[inner_dim]][[1]]
            if (selectors == 'all') {
Nicolau Manubens's avatar
Nicolau Manubens committed
              selectors <- indices(1:(data_dims[[inner_dim]] * chunk_amount))
Nicolau Manubens's avatar
Nicolau Manubens committed
            } else if (selectors == 'first') {
              selectors <- indices(1)
            } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
              selectors <- indices(data_dims[[inner_dim]] * chunk_amount)
Nicolau Manubens's avatar
Nicolau Manubens committed
            }
            dat[[i]][['selectors']][[inner_dim]][[1]] <- selectors
          }
          # The selectors for the inner dimension are taken.
          selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]]
if (debug) {
if (inner_dim %in% dims_to_check) {
print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':"))
print("-> STRUCTURE OF SELECTOR ARRAY:")
print(str(selector_array))
print("-> PICKED VARS:")
print(picked_vars)
print("-> TRANSFORMED VARS:")
print(transformed_vars)
}
}
          if (is.null(dim(selector_array))) {
            dim(selector_array) <- length(selector_array)
          }
          if (is.null(names(dim(selector_array)))) {
            if (length(dim(selector_array)) == 1) {
              names(dim(selector_array)) <- inner_dim
            } else {
              stop("Provided selector arrays must be provided with dimension ",
                   "names. Found an array of selectors without dimension names ",
                   "for the dimension '", inner_dim, "'.")
            } 
          }
Nicolau Manubens's avatar
Nicolau Manubens committed
          selectors_are_indices <- FALSE
          if (!is.null(attr(selector_array, 'indices'))) {
            if (!is.logical(attr(selector_array, 'indices'))) {
              stop("The atribute 'indices' for the selectors for the dimension '", 
                   inner_dim, "' must be TRUE or FALSE.")
            }
            selectors_are_indices <- attr(selector_array, 'indices')
          }
          taken_chunks <- rep(FALSE, chunk_amount)
          selector_file_dims <- 1
          if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) {
            selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])]
          }
          selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))]
          var_with_selectors <- NULL
          var_with_selectors_name <- var_params[[inner_dim]]
          var_ordered <- NULL
          var_unorder_indices <- NULL
          with_transform <- FALSE
          # If the selectors come with an associated variable
          if (!is.null(var_with_selectors_name)) {
            if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) {
              with_transform <- TRUE
              if (!is.null(file_dim)) {
                stop("Requested a transformation over the dimension '", 
                     inner_dim, "', wich goes across files. This feature ", 
                     "is not supported. Either do the request without the ",
                     "transformation or request it over dimensions that do ",
                     "not go across files.")
              }
            }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:")
print(var_with_selectors_name)
print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:")
print(transform_vars)
print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:")
print(str(transform))
}
}
            if (var_with_selectors_name %in% names(picked_vars[[i]])) {
              var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]]
              var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]]
              var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]]
            } else if (var_with_selectors_name %in% names(picked_common_vars)) {
              var_with_selectors <- picked_common_vars[[var_with_selectors_name]]
              var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]]
              var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]]
            }
            n <- prod(dim(var_with_selectors))
            if (is.null(var_unorder_indices)) {
              var_unorder_indices <- 1:n
            }
            if (with_transform) {
              if (var_with_selectors_name %in% names(transformed_vars[[i]])) {
                m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]]))
Nicolau Manubens's avatar
Nicolau Manubens committed
                if (aiat) {
                  var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
                  var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]]
                  var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]]
                }
              } else if (var_with_selectors_name %in% names(transformed_common_vars)) {
                m <- prod(dim(transformed_common_vars[[var_with_selectors_name]]))
Nicolau Manubens's avatar
Nicolau Manubens committed
                if (aiat) {
                  var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
                  var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]]
                  var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]]
                }
              }
              if (is.null(var_unorder_indices)) {
                var_unorder_indices <- 1:m
              }
            }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SIZE OF ORIGINAL VARIABLE:")
print(n)
print("-> SIZE OF TRANSFORMED VARIABLE:")
if (with_transform) print(m)
print("-> STRUCTURE OF ORDERED VAR:")
print(str(var_ordered))
print("-> UNORDER INDICES:")
print(var_unorder_indices)
}
}
            var_dims <- dim(var_with_selectors)
            var_file_dims <- 1
            if (any(names(var_dims) %in% found_file_dims[[i]])) {
              if (with_transform) {
                stop("Requested transformation for inner dimension '", 
                     inner_dim, "' but provided selectors for such dimension ",
                     "over one or more file dimensions. This is not ",
                     "supported. Either request no transformation for the ",
                     "dimension '", inner_dim, "' or specify the ",
                     "selectors for this dimension without the file dimensions.")
              }
              var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])]
              var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])]
            }
##            # Keep the selectors if they correspond to a variable that will be transformed.
##            if (with_transform) {
##              if (var_with_selectors_name %in% names(picked_vars[[i]])) {
##                transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]]
##              } else if (var_with_selectors_name %in% names(picked_common_vars)) {
##                transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]]
##              }
##              transformed_var_dims <- dim(transformed_var_with_selectors)
##              transformed_var_file_dims <- 1
##              if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) {
##                transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])]
##                transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])]
##              }
##if (inner_dim %in% dims_to_check) {
##print("111m")
##print(str(transformed_var_dims))
##}
##
##              m <- prod(transformed_var_dims)
##            }
            # Work out var file dims and inner dims.
            if (inner_dim %in% unlist(inner_dims_across_files)) {
              #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash
              if (length(var_dims) > 1) {
                stop("Specified a '", inner_dim, "_var' for the dimension '", 
                     inner_dim, "', which goes across files (across '", file_dim, 
                     "'). The specified variable, '", var_with_selectors_name, "', has more ",
                     "than one dimension and can not be used as selector variable. ",
                     "Select another variable or fix it in the files.")
              }
            }
            ## TODO HERE::
            #- indices_of_first_files_with_data may change, because array is now extended
            var_full_dims <- dim(var_with_selectors)
            if (!(inner_dim %in% names(var_full_dims))) {
              stop("Could not find the dimension '", inner_dim, "' in ",
                   "the file. Either change the dimension name in ",
                   "your request, adjust the parameter ",
                   "'dim_names_in_files' or fix the dimension name in ",
                   "the file.\n", file_path)
            }
          } else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) ||
Nicolau Manubens's avatar
Nicolau Manubens committed
                     (is.character(selector_array) && (length(selector_array) == 1) &&
                      (selector_array %in% c('all', 'first', 'last')) &&
                       !is.null(file_dim_reader))) {
Nicolau Manubens's avatar
Nicolau Manubens committed
            #### TODO HERE::
            ###- indices_of_first_files_with_data may change, because array is now extended
            # Lines moved above for better performance.
            ##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]],
            ##                             lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1))
            if (!(inner_dim %in% names(data_dims))) {
              stop("Could not find the dimension '", inner_dim, "' in ",
                   "the file. Either change the dimension name in ",
                   "your request, adjust the parameter ",
                   "'dim_names_in_files' or fix the dimension name in ",
                   "the file.\n", file_path)
Nicolau Manubens's avatar
Nicolau Manubens committed
          } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
            stop(paste0("Can not translate the provided selectors for '", inner_dim, 
                        "' to numeric indices. Provide numeric indices and a ",
                        "'file_dim_reader' function, or a '", inner_dim, 
                        "_var' in order to calculate the indices."))
          }
          # At this point, if no selector variable was provided, the variable 
          # data_dims has been populated. If a selector variable was provided,
          # the variables var_dims, var_file_dims and var_full_dims have been 
          # populated instead.
          fri <- first_round_indices <- NULL
          sri <- second_round_indices <- NULL
          # This variable will keep the indices needed to crop the transformed 
          # variable (the one that has been transformed without being subset 
          # with the first round indices).
          tvi <- tranaformed_variable_indices <- NULL
          ordered_fri <- NULL
          ordered_sri <- NULL
          if ((length(selector_array) == 1) && is.character(selector_array) &&
              (selector_array %in% c('all', 'first', 'last')) &&
              (chunks[[inner_dim]]['n_chunks'] == 1)) {
            if (is.null(var_with_selectors_name)) {
              fri <- vector('list', length = chunk_amount)
              dim(fri) <- c(chunk_amount)
              sri <- vector('list', length = chunk_amount)
              dim(sri) <- c(chunk_amount)
              if (selector_array == 'all') {
                fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
                taken_chunks <- rep(TRUE, chunk_amount)
                #sri <- NULL
              } else if (selector_array == 'first') {
                fri[[1]] <- 1
                taken_chunks[1] <- TRUE
                #sri <- NULL
              } else if (selector_array == 'last') {
                fri[[chunk_amount]] <- data_dims[inner_dim]
                taken_chunks[length(taken_chunks)] <- TRUE
                #sri <- NULL
              }
            } else {
              if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) {
                stop("The variable '", var_with_selectors_name, "' must also be ",
                     "requested for the file dimension '", file_dim, "' in ",
                     "this configuration.")
              }
              fri <- vector('list', length = prod(var_file_dims))
              dim(fri) <- var_file_dims
              sri <- vector('list', length = prod(var_file_dims))
              dim(sri) <- var_file_dims
              if (selector_array == 'all') {
# TODO: Populate ordered_fri
                ordered_fri[] <- replicate(prod(var_file_dims), list(1:n))
                fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n]))
                taken_chunks <- rep(TRUE, chunk_amount)
                if (!with_transform) {
                  #fri[] <- replicate(prod(var_file_dims), list(1:n))
                  #taken_chunks <- rep(TRUE, chunk_amount)
                  #sri <- NULL
                } else {
                  ordered_sri[] <- replicate(prod(var_file_dims), list(1:m))
                  sri[] <- replicate(prod(var_file_dims), list(1:m))
                  ## var_file_dims instead??
Nicolau Manubens's avatar
Nicolau Manubens committed
                  #if (!aiat) {
                    #fri[] <- replicate(prod(var_file_dims), list(1:n))
                    #taken_chunks <- rep(TRUE, chunk_amount)
                    #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
                  #} else {
                    #fri[] <- replicate(prod(var_file_dims), list(1:n))
                    #taken_chunks <- rep(TRUE, chunk_amount)
                    #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m))
                  #}
                  tvi <- 1:m
                }
              } else if (selector_array == 'first') {
                taken_chunks[1] <- TRUE
                if (!with_transform) {
                  ordered_fri[[1]] <- 1
                  fri[[1]] <- var_unorder_indices[1]
                  #taken_chunks[1] <- TRUE
                  #sri <- NULL
                } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
                  if (!aiat) {
                    ordered_fri[[1]] <- 1
                    fri[[1]] <- var_unorder_indices[1]
# TODO: TO BE IMPROVED
                    #taken_chunks[1] <- TRUE
                    ordered_sri[[1]] <- 1:ceiling(m / n)
                    sri[[1]] <- 1:ceiling(m / n)
                    tvi <- 1:ceiling(m / n)
                  } else {
                    ordered_fri[[1]] <- 1:ceiling(m / n)
                    fri[[1]] <- var_unorder_indices[1:ceiling(m / n)]
                    #taken_chunks[1] <- TRUE
                    tvi <- 1
                  }
                }
              } else if (selector_array == 'last') {
                taken_chunks[length(taken_chunks)] <- TRUE
                if (!with_transform) {
                  ordered_fri[[prod(var_file_dims)]] <- n
                  fri[[prod(var_file_dims)]] <- var_unorder_indices[n]
                  #taken_chunks[length(taken_chunks)] <- TRUE
                  #sri <- NULL
                } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
                  if (!aiat) {
                    ordered_fri[[prod(var_file_dims)]] <- prod(var_dims)
                    fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)]
                    #taken_chunks[length(taken_chunks)] <- TRUE
                    ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
                    sri[[prod(var_file_dims)]] <- 1:ceiling(m / n)
# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING.
                    tvi <- 1:ceiling(m / n)
                  } else {
                    ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n
                    fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n]
                    #taken_chunks[length(taken_chunks)] <- TRUE
                    ordered_sri[[prod(var_file_dims)]] <- 1
                    sri[[prod(var_file_dims)]] <- 1
          # If the selectors are not 'all', 'first', 'last', ...
          } else {
            if (!is.null(var_with_selectors_name)) {
              unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims)))
              if ((length(unmatching_file_dims) > 0)) {
                raise_error <- FALSE
                if (is.null(file_dim)) {
                  raise_error <- TRUE
                } else {
                  if (!((length(unmatching_file_dims) == 1) && 
                        (names(var_file_dims)[unmatching_file_dims] == file_dim) &&
                        (inner_dim %in% names(selector_inner_dims)))) {
                    raise_error <- TRUE
                  }
                }
                if (raise_error) {
                  stop("Provided selectors for the dimension '", inner_dim, "' must have as many ",
                       "file dimensions as the variable the dimension is defined along, '", 
                       var_with_selectors_name, "', with the exceptions of the file pattern dimension ('",
                       found_pattern_dim, "') and any depended file dimension (if specified as ",
                       "depended dimension in parameter 'inner_dims_across_files' and the ",
                       "depending file dimension is present in the provided selector array).")
                }
              }
Nicolau Manubens's avatar
Nicolau Manubens committed
              if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) {
                if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) {
                  stop("Size of selector file dimensions must mach size of requested ",
                       "variable dimensions.")
                }
              }
            }
            ## TODO: If var dimensions are not in the same order as selector dimensions, reorder
            if (is.null(names(selector_file_dims))) {
              if (is.null(file_dim)) {
                fri_dims <- 1
              } else {
                fri_dims <- chunk_amount
                names(fri_dims) <- file_dim
              }
            } else {
              fri_dim_names <- names(selector_file_dims)
              if (!is.null(file_dim)) {
                fri_dim_names <- c(fri_dim_names, file_dim)
              }
              fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)]
              fri_dims <- rep(NA, length(fri_dim_names))
              names(fri_dims) <- fri_dim_names
              fri_dims[names(selector_file_dims)] <- selector_file_dims
              if (!is.null(file_dim)) {
                fri_dims[file_dim] <- chunk_amount
              }
            }
            fri <- vector('list', length = prod(fri_dims))
            dim(fri) <- fri_dims
            sri <- vector('list', length = prod(fri_dims))
            dim(sri) <- fri_dims
            selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims)
            selector_store_position <- fri_dims
            for (j in 1:prod(dim(selector_file_dim_array))) {
              selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ]
              names(selector_indices_to_take) <- names(selector_file_dims)
              selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
              sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
                                               as.list(selector_indices_to_take), drop = 'selected')
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
print("-> STRUCTURE OF A SUB ARRAY:")
print(str(sub_array_of_selectors))
print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:")
print(str(var_with_selectors))
print(dim(var_with_selectors))
}
}
Nicolau Manubens's avatar
Nicolau Manubens committed
              if (selectors_are_indices) {
                sub_array_of_values <- NULL
              #} else if (!is.null(var_ordered)) {
              #  sub_array_of_values <- var_ordered
              } else {
                if (length(var_file_dims) > 0) {
                  var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
                  sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
                                                as.list(var_indices_to_take), drop = 'selected')
                } else {
                  sub_array_of_values <- var_with_selectors
                }
              }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS")
print(str(sub_array_of_values))
print(dim(sub_array_of_values))
print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:")
print(file_dim)
}
}
              if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) {
                if (length(sub_array_of_selectors) > 0) {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.")
}
}
Nicolau Manubens's avatar
Nicolau Manubens committed
                  if (selectors_are_indices) {
                    if (!is.null(var_with_selectors_name)) {
                      max_allowed <- ifelse(aiat, m, n)
                    } else {
                      max_allowed <- data_dims[inner_dim]
                    }
                    if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) ||
                        any(na.omit(unlist(sub_array_of_selectors)) < 1)) {
                      stop("Provided indices out of range for dimension '", inner_dim, "' ", 
                           "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", 
                           max_allowed, ").")
                  # The selector_checker will return either a vector of indices or a list
                  # with the first and last desired indices.
                  goes_across_prime_meridian <- FALSE
Nicolau Manubens's avatar
Nicolau Manubens committed
                  if (!is.null(var_ordered) && !selectors_are_indices) {
                    if (!is.null(dim_reorder_params[[inner_dim]])) {
                      if (is.list(sub_array_of_selectors)) {

## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here.
                        is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular")
                          if (!is.null(is_circular_dim)) {
                            if (is_circular_dim) {

# NOTE: Use CircularSort() to put the values in the assigned range, and get the order.
# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1].
# 'goes_across_prime_meridian' means the selector range across the border. For example,
# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE.
                              tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix
                              goes_across_prime_meridian <-  tmp[1] > tmp[2]
                            }
                          }

                        # HERE change to the same code as below (under 'else'). Not sure why originally 
                        #it uses additional lines, which make reorder not work.
                        sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x)
                        #sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))
                        #sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix