Start.R 171 KB
Newer Older
                        #sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder])

# Add warning if the boundary is out of range
                      if (sub_array_of_selectors[1] < range(var_ordered)[1] | sub_array_of_selectors[1] > range(var_ordered)[2]) {
                        .warning(paste0("The lower boundary of selector of ",
                                       inner_dim,
                                       " is out of range [",
                                       min(var_ordered), ", ", max(var_ordered), "]. ",
                                       "Check if the desired range is all included."))
                      }
                      if (sub_array_of_selectors[2] < range(var_ordered)[1] | sub_array_of_selectors[2] > range(var_ordered)[2]) {
                        .warning(paste0("The upper boundary of selector of ",
                                       inner_dim,
                                       " is out of range [",
                                       min(var_ordered), ", ", max(var_ordered), "]. ",
                                       "Check if the desired range is all included."))
                      }


                      } else {
                        sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x
                      }
                    }

# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case
# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of
#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor().
                    sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered,
                                                             tolerance = if (aiat) {
                                                                           NULL
                                                                         } else {
                                                                           tolerance_params[[inner_dim]]
                                                                         })
                    if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) {
                      if (!(sub_array_of_selectors[[1]] %in% var_ordered)){
                        sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1
                      }

                      if (!(sub_array_of_selectors[[2]] %in% var_ordered)){
                        sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1
                      }
                    }

#NOTE: the possible case?
                    if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) {
                      .stop("The case is goes_across_prime_meridian but no adjustion for the indices!")
                    }

                   if (any(is.na(sub_array_of_indices))) {

                      stop(paste0("The selectors of ", inner_dim,
                                  " are out of range [", min(var_ordered),
                                  ", ", max(var_ordered), "]."))
                   }


# Add warning if the boundary is out of range
                            if (is.list(sub_array_of_selectors)) {
                              if (sub_array_of_selectors[1] <
                                min(sub_array_of_values) | sub_array_of_selectors[1] >
                                max(sub_array_of_values)) {
                                .warning(paste0("The lower boundary of selector of ",
                                  inner_dim, " is out of range [",
                                  min(sub_array_of_values), ", ",
                                  max(sub_array_of_values), "]. ",
                                  "Check if the desired range is all included."))
                              }
                              if (sub_array_of_selectors[2] <
                                min(sub_array_of_values) | sub_array_of_selectors[2] >
                                max(sub_array_of_values)) {
                                .warning(paste0("The upper boundary of selector of ",
                                  inner_dim, " is out of range [",
                                  min(sub_array_of_values), ", ",
                                  max(sub_array_of_values), "]. ",
                                  "Check if the desired range is all included."))
                              }
                            }

                    sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
                                                             tolerance = if (aiat) {
                                                                           NULL
                                                                         } else {
                                                                           tolerance_params[[inner_dim]]
                                                                         })

                          if (any(is.na(sub_array_of_indices))) {

                                stop(paste0("The selectors of ", inner_dim,
                                      " are out of range [", min(sub_array_of_values),
                                     ", ", max(sub_array_of_values), "]."))
                          }

                  ## This 'if' runs in both Start() and Compute(). In Start(), it doesn't have any effect (no chunk).
                  ## In Compute(), it creates the indices for each chunk. For example, if 'sub_array_of_indices'
                  ## is c(5:10) and chunked into 2, 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10)
                  ## for chunk = 2. If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes
                  ## list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2. 
                  ## TODO: The list can be turned into vector here? So afterward no need to judge if it is list 
                  ## or vector.
                  if (!is.list(sub_array_of_indices)) {
                    sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), 
                                                                               chunks[[inner_dim]]["chunk"], 
                                                                               chunks[[inner_dim]]["n_chunks"], 
                                                                               inner_dim)]
                  } else {
                    tmp <- chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]),
                                                chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"],
                                                inner_dim)
                    vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                    sub_array_of_indices[[1]] <- vect[tmp[1]]
                    sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]]
                  # The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk.
# Check if all the files have the selectors assigned (e.g., region = 'Grnland') _20191015
if (is.character(sub_array_of_selectors)) {
  array_of_var_files_check <- vector('list', length(selector_indices))
  for (k in 1:length(selector_indices)) {
    asdasd <- selector_indices[[k]]
    array_of_var_files_check <- do.call('[', c(list(x = array_of_files_to_load), asdasd, list(drop = FALSE)))[j]
    file_object <- file_opener(array_of_var_files_check)
    var_values_check <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims)
    if (any(as.vector(var_values_check)[sub_array_of_indices] != sub_array_of_selectors)) {
      .warning('Not all the files has correponding selectors. Check the selector attributes')
    }
  }
}

if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> TRANSFORMATION REQUESTED?")
print(with_transform)
print("-> BETA:")
print(beta)
}
}
                  if (with_transform) {
                    # If there is a transformation and selector values are provided, these
Nicolau Manubens's avatar
Nicolau Manubens committed
                    # selectors will be processed in the same way either if aiat = TRUE or
                    # aiat = FALSE.
## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below.
##       otherwise, do what's coded.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> SELECTORS REQUESTED BEFORE TRANSFORM.")
}
}

###NOTE: Here, the transform, is different from the below part of non-transform.
#  search 'if (goes_across_prime_meridian' to find the lines below.
                    if (goes_across_prime_meridian) {
# NOTE: before changing, the return is already correct.
#NOTE: The fix below has the same explanation as no with_transform part below. 
#      Search the next next 'if (goes_across_prime_meridian) {'.
                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                        # global longitude
                        sub_array_of_fri <- 1:n
                        # Warning if transform_extra_cell != 0
                        if (beta != 0) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                            transform_extra_cells, " to the transformed index excesses ",
                            "the border. The border index is used for transformation."))
                        }

                        # normal case, i.e., not global
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        gap_width <- last_index - first_index - 1
                        sub_array_of_fri <- c(1:(min(unlist(sub_array_of_indices)) + min(gap_width, beta)),                       
                       (max(unlist(sub_array_of_indices)) - min(gap_width, beta)):n)

                        if (min(gap_width, beta) != beta) {
                              .warning(paste0("Adding parameter transform_extra_cells =  ",
                                  transform_extra_cells, " to the transformed index excesses ",
                                  "the border. The border index is used for transformation."))
                              }
                      #NOTE: This if seems redundant.
                      if (is.list(sub_array_of_indices)) {
                        sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                      }
                      first_index <- min(unlist(sub_array_of_indices))
                      last_index <- max(unlist(sub_array_of_indices))
                      start_padding <- min(beta, first_index - 1)
                      end_padding <- min(beta, n - last_index)

aho's avatar
aho committed
                    if (exists("is_circular_dim")) {
                      if (!is_circular_dim) {  #latitude
                        sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                        if (start_padding != beta | end_padding != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                  transform_extra_cells, " to the transformed index excesses ",
                                  "the border. The border index is used for transformation."))
                        }
                      } else {  #longitude
                        if ((last_index - first_index + 1 + beta * 2) >= n) {
                          sub_array_of_fri <- 1:n
                        } else if (start_padding < beta) {  # left side too close to border, need to go to right side
                          sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n)
                        } else if (end_padding < beta) { # right side too close to border, need to go to left side
                          sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding))
                        } else {  #normal
                          sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                        }
                      }
aho's avatar
aho committed
                    } else {   # when <var>_reorder is not used
                       sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding)
                        if (start_padding != beta | end_padding != beta) {
                          .warning(paste0("Adding parameter transform_extra_cells =  ",
                                  transform_extra_cells, " to the transformed index excesses ",
                                  "the border. The border index is used for transformation."))
                        }
                    }

                    }
                    subset_vars_to_transform <- vars_to_transform
                    if (!is.null(var_ordered)) {

##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector.
## Turn it into array and add dimension name.
                      if (!is.array(var_ordered)) {
                        var_ordered <- as.array(var_ordered)
                        names(dim(var_ordered)) <- inner_dim
                      }

                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri)
                    } else {
##NOTE: It should be redundant because without reordering the var should remain array
## But just stay same with above...
                      if (!is.array(sub_array_of_values)) {
                        sub_array_of_values <- as.array(sub_array_of_values)
                        names(dim(sub_array_of_values)) <- inner_dim
                      }

                      subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri)
                    }

# Change the order of longitude crop if no reorder + from big to small.
# cdo -sellonlatbox, the lon is west, east (while lat can be north 
# to south or opposite)
# Before changing crop, first we need to find the name of longitude. 
# NOTE: The potential bug here (also the bug for CDORemapper): the lon name
#       is limited (only the ones listed in .KnownLonNames() are available.
                     known_lon_names <- s2dverification:::.KnownLonNames()
                     lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]]

aho's avatar
aho committed
# NOTE: The cases not considered: (1) if lon reorder(decreasing = T)
#       It doesn't make sense, but if someone uses it, here should
#       occur error. (2) crop = TRUE/FALSE
                     if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) {
                       if (is.numeric(class(transform_params$crop))) {
                         if (transform_params$crop[1] > transform_params$crop[2]) {
                           tmp <- transform_params$crop[1]
                           transform_params$crop[1] <- transform_params$crop[2]
                           transform_params$crop[2] <- tmp
                         }
                    transformed_subset_var <- do.call(transform, c(list(data_array = NULL,
                                                                        variables = subset_vars_to_transform,
                                                                        file_selectors = selectors_of_first_files_with_data[[i]]),
                                                                        transform_params))$variables[[var_with_selectors_name]]
                    # Sorting the transformed variable and working out the indices again after transform.
                    if (!is.null(dim_reorder_params[[inner_dim]])) {
                      transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var)
                      transformed_subset_var <- transformed_subset_var_reorder$x
#NOTE: The fix here solves the mis-ordered lon when across_meridian. 
                      transformed_subset_var_unorder <- transformed_subset_var_reorder$ix
#                      transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix
                    } else {
                      transformed_subset_var_unorder <- 1:length(transformed_subset_var)
                    }
                    sub_array_of_sri <- selector_checker(sub_array_of_selectors, transformed_subset_var,
Nicolau Manubens's avatar
Nicolau Manubens committed
                                                         tolerance = if (aiat) {
                                                                       tolerance_params[[inner_dim]]
                                                                     } else {
                                                                       NULL
                                                                     })

# Check if selectors fall out of the range of the transform grid
# It may happen when original lon is [-180, 180] while want to regrid to
# [0, 360], and lon selector = [-20, -10].
                    if (any(is.na(sub_array_of_sri))) {
                      stop(paste0("The selectors of ",
                           inner_dim, " are out of range of transform grid '",
                           transform_params$grid, "'. Use parameter '",
                           inner_dim, "_reorder' or change ", inner_dim,
                           " selectors."))
                    }

                    if (goes_across_prime_meridian) {

                      if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) {
                        # global longitude
                        sub_array_of_sri <- c(1:length(transformed_subset_var))
                      } else {
                        # the common case, i.e., non-global
                        # NOTE: Because sub_array_of_sri order is exchanged due to 
                        # previous development, here [[1]] and [[2]] should exchange
                        sub_array_of_sri <- c(1:sub_array_of_sri[[1]],
                        sub_array_of_sri[[2]]:length(transformed_subset_var))
                      }
                    } else if (is.list(sub_array_of_sri)) {
                      sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                    }
                    ordered_sri <- sub_array_of_sri
                    sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
                    # In this case, the tvi are not defined and the 'transformed_subset_var'
                    # will be taken instead of the var transformed before in the code.
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> FIRST INDEX:")
print(first_index)
print("-> LAST INDEX:")
print(last_index)
print("-> STRUCTURE OF FIRST ROUND INDICES:")
print(str(sub_array_of_fri))
print("-> STRUCTURE OF SECOND ROUND INDICES:")
print(str(sub_array_of_sri))
print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:")
print(str(tvi))
}
}
###                    # If the selectors are expressed after transformation
###                    } else {
###if (debug) {
###if (inner_dim %in% dims_to_check) {
###print("-> SELECTORS REQUESTED AFTER TRANSFORM.")
###}
###}
###                      if (goes_across_prime_meridian) {
###                        sub_array_of_indices <- c(sub_array_of_indices[[1]]:m,
###                                                    1:sub_array_of_indices[[2]])
###                      }
###                      first_index <- min(unlist(sub_array_of_indices))
###                      last_index <- max(unlist(sub_array_of_indices))
###                      first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
###                      last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
###                      sub_array_of_fri <- first_index_before_transform:last_index_before_transform
###                      n_of_extra_cells <- round(beta / n * m)
###                      if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
###                        sub_array_of_sri <- 1:(last_index - first_index + 1) 
###                        if (is.null(tvi)) {
###                          tvi <- sub_array_of_sri + first_index - 1
###                        }
###                      } else {
###                        sub_array_of_sri <- sub_array_of_indices - first_index + 1
###                        if (is.null(tvi)) {
###                          tvi <- sub_array_of_indices
###                        }
###                      }
###                      sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells
                    sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                             list(value = sub_array_of_sri)))
                  } else {
                    if (goes_across_prime_meridian) {
#NOTE: The potential problem here is, if it is global longitude,
#      and the indices overlap (e.g., lon = [0, 359.723] and 
#      CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). 
#      Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll
#      get two 649.
#      The fix below may not be the best solution, but it works for 
#      the example above.

                      if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) {
                      # global longitude
                      sub_array_of_fri <- c(1:n)
                      } else {
                      # the common case, i.e., non-global
                      sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)),
                      max(unlist(sub_array_of_indices)):n)
                    } else if (is.list(sub_array_of_indices)) {
                      sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                    }
                  }
                  if (!is.null(var_unorder_indices)) {
                    if (is.null(ordered_fri)) {
                      ordered_fri <- sub_array_of_fri
                    }
                    sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                  }
                  fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                           list(value = sub_array_of_fri)))
                  if (!is.null(file_dim)) {
                    taken_chunks[selector_store_position[[file_dim]]] <- TRUE
                  } else {
                    taken_chunks <- TRUE
                  }
                }
              } else {
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.")
}
}
                if (inner_dim %in% names(dim(sub_array_of_selectors))) {
                  if (is.null(var_with_selectors_name)) {
                    if (any(na.omit(unlist(sub_array_of_selectors)) < 1) ||
                        any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) {
                      stop("Provided indices out of range for dimension '", inner_dim, "' ", 
                           "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", 
                           data_dims[inner_dim] * chunk_amount, ").")
                    }
                  } else {
                    if (inner_dim %in% names(dim(sub_array_of_values))) {
# NOTE: Put across-inner-dim at the 1st position.
# POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_selectors below.
                      inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim)
                      if (inner_dim_pos_in_sub_array != 1) {
                        new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array]
                        new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                        sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order)
                      }
                    }
                  }

# NOTE: Put across-inner-dim at the 1st position.
# POSSIBLE PROB!! Only organize inner dim, the rest dims may not in the same order as sub_array_of_values above.
                  inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim)
                  if (inner_dim_pos_in_sub_array != 1) {
                    new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array]
                    new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order)
                    sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order)
                  }
                  sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values,
                                                           tolerance = tolerance_params[[inner_dim]])
                  # It is needed to expand the indices here, otherwise for 
                  # values(list(date1, date2)) only 2 values are picked.
                  if (is.list(sub_array_of_indices)) {
                    sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]]
                  }
                  sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices),
                                                                             chunks[[inner_dim]]['chunk'],
                                                                             chunks[[inner_dim]]['n_chunks'],
                                                                             inner_dim)]
                  sub_array_is_list <- FALSE
                  if (is.list(sub_array_of_indices)) {
                    sub_array_is_list <- TRUE
                    sub_array_of_indices <- unlist(sub_array_of_indices)
                  }
                  if (is.null(var_with_selectors_name)) {
                    indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
                  } else {
                    indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
                    transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
                  }
                  if (sub_array_is_list) {
                    sub_array_of_indices <- as.list(sub_array_of_indices)
                  }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> GOING TO ITERATE ALONG CHUNKS.")
}
}
                  for (chunk in 1:chunk_amount) {
                    if (!is.null(names(selector_store_position))) {
                      selector_store_position[file_dim] <- chunk
                    } else {
                      selector_store_position <- chunk
                    }
                    chunk_selectors <- transformed_indices[which(indices_chunk == chunk)]
                    sub_array_of_indices <- chunk_selectors
                    if (with_transform) {
                      # If the provided selectors are expressed in the world
                      # before transformation
Nicolau Manubens's avatar
Nicolau Manubens committed
                      if (!aiat) {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n))
                        sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m)
                        if (is.list(sub_array_of_indices)) {
                          if (length(sub_array_of_sri) > 1) {
                            sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
                          }
                        }
##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI
                      # If the selectors are expressed after transformation
                      } else {
                        first_index <- min(unlist(sub_array_of_indices))
                        last_index <- max(unlist(sub_array_of_indices))
                        first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1)
                        last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n)
                        sub_array_of_fri <- first_index_before_transform:last_index_before_transform
                        if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) {
                          sub_array_of_sri <- 1:(last_index - first_index + 1) + 
                                              round(beta / n * m) 
                        } else {
                          sub_array_of_sri <- sub_array_of_indices - first_index + 1 +
                                              round(beta / n * m)
                        }
##TODO: FILL IN TVI
                      }
                      sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position),
                                               list(value = sub_array_of_sri)))
                      if (length(sub_array_of_sri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    } else {
                      sub_array_of_fri <- sub_array_of_indices
                      if (length(sub_array_of_fri) > 0) {
                        taken_chunks[chunk] <- TRUE
                      }
                    }
                    if (!is.null(var_unorder_indices)) {
                      ordered_fri <- sub_array_of_fri
                      sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
                    }
                    fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position),
                                             list(value = sub_array_of_fri)))
                  }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> FINISHED ITERATING ALONG CHUNKS")
}
}
                } else {
                  stop("Provided array of indices for dimension '", inner_dim, "', ",
                       "which goes across the file dimension '", file_dim, "', but ",
                       "the provided array does not have the dimension '", inner_dim, 
                       "', which is mandatory.")
                }
              }
            }
          }
if (debug) {
if (inner_dim %in% dims_to_check) {
print("-> PROCEEDING TO CROP VARIABLES")
}
}
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
          #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) {
            #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) &&
            #    (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) {
              empty_chunks <- which(!taken_chunks)
              if (length(empty_chunks) >= length(taken_chunks)) {
                stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.")
              }
              if (length(empty_chunks) > 0) {
#                # Get the first group of chunks to remove, and remove them. 
#                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2
#                dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1)))
#                if (all(dist == 1)) {
#                  start_chunks_to_remove <- NULL
#                } else {
#                  first_chunk_to_remove <- tail(which(dist > 1), 1)
#                  start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)])
#                }
#                # Get the last group of chunks to remove, and remove them. 
#                # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9
#                dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1)))
#                if (all(dist == 1)) {
#                  first_chunk_to_remove <- 1
#                } else {
#                  first_chunk_to_remove <- tail(which(dist > 1), 1)
#                }
#                end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)]
#                chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove)))
                chunks_to_keep <- which(taken_chunks)
                dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep))
#                found_indices <- Subset(found_indices, file_dim, chunks_to_keep)
#                # Crop dataset variables file dims.
#                for (picked_var in names(picked_vars[[i]])) {
#                  if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
#                    picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep)
#                  }
#                }
              }
            #}
            dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri)
            # Crop dataset variables inner dims.
            # Crop common variables inner dims.
            types_of_var_to_crop <- 'picked'
            if (with_transform) {
              types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed')
            }
            if (!is.null(dim_reorder_params[[inner_dim]])) {
              types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered')
            }
            for (type_of_var_to_crop in types_of_var_to_crop) {
              if (type_of_var_to_crop == 'transformed') {
                if (is.null(tvi)) {
                  if (!is.null(dim_reorder_params[[inner_dim]])) {
                    crop_indices <- unique(unlist(ordered_sri))
                  } else {
                    crop_indices <- unique(unlist(sri))
                  }
                } else {
                  crop_indices <- unique(unlist(tvi))
                }
                vars_to_crop <- transformed_vars[[i]]
                common_vars_to_crop <- transformed_common_vars
              } else if (type_of_var_to_crop == 'reordered') {
                crop_indices <- unique(unlist(ordered_fri))
                vars_to_crop <- picked_vars_ordered[[i]]
                common_vars_to_crop <- picked_common_vars_ordered
              } else {
                crop_indices <- unique(unlist(fri))
                vars_to_crop <- picked_vars[[i]]
                common_vars_to_crop <- picked_common_vars
              }
              for (var_to_crop in names(vars_to_crop)) {
                if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) {
                  if (!is.null(crop_indices)) {
                    if (type_of_var_to_crop == 'transformed') {
Nicolau Manubens's avatar
Nicolau Manubens committed
                      if (!aiat) {
                        vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices)
                      } else {
                        vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                      }
                    } else {
                      vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices)
                    }
                  }
                }
              }
              if (i == length(dat)) {
                for (common_var_to_crop in names(common_vars_to_crop)) {
                  if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) {
                    common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices)
                  }
                }
              }
              if (type_of_var_to_crop == 'transformed') {
                if (!is.null(vars_to_crop)) {
                  transformed_vars[[i]] <- vars_to_crop
                }
                if (i == length(dat)) {
                  transformed_common_vars <- common_vars_to_crop
                }
              } else if (type_of_var_to_crop == 'reordered') {
                if (!is.null(vars_to_crop)) {
                  picked_vars_ordered[[i]] <- vars_to_crop
                }
                if (i == length(dat)) {
                  picked_common_vars_ordered <- common_vars_to_crop
                }
              } else {
                if (!is.null(vars_to_crop)) {
                  picked_vars[[i]] <- vars_to_crop
                }
                if (i == length(dat)) {
                  picked_common_vars <- common_vars_to_crop
                }
              }
            }
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
          #}
        }
        # After the selectors have been picked (using the original variables), 
        # the variables are transformed. At that point, the original selectors
        # for the transformed variables are also kept in the variable original_selectors.
#print("L")
      }
    }
  }
#  if (!is.null(transformed_common_vars)) {
#    picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars
#  }
  # Remove the trailing chunks, if any.
  for (file_dim in names(dims_to_crop)) {
#    indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max))
    ## TODO: Merge indices in dims_to_crop with some advanced mechanism?
    indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]]))
    array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep)
    array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep)
    for (i in 1:length(dat)) {
      # Crop selectors
      for (selector_dim in names(dat[[i]][['selectors']])) {
        if (selector_dim == file_dim) {
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) {
            dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep]
          }
          for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) {
            dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep]
          }
        }
        if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) {
          dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep)
          dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep)
        }
      }
      # Crop dataset variables file dims.
      for (picked_var in names(picked_vars[[i]])) {
        if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) {
          picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep)
        }
      }
      for (transformed_var in names(transformed_vars[[i]])) {
        if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) {
          transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep)
        }
      }
    }
    # Crop common variables file dims.
Nicolau Manubens's avatar
Nicolau Manubens committed
    for (picked_common_var in names(picked_common_vars)) {
      if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) {
        picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep)
      }
    }
    for (transformed_common_var in names(transformed_common_vars)) {
      if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) {
        transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep)
      }
    }
  }
  # Calculate the size of the final array.
  total_inner_dims <- NULL
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      inner_dims <- expected_inner_dims[[i]]
      inner_dims <- sapply(inner_dims, 
        function(x) {
          if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) {
            max(sapply(dat[[i]][['selectors']][[x]][['sri']], length))
          } else {
            if (length(var_params[[x]]) > 0) {
              if (var_params[[x]] %in% names(transformed_vars[[i]])) {
                length(transformed_vars[[i]][[var_params[[x]]]])
              } else if (var_params[[x]] %in% names(transformed_common_vars)) {
                length(transformed_common_vars[[var_params[[x]]]])
              } else {
                max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
              }
            } else {
              max(sapply(dat[[i]][['selectors']][[x]][['fri']], length))
            }
          }
        })
      names(inner_dims) <- expected_inner_dims[[i]]
      if (is.null(total_inner_dims)) {
        total_inner_dims <- inner_dims
      } else {
        new_dims <- .MergeArrayDims(total_inner_dims, inner_dims)
        total_inner_dims <- new_dims[[3]]
      }
    }
  }
  new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims)
  final_dims <- new_dims[[3]][dim_names]
  # final_dims_fake is the vector of final dimensions after having merged the 
  # 'across' file dimensions with the respective 'across' inner dimensions, and
  # after having broken into multiple dimensions those dimensions for which 
  # multidimensional selectors have been provided.
  # final_dims will be used for collocation of data, whereas final_dims_fake 
  # will be used for shaping the final array to be returned to the user.
  final_dims_fake <- final_dims
  if (merge_across_dims) {
    if (!is.null(inner_dims_across_files)) {
      for (file_dim_across in names(inner_dims_across_files)) {
Nicolau Manubens's avatar
Nicolau Manubens committed
        inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]])
        new_dims <- c()
        if (inner_dim_pos > 1) {
          new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)])
        }
Nicolau Manubens's avatar
Nicolau Manubens committed
        new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), 
                                         inner_dims_across_files[[file_dim_across]]))
        if (inner_dim_pos + 1 < length(final_dims_fake)) {
          new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)])
        }
        final_dims_fake <- new_dims
      }
    }
  }
Nicolau Manubens's avatar
Nicolau Manubens committed
  all_split_dims <- NULL
  if (split_multiselected_dims) {
    for (dim_param in 1:length(dim_params)) {
      if (!is.null(dim(dim_params[[dim_param]]))) {
        if (length(dim(dim_params[[dim_param]])) > 1) {
          split_dims <- dim(dim_params[[dim_param]])
Nicolau Manubens's avatar
Nicolau Manubens committed
          all_split_dims <- c(all_split_dims, setNames(list(split_dims), 
                                                       names(dim_params)[dim_param]))
Nicolau Manubens's avatar
Nicolau Manubens committed
          if (is.null(names(split_dims))) {
            names(split_dims) <- paste0(names(dim_params)[dim_param], 
                                         1:length(split_dims))
          }
          old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param])

# NOTE: Three steps to create new dims.
# 1st: Put in the dims before split_dim.
# 2nd: Replace the old_dim with split_dims.
# 3rd: Put in the dims after split_dim.
          new_dims <- c()
          if (old_dim_pos > 1) {
            new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)])
          }
          new_dims <- c(new_dims, split_dims)
          if (old_dim_pos < length(final_dims_fake)) {
            new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)])
          }
Nicolau Manubens's avatar
Nicolau Manubens committed
          final_dims_fake <- new_dims
  # only merge_across_dims -> the 'time' dim length needs to be adjusted
    across_inner_dim <- inner_dims_across_files[[1]]  #TODO: more than one?
    across_file_dim <- names(inner_dims_across_files)  #TODO: more than one?
aho's avatar
aho committed
    # Get the length of each inner_dim ('time') along each file_dim ('file_date')  
    length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length)
    final_dims_fake_name <- names(final_dims_fake)
    pos_across_inner_dim <- which(final_dims_fake_name == across_inner_dim)
    new_length_inner_dim <- sum(unlist(length_inner_across_dim))
    if (pos_across_inner_dim != length(final_dims_fake)) {
      final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                           new_length_inner_dim,
                           final_dims_fake[(pos_across_inner_dim + 1):length(final_dims_fake)])
    } else {
      final_dims_fake <- c(final_dims_fake[1:(pos_across_inner_dim - 1)],
                           new_length_inner_dim)
    }
    names(final_dims_fake) <- final_dims_fake_name
  }
  if (!silent) {
    .message("Detected dimension sizes:")
    longest_dim_len <- max(sapply(names(final_dims_fake), nchar))
    longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar))
    sapply(names(final_dims_fake), 
      function(x) {
        message(paste0("*   ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), 
                       x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), 
                       final_dims_fake[x]))
    bytes <- prod(c(final_dims_fake, 8))
    dim_sizes <- paste(final_dims_fake, collapse = ' x ')
    if (retrieve) {
      .message(paste("Total size of requested data:"))
    } else {
      .message(paste("Total size of involved data:"))
    }
    .message(paste(dim_sizes, " x 8 bytes =", 
                   format(structure(bytes, class = "object_size"), units = "auto")), 
                   indent = 2)
  }
# NOTE: If split_multiselected_dims + merge_across_dims, the dim order may need to be changed.
#       The inner_dim needs to be the first dim among split dims.
#       Cannot control the rest dims are in the same order or not...
#       Suppose users put the same order of across inner and file dims.
  if (split_multiselected_dims & merge_across_dims) {
    # TODO: More than one split?
    inner_dim_pos_in_split_dims <- which(names(all_split_dims[[1]]) == inner_dims_across_files)  
    # if inner_dim is not the first, change!
    if (inner_dim_pos_in_split_dims != 1) {
      split_dims <- c(split_dims[inner_dim_pos_in_split_dims],
                      split_dims[1:length(split_dims)][-inner_dim_pos_in_split_dims])
      split_dims_pos <- which(!is.na(match(names(final_dims_fake), names(split_dims))))
      # Save the current final_dims_fake for later reorder back
      final_dims_fake_output <- final_dims_fake
      new_dims <- c()
      if (split_dims_pos[1] != 1) {
        new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
      }
      new_dims <- c(new_dims, split_dims)
      if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
        new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
      }
      final_dims_fake <- new_dims
    }
  }

  # The following several lines will only be run if retrieve = TRUE
  if (retrieve) {
  ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ###########
  # TODO: try performance of storing all in cols instead of rows
  # Create the shared memory array, and a pointer to it, to be sent
  # to the work pieces.
  data_array <- big.matrix(nrow = prod(final_dims), ncol = 1)
  shared_matrix_pointer <- describe(data_array)
  if (is.null(num_procs)) {
    num_procs <- availableCores()
  # Creating a shared tmp folder to store metadata from each chunk
  array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
  if (!is.null(metadata_dims)) {
    metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
    names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
    metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
    array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags),  metadata_indices_to_load,
                                                list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
  }
  metadata_file_counter <- 0
  metadata_folder <- tempfile('metadata')
  dir.create(metadata_folder)
  # Build the work pieces, each with:
  # - file path
  # - total size (dims) of store array
  # - start position in store array
  # - file selectors (to provide extra info. useful e.g. to select variable)
  # - indices to take from file
  work_pieces <- list()
  for (i in 1:length(dat)) {
    if (dataset_has_files[i]) {
      selectors <- dat[[i]][['selectors']]
      file_dims <- found_file_dims[[i]]
      inner_dims <- expected_inner_dims[[i]]
      sub_array_dims <- final_dims[file_dims]
      sub_array_dims[found_pattern_dim] <- 1
      sub_array_of_files_to_load <- array(1:prod(sub_array_dims), 
Nicolau Manubens's avatar
Nicolau Manubens committed
                                          dim = sub_array_dims)
      names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims)
      # Detect which of the dimensions of the dataset go across files.
      file_dim_across_files <- lapply(inner_dims, 
        function(x) {
          dim_across <- sapply(inner_dims_across_files, function(y) x %in% y)
          if (any(dim_across)) {
            names(inner_dims_across_files)[which(dim_across)[1]]
          } else {
            NULL
          }
        })
      names(file_dim_across_files) <- inner_dims
      j <- 1
      while (j <= prod(sub_array_dims)) {
        # Work out file path.
        file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ]
        names(file_to_load_sub_indices) <- names(sub_array_dims)
        file_to_load_sub_indices[found_pattern_dim] <- i
        big_dims <- rep(1, length(dim(array_of_files_to_load)))
        names(big_dims) <- names(dim(array_of_files_to_load))
        file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]]
        file_to_load <- do.call('[[', c(list(array_of_files_to_load), 
                                        as.list(file_to_load_indices)))
        not_found_file <- do.call('[[', c(list(array_of_not_found_files),
                                          as.list(file_to_load_indices)))
        load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), 
                                             as.list(file_to_load_indices)))
        if (load_file_metadata) {
          metadata_file_counter <- metadata_file_counter + 1
        }
        if (!is.na(file_to_load) && !not_found_file) {
          # Work out indices to take
          first_round_indices <- lapply(inner_dims, 
            function (x) {
              if (is.null(file_dim_across_files[[x]])) {
                selectors[[x]][['fri']][[1]]
              } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
                which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] 
                selectors[[x]][['fri']][[which_chunk]]
              }
            })
          names(first_round_indices) <- inner_dims
          second_round_indices <- lapply(inner_dims, 
            function (x) {
              if (is.null(file_dim_across_files[[x]])) {
                selectors[[x]][['sri']][[1]]
              } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
                which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]]
                selectors[[x]][['sri']][[which_chunk]]
              }
            })
if (debug) {
print("-> BUILDING A WORK PIECE")
#print(str(selectors))
}
          names(second_round_indices) <- inner_dims
          if (!any(sapply(first_round_indices, length) == 0)) {
            work_piece <- list()
            work_piece[['first_round_indices']] <- first_round_indices
            work_piece[['second_round_indices']] <- second_round_indices
            work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices
            work_piece[['file_path']] <- file_to_load
            work_piece[['store_dims']] <- final_dims
            # Work out store position
            store_position <- final_dims
            store_position[names(file_to_load_indices)] <- file_to_load_indices
            store_position[inner_dims] <- rep(1, length(inner_dims))
            work_piece[['store_position']] <- store_position
            # Work out file selectors
            file_selectors <- sapply(file_dims, 
              function (x) {
                vector_to_pick <- 1
                if (x %in% names(depending_file_dims)) {
                  vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]]
                }
                selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]]
              })
            names(file_selectors) <- file_dims
            work_piece[['file_selectors']] <- file_selectors
            # Send variables for transformation
            if (!is.null(transform) && (length(transform_vars) > 0)) {