zzz.R 53.9 KB
Newer Older
  data_array_final_dims <- multiApply::Apply(data_array_final_dims,
                                             target_dims = c(across_inner_dim, across_file_dim),  #c('time', 'file_date')
                                             output_dims = c(across_inner_dim, across_file_dim),
                                             fun = func_remove_blank,
                                             logi_array = logi_array)$output1
  ## reorder back to the correct dim
  tmp <- match(names(final_dims), names(dim(data_array_final_dims)))
  data_array_final_dims <- .aperm2(data_array_final_dims, tmp)
  data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999]  # become a vector

  return(data_array_tmp)
}



# When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks 
# (i.e., work_piece) is necessary if one file contains values for discrete dimensions
rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, 
                                      final_dims_fake, across_inner_dim, length_inner_across_dim) {
  # generate the correct order list from indices_chunk 
  final_order_list <- list()
  i <- 1
  j <- 1
  a <- indices_chunk[i]
  while (i <= length(indices_chunk)) {
    while (indices_chunk[i+1] == indices_chunk[i] & i < length(indices_chunk)) {
      a <- c(a, indices_chunk[i+1])
        i <- i + 1
    }
    final_order_list[[j]] <- a
    a <- indices_chunk[i+1]
    i <- i + 1
    j <- j + 1
  }
  names(final_order_list) <- sapply(final_order_list, '[[', 1)
  final_order_list <- lapply(final_order_list, length)
        
  if (!all(diff(as.numeric(names(final_order_list))) > 0)) {
    # shape the vector into the array without split_dims
    split_dims_pos <- match(all_split_dims[[1]], final_dims_fake)
    new_dims <- c()
    if (split_dims_pos[1] > 1) {
      new_dims <- c(new_dims, final_dims_fake[1:(split_dims_pos[1] - 1)])
    }
    new_dims <- c(new_dims,  prod(all_split_dims[[1]]))
    names(new_dims)[split_dims_pos[1]] <- across_inner_dim
    if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) {
      new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)])
    }
    data_array_no_split <- array(data_array_tmp, dim = new_dims)
    # seperate 'time' dim into each work_piece length
    data_array_seperate <- list()
    tmp <- cumsum(unlist(length_inner_across_dim))
    tmp <- c(0, tmp)
    for (i in 1:length(length_inner_across_dim)) {
      data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim,
                                         (tmp[i] + 1):tmp[i + 1])
    }
    # re-build the array: chunk 
    which_chunk <- as.numeric(names(final_order_list))
    how_many_indices <- unlist(final_order_list)
    array_piece <- list()
    ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate)))
    for (i in 1:length(final_order_list)) {
      array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]],
                                 across_inner_dim,
                                 ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1))
      ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i]
    }
   
    # re-build the array: paste
    data_array_tmp <- array_piece[[1]]
    along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim)
    if (length(array_piece) > 1) {
      for (i in 2:length(array_piece)) {
        data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]],
                                       along = along_pos)
      }
    }
  }    
  
  return(data_array_tmp)
}


# Create a list of metadata of the variable (e.g., tas)
create_metadata_list <- function(array_of_metadata_flags, metadata_dims, pattern_dims,
                                 loaded_metadata_files, loaded_metadata, dat_names,
                                 dataset_has_files) {
  #NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat
  #      for $common, it is a list of metadata length. For $dat, it is a list of dat length,
  #      and each sublist has the metadata for each dat.
aho's avatar
aho committed
  dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims]
  if (!any(names(dim_of_metadata) == pattern_dims) |
      (any(names(dim_of_metadata) == pattern_dims) &
       dim_of_metadata[pattern_dims] == 1)) {  # put under $common; old code
    return_metadata <- vector('list',
                              length = prod(dim_of_metadata))
    return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata
    dim(return_metadata) <- dim_of_metadata

  } else { # put under $dat. metadata_dims has 'dat' and dat length > 1
    return_metadata <- vector('list',
                              length = dim_of_metadata[pattern_dims])
    names(return_metadata) <- dat_names
    for (kk in 1:length(return_metadata)) {
      return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat
    }
    loaded_metadata_count <- 1
    for (kk in 1:length(return_metadata)) {
      for (jj in 1:length(return_metadata[[kk]])) {
        if (dataset_has_files[kk]) {
          if (loaded_metadata_count %in% loaded_metadata_files) {
            return_metadata[[kk]][jj] <- loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]]
            names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[which(loaded_metadata_files == loaded_metadata_count)]])

          } else {
            return_metadata[[kk]][jj] <- NULL
aho's avatar
aho committed
          loaded_metadata_count <- loaded_metadata_count + 1
        } else {
          return_metadata[[kk]][jj] <- NULL

  return(return_metadata)
}

# This function adds the metadata of the variable (e.g., tas) into the list of picked_vars or
# picked_common_vars. The metadata is only retrieved when 'retrieve = TRUE'.
combine_metadata_picked_vars <- function(return_metadata, picked_vars, picked_common_vars,
                                         metadata_dims, pattern_dims, length_dat) {
#NOTE: The metadata of variables can be saved in one of the two different structures.
#      (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1):
#          put under $common
#      (2) (metadata_dims == 'dat' & length(dat) > 1):
#          put under $dat1, $dat2, .... Put it in picked_vars list
#TODO: The current (2) uses the inefficient method. Should define the list structure first
#      then fill the list, rather than expand it in the for loop.

  if (any(metadata_dims == pattern_dims) & length_dat > 1) { # (2)
    for (kk in 1:length(return_metadata)) {
      sublist_names <- lapply(return_metadata, names)[[kk]]
      if (!is.null(sublist_names)) {
        for (jj in 1:length(sublist_names)) {
          picked_vars[[kk]][[sublist_names[jj]]] <- return_metadata[[kk]][[jj]]
        }
      }
    }
    Variables_list <- c(list(common = picked_common_vars), picked_vars)

  } else {  #(1)
    len <- unlist(lapply(return_metadata, length))
    len <- sum(len) + length(which(len == 0))  #0 means NULL
    name_list <- lapply(return_metadata, names)
    new_list <- vector('list', length = len)
    count <- 1

    for (kk in 1:length(return_metadata)) {
      if (length(return_metadata[[kk]]) == 0) {  #NULL
        count <- count + 1
      } else {
        for (jj in 1:length(return_metadata[[kk]])) {
          new_list[[count]] <- return_metadata[[kk]][[jj]]
          names(new_list)[count] <- name_list[[kk]][jj]
          count <- count + 1
        }
      }
    }
    Variables_list <- c(list(common = c(picked_common_vars, new_list)), picked_vars)
  }

  return(Variables_list)
}