NetCDFToArray.R 13.9 KB
Newer Older
NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices, 
                          drop_var_dim = FALSE, unlist = TRUE) {
  file_opener <- NcOpen
  file_closer <- NcClose
  file_dim_reader <- NetCDFReadDims
  close <- FALSE
  if (is.character(file_to_read)) {
    file_object <- file_opener(file_to_read)
    file_path <- file_to_read
    close <- TRUE
  } else if (grepl('^ncdf', class(file_to_read))) {
    file_object <- file_to_read
    file_path <- file_object$filename
  } else {
    stop("Either the path to a NetCDF file or a ncdf object must be provided as 'file_to_read'.")
  }

  # Check var_names
  if (is.character(vars_to_read) || is.numeric(vars_to_read)) {
    vars_to_read <- list(vars_to_read)
  }
  print_error <- FALSE
  if (is.list(vars_to_read)) {
    if (!all(sapply(vars_to_read, function(x) is.character(x) || is.numeric(x)))) {
      print_error <- TRUE
    }
  } else {
    print_error <- TRUE
  }
  if (print_error) {
    stop("Parameter 'vars_to_read' must be one or a list of numeric vectors or vectors of character strings or NULL.")
#  if (!is.character(var_names) && !is.numeric(var_names)) {
#    stop("Parameter 'var_names' must be a numeric vector or vector of character strings.")
#  }
  result_list <- NULL
  for (vars_to_read_vector in vars_to_read) {
    result <- NULL
    if (!is.null(file_object)) {
      # Create all variables that are 'dimvars'
      extra_dimvars <- NULL
      extra_dimvars_list <- NULL
      for (dim_name in names(file_object$dim)) {
        if (file_object$dim[[dim_name]]$create_dimvar) {
          new_var <- list(name = dim_name, ndims = 1, 
                          size = file_object$dim[[dim_name]]$len,
                          units = file_object$dim[[dim_name]]$units,
                          dim = list(file_object$dim[[dim_name]]))
          new_var_extra_atts <- ncatt_get(file_object, dim_name)
          new_var[names(new_var_extra_atts)] <- new_var_extra_atts
          extra_dimvars_list <- c(extra_dimvars_list, setNames(list(new_var), dim_name))
          extra_dimvars <- c(extra_dimvars, dim_name)
        }
      }
      #file_object$var[extra_dimvars] <- extra_dimvars_list
      #file_object$nvars <- file_object$nvars + length(extra_dimvars)
      nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector))
      if (length(nmv) > 0) {
        if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) {
          stop("Provided numerical variable indices out of bounds in 'vars_to_read'.")
        }
        vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]]
      }
      for (var_name in vars_to_read_vector) {
        if (var_name %in% extra_dimvars) {
          var_result <- file_object$dim[[var_name]]$vals
          #atts <- file_object$dim[[var_name]]
          atts <- extra_dimvars_list[[var_name]]
          atts_to_remove <- c('vals', 'name', 'len', 'group_index', 
                              'group_id', 'id', 'dimvarid', 'create_dimvar')
          if (any(names(atts) %in% atts_to_remove)) {
            atts <- atts[-which(names(atts) %in% atts_to_remove)]
          }
          units <- file_object$dim[[var_name]]$units
          names(dim(var_result)) <- sapply(extra_dimvars_list[[var_name]]$dim, '[[', 'name')
        } else {
          if (!(var_name %in% names(file_object$var))) {
            stop("Could not find the variable '", var_name, "' in the file.")
          }
          try({
            var_result <- ncvar_get(file_object, var_name)
          }, silent = TRUE)
          atts <- file_object$var[[var_name]]
          atts_to_remove <- c('id', 'name', 'ndims', 'natts', 'size', 
                              'dimids', 'group_index', 'chunksizes', 
                              'storage', 'shuffle', 'compression', 'dims', 
                              'varsize', 'longname')
          if (any(names(atts) %in% atts_to_remove)) {
            atts <- atts[-which(names(atts) %in% atts_to_remove)]
          }
          extra_atts <- ncatt_get(file_object, var_name)
          atts[names(extra_atts)] <- extra_atts
          units <- file_object$var[[var_name]]$units
          names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
        }
        if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
          if (units == 'seconds') {
            units <- 'secs'
          } else if (units == 'minutes') {
            units <- 'mins'
          }
          var_result[] <- paste(var_result, units)
        } else if (grepl(' since ', units)) {
          parts <- strsplit(units, ' since ')[[1]]
          units <- parts[1]
          if (units == 'seconds') {
            units <- 'secs'
          } else if (units == 'minutes') {
            units <- 'mins'
          }
          var_result <- seq(as.POSIXct(parts[2]), length = max(var_result, na.rm = TRUE) + 1, by = units)[var_result + 1]
        }
        if (!drop_var_dim && (length(vars_to_read_vector) == 1)) {
          dim(var_result) <- c(c(var = 1), dim(var_result))
        }
        attr(var_result, 'variables') <- structure(list(atts), .Names = var_name)
        ## TODO: Take the general attributes out of atts and put them as
        ##       global attributes.
        if (is.null(result)) {
          result <- var_result
        } else {
          new_attrs <- c(attr(result, 'variables'), 
                         attr(var_result, 'variables'))
          result <- .MergeArrays(result, var_result, 'var')
          attr(result, 'variables') <- new_attrs
        }
      }
    }
    if (is.null(result_list)) {
      if (length(vars_to_read) == 1 && unlist) {
        result_list <- result
     } else {
        if (length(vars_to_read_vector) == 1) {
          result_list <- structure(list(result), .Names = vars_to_read_vector)
        } else {
          result_list <- list(result)
        }
      }
    } else {
      if (length(vars_to_read_vector) == 1) {
        result_list <- do.call('[[<-', list(x = result_list, 
                                            i = vars_to_read_vector,
                                            value = result))
      } else {
        result_list <- do.call('[[<-', list(x = result_list, 
                                            i = length(result_list) + 1,
                                            value = result))
      }
    }
  }
  if (close) {
    file_closer(file_object)
  }
}

# Parameter 'file_selectos' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_selectors' expects a named list of numeric vectors.
.NcDataReader <- function(file_path = NULL, file_object = NULL, 
                          file_selectors = NULL, inner_indices,
                          file_opener = NULL, file_closer = NULL) {
  file_dim_reader <- .NcDimReader
#print("aa")
  close <- FALSE
  if (is.null(file_object)) {
    if (is.null(file_path)) {
      stop("Either 'file_path' or 'file_object' must be provided.")
    }
    if (is.null(file_opener) || is.null(file_closer)) {
      stop("Both 'file_opener' and 'file_closer' must be provided if ",
           "no 'file_object' is directly provided.")
    } else if (!is.function(file_opener) || !is.function(file_closer)) {
      stop("Parameters 'file_opener' and 'file_closer' must be functions.")
    }
    file_object <- file_opener(file_path)
    close <- !is.null(file_object)
  } else {
    file_path <- file_object$filename
  }

  result <- NULL
  if (!is.null(file_object)) {
    var_names <- NULL
    var_dim_name <- NULL
    if (any(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))) {
      var_dim_name <- c('var', 'variable')[which(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))]
      if (var_dim_name %in% names(file_selectors)) {
        if (is.character(file_selectors[[var_dim_name]])) {
          var_names <- file_selectors[[var_dim_name]]
        }
      } else if (var_dim_name %in% names(inner_indices)) {
        var_names <- names(file_object$var)
        if (is.character(inner_indices[[var_dim_name]])) {
          var_names <- inner_indices[[var_dim_name]][which(inner_indices[[var_dim_name]] %in% var_names)]
        } else if (is.numeric(inner_indices[[var_dim_name]])) {
          if (any(inner_indices[[var_dim_name]] > length(var_names)) ||
              any(inner_indices[[var_dim_name]] < 0)) {
            stop("Selectors out of range for '", var_dim_name, "'.")
          }
          var_names <- var_names[inner_indices[[var_dim_name]]]
        } else {
          stop("Selectors for '", var_dim_name, "' must be numeric or ",
               "character strings.")
        }
      }
    }
    if (is.null(var_names)) {
      if (length(file_object$var) == 1) {
        var_names <- names(file_object$var)[1]
      } else {
        stop(paste0("Could not disambiguate which variable in the file ",
                    "is being requested. Either provide the parameter ",
                    "'var'/'variable' or force the file to contain only ",
                    "one variable.\n", file_path))
      }
    }
    result <- try({
      res <- NULL
      metadata <- NULL
      dims <- file_dim_reader(file_path, file_object, file_selectors,
                              inner_indices)
#print("bb")
      if (!all(names(inner_indices) %in% names(dims))) {
        stop("Missing dimensions in the file.\nExpected: ",
             paste(names(inner_indices), collapse = ', '), "\n",
             "Found: ", paste(names(dims), collapse = ', '), "\n",
             file_path)
      }
      extra_dims <- NULL
      if (length(dims) > length(inner_indices)) {
        common_dims <- which(names(dims) %in% names(inner_indices))
        if (length(common_dims) > 0) {
          extra_dims <- dims[-common_dims]
        } else {
          extra_dims <- dims
        }
        if (any(extra_dims != 1)) {
          stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
               paste(names(inner_indices), collapse = ', '), "\n",
               "Found: ", paste(names(dims), collapse = ', '), "\n",
               file_path)
        } else {
          inner_indices[names(extra_dims)] <- rep(1, length(extra_dims))
        }
        extra_dims <- names(extra_dims)
      }
      any_empty_selectors <- FALSE
      # Here we are allowing for indices out of range (simply discarding them).
      for (inner_dim in names(inner_indices)) {
        inds_out_of_range <- which(inner_indices[[inner_dim]] > dims[inner_dim])
        if (length(inds_out_of_range) > 0) {
          inner_indices[[inner_dim]] <- inner_indices[[inner_dim]][-which(inner_indices[[inner_dim]] > dims[inner_dim])]
        }
        if (length(inner_indices[[inner_dim]]) == 0) {
          any_empty_selectors <- TRUE
        }
        if (any(inner_indices[[inner_dim]] < 0)) {
          stop("Invalid indices provided for '", inner_dim, "'.")
        }
      }
      if (!any_empty_selectors) {
        missing_dims <- NULL
        if (length(dims) < length(inner_indices)) {
          missing_dim_names <- names(inner_indices)[-which(names(inner_indices) %in% names(dims))]
          missing_dim_indices <- lapply(missing_dim_names, function(x) inner_indices[[x]])
          if (any(!sapply(missing_dim_indices, identical, 1))) {
            stop("Could not find all expected dimensions in the file.\nExpected: ",
                 paste(names(inner_indices), collapse = ', '), "\n",
                 "Found: ", paste(names(dims), collapse = ', '), "\n",
                 file_path)
          } else {
            original_dims <- sapply(inner_indices, length)
            names(original_dims) <- names(inner_indices)
            inner_indices <- inner_indices[-which(names(inner_indices) %in% missing_dim_names)]
          }
          missing_dims <- missing_dim_names
        }
#print("cc")
        inner_dims <- names(inner_indices)
        reorder <- NULL
        if (any(names(dims) != inner_dims)) {
          reorder <- sapply(names(dims), function(x) which(inner_dims == x))
          reorder_back <- sapply(inner_dims, function(x) which(names(dims) == x))
          inner_indices <- inner_indices[reorder]
        }
        if (!is.null(var_dim_name) && (var_dim_name %in% names(inner_indices))) {
          inner_indices <- inner_indices[-which(names(inner_indices) == var_dim_name)]
        }
        for (var_name in var_names) {
          tmp <- do.call('[', c(list(ncvar_get(file_object, var_name, sapply(inner_indices, min),
                                               sapply(inner_indices, max) - sapply(inner_indices, min) + 1,
                                               collapse_degen = FALSE)),
                                lapply(inner_indices, function(x) x - min(x) + 1), list(drop = FALSE)))
          if (!is.null(var_dim_name) && (var_dim_name %in% inner_dims)) {
            dim(tmp) <- c(dim(tmp)[which(1:length(dim(tmp)) < which(inner_dims == var_dim_name))], 1,
                          dim(tmp)[which(1:length(dim(tmp)) >= which(inner_dims == var_dim_name))])
            if (is.null(res)) {
              res <- tmp
            } else {
              res <- abind(res, tmp, along = which(inner_dims == var_dim_name))
            }
          } else {
            res <- tmp
          }
          metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
        }
        if (!is.null(reorder)) {
          res <- aperm(res, reorder_back)
        }
        if (!is.null(missing_dims)) {
          dim(res) <- original_dims
        }
        if (!is.null(extra_dims)) {
          dim(res) <- dim(res)[-which(inner_dims %in% extra_dims)]
        }
        attr(res, 'variables') <- metadata
        names(dim(res)) <- inner_dims
        ## TODO: Take the common parts in metadata and put them as 
        ##       global attributes.
      }
#print("dd")
      res
    })
    if ('try-error' %in% class(result)) {
      result <- NULL
    }
  }

  if (close) {
    file_closer(file_object)
  }

  result
}

nc2a <- NetCDFToArray