Commit 594d119f authored by Nicolau Manubens's avatar Nicolau Manubens
Browse files

Improved nc2a.

parent 800de6e4
NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
drop_var_dim = FALSE, unlist = TRUE) {
NetCDFToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
drop_var_dim = FALSE, unlist = TRUE,
expect_all_indices = FALSE, allow_out_of_range = TRUE) {
file_opener <- NcOpen
file_closer <- NcClose
file_dim_reader <- NetCDFReadDims
is_single_na <- function(x) ifelse(length(x) > 1, FALSE, is.na(x))
close <- FALSE
if (is.character(file_to_read)) {
file_object <- file_opener(file_to_read)
......@@ -15,6 +17,36 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
stop("Either the path to a NetCDF file or a ncdf object must be provided as 'file_to_read'.")
}
# Check dim_indices
if (!is.null(dim_indices)) {
if (!is.list(dim_indices)) {
stop("Parameter 'dim_indices' must be a list of numeric vectors.")
}
if (is.null(names(dim_indices))) {
stop("Parameter 'dim_indices' must have dimension names.")
}
var_indices_position <- NULL
for (i in 1:length(dim_indices)) {
if (names(dim_indices)[i] %in% c('var', 'variable')) {
vars_to_read <- dim_indices[[i]]
var_indices_position <- i
} else {
if (!(names(dim_indices)[i] %in% names(file_object$dim))) {
stop("Provided indices in 'dim_indices' for a non-existing dimension.")
}
if (!is.numeric(dim_indices[[i]])) {
stop("Parameter 'dim_indices' must be a list of numeric vectors.")
}
}
if (identical(dim_indices[[i]], TRUE)) {
dim_indices[[i]] <- NA
}
}
if (!is.null(var_indices_position)) {
dim_indices <- dim_indices[-var_indices_position]
}
}
# Check var_names
if (is.character(vars_to_read) || is.numeric(vars_to_read)) {
vars_to_read <- list(vars_to_read)
......@@ -28,7 +60,7 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
print_error <- TRUE
}
if (print_error) {
stop("Parameter 'vars_to_read' must be one or a list of numeric vectors or vectors of character strings or NULL.")
stop("The variables to take specified in 'vars_to_read' or in 'dim_indices[['var']]' must be one or a list of numeric vectors or vectors of character strings or NULL.")
}
# if (!is.character(var_names) && !is.numeric(var_names)) {
# stop("Parameter 'var_names' must be a numeric vector or vector of character strings.")
......@@ -64,7 +96,15 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
}
for (var_name in vars_to_read_vector) {
if (var_name %in% extra_dimvars) {
var_result <- file_object$dim[[var_name]]$vals
indices_to_take <- TRUE
if (var_names %in% names(dim_indices)) {
indices_to_take <- dim_indices[[var_name]]
if (length(dim(indices_to_take)) > 1) {
stop("More than 1 dimensions found for the dimension variable ", var_name, ".")
}
}
var_result <- file_object$dim[[var_name]]$vals[indices_to_take]
## TODO: Crop dimensions in attributes
#atts <- file_object$dim[[var_name]]
atts <- extra_dimvars_list[[var_name]]
atts_to_remove <- c('vals', 'name', 'len', 'group_index',
......@@ -75,12 +115,106 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
units <- file_object$dim[[var_name]]$units
names(dim(var_result)) <- sapply(extra_dimvars_list[[var_name]]$dim, '[[', 'name')
} else {
if (!(var_name %in% names(file_object$var))) {
stop("Could not find the variable '", var_name, "' in the file.")
var_result <- NULL
found_dims <- file_dim_reader(file_object, var_name)
if ('var' %in% names(found_dims)) {
found_dims <- found_dims[-which(names(found_dims) == 'var')]
}
if (length(vars_to_read_vector) == 1 && length(vars_to_read) == 1) {
if (!all(names(dim_indices) %in% names(found_dims))) {
stop("Missing dimensions in the file.\nExpected: ",
paste(names(dim_indices), collapse = ', '), "\n",
"Found: ", paste(names(found_dims), collapse = ', '), "\n",
file_path)
}
}
indices_to_take <- as.list(rep(NA, length(found_dims)))
names(indices_to_take) <- names(found_dims)
extra_dims <- NULL
common_dims <- which(names(found_dims) %in% names(dim_indices))
if (length(common_dims) > 0) {
extra_dims <- found_dims[-common_dims]
} else {
extra_dims <- found_dims
}
if (length(extra_dims) > 0) {
if (expect_all_indices) {
if (any(extra_dims != 1)) {
stop("Unexpected extra dimensions (of length > 1) in the file.\nPossible dims expected: ",
paste(names(dim_indices), collapse = ', '), "\n",
"Found dims: ", paste(names(found_dims), collapse = ', '), "\n",
file_path)
}
}
}
if (expect_all_indices) {
extra_dims <- names(extra_dims)
} else {
extra_dims <- NULL
}
any_empty_selectors <- FALSE
# Here we are allowing for indices out of range (simply discarding them).
for (inner_dim in names(indices_to_take)) {
if (inner_dim %in% names(dim_indices)) {
indices_to_take[[inner_dim]] <- dim_indices[[inner_dim]]
}
inds_out_of_range <- which(indices_to_take[[inner_dim]] > found_dims[inner_dim])
if (length(inds_out_of_range) > 0) {
if (allow_out_of_range) {
indices_to_take[[inner_dim]] <- indices_to_take[[inner_dim]][-which(indices_to_take[[inner_dim]] > found_dims[inner_dim])]
} else {
stop("Provided indices out of range for dimension '", inner_dim, "'.")
}
}
if (length(indices_to_take[[inner_dim]]) == 0) {
any_empty_selectors <- TRUE
}
if (length(which(indices_to_take[[inner_dim]] < 0)) > 0) {
stop("Invalid indices provided for '", inner_dim, "'.")
}
}
if (!any_empty_selectors) {
#missing_dims <- NULL
#if (length(found_dims) < length(indices_to_take)) {
# missing_dim_names <- names(dim_indices)[-which(names(dim_indices) %in% names(dims))]
# missing_dim_indices <- lapply(missing_dim_names, function(x) dim_indices[[x]])
# if (any(!sapply(missing_dim_indices, identical, 1))) {
# stop("Could not find all expected dimensions in the file.\nExpected: ",
# paste(names(dim_indices), collapse = ', '), "\n",
# "Found: ", paste(names(dims), collapse = ', '), "\n",
# file_path)
# } else {
# original_dims <- sapply(dim_indices, length)
# names(original_dims) <- names(dim_indices)
# dim_indices <- dim_indices[-which(names(dim_indices) %in% missing_dim_names)]
# }
# missing_dims <- missing_dim_names
#}
inner_dims <- names(indices_to_take)
reorder <- NULL
if (any(names(found_dims) != inner_dims)) {
reorder <- sapply(names(found_dims), function(x) which(inner_dims == x))
reorder_back <- sapply(inner_dims, function(x) which(names(found_dims) == x))
indices_to_take <- indices_to_take[reorder]
}
start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x))
count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1)
var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)),
lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE)))
#metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
## TODO: Crop dimensions in attributes
if (!is.null(reorder)) {
var_result <- aperm(var_result, reorder_back)
}
#if (!is.null(missing_dims)) {
# dim(var_result) <- original_dims
#}
if (!is.null(extra_dims)) {
dim(var_result) <- dim(var_result)[-which(inner_dims %in% extra_dims)]
}
#attr(var_result, 'variables') <- metadata
names(dim(var_result)) <- inner_dims
}
try({
var_result <- ncvar_get(file_object, var_name)
}, silent = TRUE)
atts <- file_object$var[[var_name]]
atts_to_remove <- c('id', 'name', 'ndims', 'natts', 'size',
'dimids', 'group_index', 'chunksizes',
......@@ -94,6 +228,7 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
units <- file_object$var[[var_name]]$units
names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
}
if (!is.null(var_result)) {
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
units <- 'secs'
......@@ -127,6 +262,7 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
}
}
}
}
if (is.null(result_list)) {
if (length(vars_to_read) == 1 && unlist) {
result_list <- result
......@@ -155,184 +291,4 @@ NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices,
result_list
}
# Parameter 'file_selectos' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_selectors' expects a named list of numeric vectors.
.NcDataReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices,
file_opener = NULL, file_closer = NULL) {
file_dim_reader <- .NcDimReader
#print("aa")
close <- FALSE
if (is.null(file_object)) {
if (is.null(file_path)) {
stop("Either 'file_path' or 'file_object' must be provided.")
}
if (is.null(file_opener) || is.null(file_closer)) {
stop("Both 'file_opener' and 'file_closer' must be provided if ",
"no 'file_object' is directly provided.")
} else if (!is.function(file_opener) || !is.function(file_closer)) {
stop("Parameters 'file_opener' and 'file_closer' must be functions.")
}
file_object <- file_opener(file_path)
close <- !is.null(file_object)
} else {
file_path <- file_object$filename
}
result <- NULL
if (!is.null(file_object)) {
var_names <- NULL
var_dim_name <- NULL
if (any(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))) {
var_dim_name <- c('var', 'variable')[which(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))]
if (var_dim_name %in% names(file_selectors)) {
if (is.character(file_selectors[[var_dim_name]])) {
var_names <- file_selectors[[var_dim_name]]
}
} else if (var_dim_name %in% names(inner_indices)) {
var_names <- names(file_object$var)
if (is.character(inner_indices[[var_dim_name]])) {
var_names <- inner_indices[[var_dim_name]][which(inner_indices[[var_dim_name]] %in% var_names)]
} else if (is.numeric(inner_indices[[var_dim_name]])) {
if (any(inner_indices[[var_dim_name]] > length(var_names)) ||
any(inner_indices[[var_dim_name]] < 0)) {
stop("Selectors out of range for '", var_dim_name, "'.")
}
var_names <- var_names[inner_indices[[var_dim_name]]]
} else {
stop("Selectors for '", var_dim_name, "' must be numeric or ",
"character strings.")
}
}
}
if (is.null(var_names)) {
if (length(file_object$var) == 1) {
var_names <- names(file_object$var)[1]
} else {
stop(paste0("Could not disambiguate which variable in the file ",
"is being requested. Either provide the parameter ",
"'var'/'variable' or force the file to contain only ",
"one variable.\n", file_path))
}
}
result <- try({
res <- NULL
metadata <- NULL
dims <- file_dim_reader(file_path, file_object, file_selectors,
inner_indices)
#print("bb")
if (!all(names(inner_indices) %in% names(dims))) {
stop("Missing dimensions in the file.\nExpected: ",
paste(names(inner_indices), collapse = ', '), "\n",
"Found: ", paste(names(dims), collapse = ', '), "\n",
file_path)
}
extra_dims <- NULL
if (length(dims) > length(inner_indices)) {
common_dims <- which(names(dims) %in% names(inner_indices))
if (length(common_dims) > 0) {
extra_dims <- dims[-common_dims]
} else {
extra_dims <- dims
}
if (any(extra_dims != 1)) {
stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
paste(names(inner_indices), collapse = ', '), "\n",
"Found: ", paste(names(dims), collapse = ', '), "\n",
file_path)
} else {
inner_indices[names(extra_dims)] <- rep(1, length(extra_dims))
}
extra_dims <- names(extra_dims)
}
any_empty_selectors <- FALSE
# Here we are allowing for indices out of range (simply discarding them).
for (inner_dim in names(inner_indices)) {
inds_out_of_range <- which(inner_indices[[inner_dim]] > dims[inner_dim])
if (length(inds_out_of_range) > 0) {
inner_indices[[inner_dim]] <- inner_indices[[inner_dim]][-which(inner_indices[[inner_dim]] > dims[inner_dim])]
}
if (length(inner_indices[[inner_dim]]) == 0) {
any_empty_selectors <- TRUE
}
if (any(inner_indices[[inner_dim]] < 0)) {
stop("Invalid indices provided for '", inner_dim, "'.")
}
}
if (!any_empty_selectors) {
missing_dims <- NULL
if (length(dims) < length(inner_indices)) {
missing_dim_names <- names(inner_indices)[-which(names(inner_indices) %in% names(dims))]
missing_dim_indices <- lapply(missing_dim_names, function(x) inner_indices[[x]])
if (any(!sapply(missing_dim_indices, identical, 1))) {
stop("Could not find all expected dimensions in the file.\nExpected: ",
paste(names(inner_indices), collapse = ', '), "\n",
"Found: ", paste(names(dims), collapse = ', '), "\n",
file_path)
} else {
original_dims <- sapply(inner_indices, length)
names(original_dims) <- names(inner_indices)
inner_indices <- inner_indices[-which(names(inner_indices) %in% missing_dim_names)]
}
missing_dims <- missing_dim_names
}
#print("cc")
inner_dims <- names(inner_indices)
reorder <- NULL
if (any(names(dims) != inner_dims)) {
reorder <- sapply(names(dims), function(x) which(inner_dims == x))
reorder_back <- sapply(inner_dims, function(x) which(names(dims) == x))
inner_indices <- inner_indices[reorder]
}
if (!is.null(var_dim_name) && (var_dim_name %in% names(inner_indices))) {
inner_indices <- inner_indices[-which(names(inner_indices) == var_dim_name)]
}
for (var_name in var_names) {
tmp <- do.call('[', c(list(ncvar_get(file_object, var_name, sapply(inner_indices, min),
sapply(inner_indices, max) - sapply(inner_indices, min) + 1,
collapse_degen = FALSE)),
lapply(inner_indices, function(x) x - min(x) + 1), list(drop = FALSE)))
if (!is.null(var_dim_name) && (var_dim_name %in% inner_dims)) {
dim(tmp) <- c(dim(tmp)[which(1:length(dim(tmp)) < which(inner_dims == var_dim_name))], 1,
dim(tmp)[which(1:length(dim(tmp)) >= which(inner_dims == var_dim_name))])
if (is.null(res)) {
res <- tmp
} else {
res <- abind(res, tmp, along = which(inner_dims == var_dim_name))
}
} else {
res <- tmp
}
metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
}
if (!is.null(reorder)) {
res <- aperm(res, reorder_back)
}
if (!is.null(missing_dims)) {
dim(res) <- original_dims
}
if (!is.null(extra_dims)) {
dim(res) <- dim(res)[-which(inner_dims %in% extra_dims)]
}
attr(res, 'variables') <- metadata
names(dim(res)) <- inner_dims
## TODO: Take the common parts in metadata and put them as
## global attributes.
}
#print("dd")
res
})
if ('try-error' %in% class(result)) {
result <- NULL
}
}
if (close) {
file_closer(file_object)
}
result
}
nc2a <- NetCDFToArray
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment