Newer
Older
NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
drop_var_dim = FALSE, unlist = TRUE,
expect_all_indices = FALSE, allow_out_of_range = TRUE) {
file_opener <- NcOpen
file_closer <- NcClose
is_single_na <- function(x) ifelse(length(x) > 1, FALSE, is.na(x))
close <- FALSE
if (is.character(file_to_read)) {
file_object <- file_opener(file_to_read)
file_path <- file_to_read
close <- TRUE
} else if (grepl('^ncdf', class(file_to_read))) {
file_object <- file_to_read
file_path <- file_object$filename
} else {
stop("Either the path to a NetCDF file or a ncdf object must be provided as 'file_to_read'.")
}
# Check dim_indices
if (!is.null(dim_indices)) {
if (!is.list(dim_indices)) {
stop("Parameter 'dim_indices' must be a list of numeric vectors.")
}
if (is.null(names(dim_indices))) {
stop("Parameter 'dim_indices' must have dimension names.")
}
var_indices_position <- NULL
for (i in 1:length(dim_indices)) {
if (names(dim_indices)[i] %in% c('var', 'variable')) {
vars_to_read <- dim_indices[[i]]
var_indices_position <- i
} else {
if (!(names(dim_indices)[i] %in% names(file_object$dim))) {
stop("Provided indices in 'dim_indices' for a non-existing dimension.")
}
if (!is.numeric(dim_indices[[i]])) {
stop("Parameter 'dim_indices' must be a list of numeric vectors.")
}
}
if (identical(dim_indices[[i]], TRUE)) {
dim_indices[[i]] <- NA
}
}
if (!is.null(var_indices_position)) {
dim_indices <- dim_indices[-var_indices_position]
}
}
if (is.character(vars_to_read) || is.numeric(vars_to_read)) {
vars_to_read <- list(vars_to_read)
}
print_error <- FALSE
if (is.list(vars_to_read)) {
if (!all(sapply(vars_to_read, function(x) is.character(x) || is.numeric(x)))) {
print_error <- TRUE
}
} else {
print_error <- TRUE
}
if (print_error) {
stop("The variables to take specified in 'vars_to_read' or in 'dim_indices[['var']]' must be one or a list of numeric vectors or vectors of character strings or NULL.")
# if (!is.character(vars_to_read) && !is.numeric(vars_to_read)) {
# stop("Parameter 'vars_to_read' must be a numeric vector or vector of character strings.")
result_list <- NULL
for (vars_to_read_vector in vars_to_read) {
result <- NULL
if (!is.null(file_object)) {
# Create all variables that are 'dimvars'
extra_dimvars <- NULL
extra_dimvars_list <- NULL
for (dim_name in names(file_object$dim)) {
if (file_object$dim[[dim_name]]$create_dimvar) {
new_var <- list(name = dim_name, ndims = 1,
size = file_object$dim[[dim_name]]$len,
units = file_object$dim[[dim_name]]$units,
dim = list(file_object$dim[[dim_name]]))
new_var_extra_atts <- ncatt_get(file_object, dim_name)
new_var[names(new_var_extra_atts)] <- new_var_extra_atts
extra_dimvars_list <- c(extra_dimvars_list, setNames(list(new_var), dim_name))
extra_dimvars <- c(extra_dimvars, dim_name)
}
}
#file_object$var[extra_dimvars] <- extra_dimvars_list
#file_object$nvars <- file_object$nvars + length(extra_dimvars)
nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector))
if (length(nmv) > 0) {
if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) {
stop("Provided numerical variable indices out of bounds in 'vars_to_read'.")
}
vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]]
}
for (var_name in vars_to_read_vector) {
if (var_name %in% extra_dimvars) {
indices_to_take <- dim_indices[[var_name]]
if (length(dim(indices_to_take)) > 1) {
stop("More than 1 dimensions found for the dimension variable ", var_name, ".")
}
}
var_result <- file_object$dim[[var_name]]$vals[indices_to_take]
## TODO: Crop dimensions in attributes
#atts <- file_object$dim[[var_name]]
atts <- extra_dimvars_list[[var_name]]
atts_to_remove <- c('vals', 'name', 'len', 'group_index',
'group_id', 'id', 'dimvarid', 'create_dimvar')
if (any(names(atts) %in% atts_to_remove)) {
atts <- atts[-which(names(atts) %in% atts_to_remove)]
}
units <- file_object$dim[[var_name]]$units
names(dim(var_result)) <- sapply(extra_dimvars_list[[var_name]]$dim, '[[', 'name')
} else {
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
var_result <- NULL
found_dims <- file_dim_reader(file_object, var_name)
if ('var' %in% names(found_dims)) {
found_dims <- found_dims[-which(names(found_dims) == 'var')]
}
if (length(vars_to_read_vector) == 1 && length(vars_to_read) == 1) {
if (!all(names(dim_indices) %in% names(found_dims))) {
stop("Missing dimensions in the file.\nExpected: ",
paste(names(dim_indices), collapse = ', '), "\n",
"Found: ", paste(names(found_dims), collapse = ', '), "\n",
file_path)
}
}
indices_to_take <- as.list(rep(NA, length(found_dims)))
names(indices_to_take) <- names(found_dims)
extra_dims <- NULL
common_dims <- which(names(found_dims) %in% names(dim_indices))
if (length(common_dims) > 0) {
extra_dims <- found_dims[-common_dims]
} else {
extra_dims <- found_dims
}
if (length(extra_dims) > 0) {
if (expect_all_indices) {
if (any(extra_dims != 1)) {
stop("Unexpected extra dimensions (of length > 1) in the file.\nPossible dims expected: ",
paste(names(dim_indices), collapse = ', '), "\n",
"Found dims: ", paste(names(found_dims), collapse = ', '), "\n",
file_path)
}
}
}
if (expect_all_indices) {
extra_dims <- names(extra_dims)
} else {
extra_dims <- NULL
}
any_empty_selectors <- FALSE
# Here we are allowing for indices out of range (simply discarding them).
for (inner_dim in names(indices_to_take)) {
if (inner_dim %in% names(dim_indices)) {
indices_to_take[[inner_dim]] <- dim_indices[[inner_dim]]
}
inds_out_of_range <- which(indices_to_take[[inner_dim]] > found_dims[inner_dim])
if (length(inds_out_of_range) > 0) {
if (allow_out_of_range) {
indices_to_take[[inner_dim]] <- indices_to_take[[inner_dim]][-which(indices_to_take[[inner_dim]] > found_dims[inner_dim])]
} else {
stop("Provided indices out of range for dimension '", inner_dim, "'.")
}
}
if (length(indices_to_take[[inner_dim]]) == 0) {
any_empty_selectors <- TRUE
}
if (length(which(indices_to_take[[inner_dim]] < 0)) > 0) {
stop("Invalid indices provided for '", inner_dim, "'.")
}
}
if (!any_empty_selectors) {
#missing_dims <- NULL
#if (length(found_dims) < length(indices_to_take)) {
# missing_dim_names <- names(dim_indices)[-which(names(dim_indices) %in% names(dims))]
# missing_dim_indices <- lapply(missing_dim_names, function(x) dim_indices[[x]])
# if (any(!sapply(missing_dim_indices, identical, 1))) {
# stop("Could not find all expected dimensions in the file.\nExpected: ",
# paste(names(dim_indices), collapse = ', '), "\n",
# "Found: ", paste(names(dims), collapse = ', '), "\n",
# file_path)
# } else {
# original_dims <- sapply(dim_indices, length)
# names(original_dims) <- names(dim_indices)
# dim_indices <- dim_indices[-which(names(dim_indices) %in% missing_dim_names)]
# }
# missing_dims <- missing_dim_names
#}
inner_dims <- names(indices_to_take)
reorder <- NULL
if (any(names(found_dims) != inner_dims)) {
reorder <- sapply(names(found_dims), function(x) which(inner_dims == x))
reorder_back <- sapply(inner_dims, function(x) which(names(found_dims) == x))
indices_to_take <- indices_to_take[reorder]
}
start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x))
count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1)
var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)),
lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE)))
#metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
## TODO: Crop dimensions in attributes
if (!is.null(reorder)) {
var_result <- aperm(var_result, reorder_back)
}
#if (!is.null(missing_dims)) {
# dim(var_result) <- original_dims
#}
if (!is.null(extra_dims)) {
dim(var_result) <- dim(var_result)[-which(inner_dims %in% extra_dims)]
}
#attr(var_result, 'variables') <- metadata
names(dim(var_result)) <- inner_dims
}
atts <- file_object$var[[var_name]]
atts_to_remove <- c('id', 'name', 'ndims', 'natts', 'size',
'dimids', 'group_index', 'chunksizes',
'storage', 'shuffle', 'compression', 'dims',
'varsize', 'longname')
if (any(names(atts) %in% atts_to_remove)) {
atts <- atts[-which(names(atts) %in% atts_to_remove)]
}
extra_atts <- ncatt_get(file_object, var_name)
atts[names(extra_atts)] <- extra_atts
units <- file_object$var[[var_name]]$units
names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
}
if (!is.null(var_result)) {
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
var_result[] <- paste(var_result, units)
} else if (grepl(' since ', units)) {
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
var_result <- seq(as.POSIXct(parts[2]), length = max(var_result, na.rm = TRUE) + 1, by = units)[var_result + 1]
if (!drop_var_dim && (length(vars_to_read_vector) == 1)) {
dim(var_result) <- c(c(var = 1), dim(var_result))
}
attr(var_result, 'variables') <- structure(list(atts), .Names = var_name)
## TODO: Take the general attributes out of atts and put them as
## global attributes.
if (is.null(result)) {
result <- var_result
} else {
new_attrs <- c(attr(result, 'variables'),
attr(var_result, 'variables'))
result <- .MergeArrays(result, var_result, 'var')
attr(result, 'variables') <- new_attrs
if (is.null(result_list)) {
if (length(vars_to_read) == 1 && unlist) {
result_list <- result
if (length(vars_to_read_vector) == 1) {
result_list <- structure(list(result), .Names = vars_to_read_vector)
} else {
result_list <- list(result)
}
}
} else {
if (length(vars_to_read_vector) == 1) {
result_list <- do.call('[[<-', list(x = result_list,
i = vars_to_read_vector,
value = result))
} else {
result_list <- do.call('[[<-', list(x = result_list,
i = length(result_list) + 1,
value = result))
}
}
}
if (close) {
file_closer(file_object)
}
result_list