NetCDFToArray <- function(file_to_read, vars_to_read, inner_indices, drop_var_dim = FALSE, unlist = TRUE) { file_opener <- NcOpen file_closer <- NcClose file_dim_reader <- NetCDFReadDims close <- FALSE if (is.character(file_to_read)) { file_object <- file_opener(file_to_read) file_path <- file_to_read close <- TRUE } else if (grepl('^ncdf', class(file_to_read))) { file_object <- file_to_read file_path <- file_object$filename } else { stop("Either the path to a NetCDF file or a ncdf object must be provided as 'file_to_read'.") } # Check var_names if (is.character(vars_to_read) || is.numeric(vars_to_read)) { vars_to_read <- list(vars_to_read) } print_error <- FALSE if (is.list(vars_to_read)) { if (!all(sapply(vars_to_read, function(x) is.character(x) || is.numeric(x)))) { print_error <- TRUE } } else { print_error <- TRUE } if (print_error) { stop("Parameter 'vars_to_read' must be one or a list of numeric vectors or vectors of character strings or NULL.") } # if (!is.character(var_names) && !is.numeric(var_names)) { # stop("Parameter 'var_names' must be a numeric vector or vector of character strings.") # } result_list <- NULL for (vars_to_read_vector in vars_to_read) { result <- NULL if (!is.null(file_object)) { # Create all variables that are 'dimvars' extra_dimvars <- NULL extra_dimvars_list <- NULL for (dim_name in names(file_object$dim)) { if (file_object$dim[[dim_name]]$create_dimvar) { new_var <- list(name = dim_name, ndims = 1, size = file_object$dim[[dim_name]]$len, units = file_object$dim[[dim_name]]$units, dim = list(file_object$dim[[dim_name]])) new_var_extra_atts <- ncatt_get(file_object, dim_name) new_var[names(new_var_extra_atts)] <- new_var_extra_atts extra_dimvars_list <- c(extra_dimvars_list, setNames(list(new_var), dim_name)) extra_dimvars <- c(extra_dimvars, dim_name) } } #file_object$var[extra_dimvars] <- extra_dimvars_list #file_object$nvars <- file_object$nvars + length(extra_dimvars) nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector)) if (length(nmv) > 0) { if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) { stop("Provided numerical variable indices out of bounds in 'vars_to_read'.") } vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]] } for (var_name in vars_to_read_vector) { if (var_name %in% extra_dimvars) { var_result <- file_object$dim[[var_name]]$vals #atts <- file_object$dim[[var_name]] atts <- extra_dimvars_list[[var_name]] atts_to_remove <- c('vals', 'name', 'len', 'group_index', 'group_id', 'id', 'dimvarid', 'create_dimvar') if (any(names(atts) %in% atts_to_remove)) { atts <- atts[-which(names(atts) %in% atts_to_remove)] } units <- file_object$dim[[var_name]]$units names(dim(var_result)) <- sapply(extra_dimvars_list[[var_name]]$dim, '[[', 'name') } else { if (!(var_name %in% names(file_object$var))) { stop("Could not find the variable '", var_name, "' in the file.") } try({ var_result <- ncvar_get(file_object, var_name) }, silent = TRUE) atts <- file_object$var[[var_name]] atts_to_remove <- c('id', 'name', 'ndims', 'natts', 'size', 'dimids', 'group_index', 'chunksizes', 'storage', 'shuffle', 'compression', 'dims', 'varsize', 'longname') if (any(names(atts) %in% atts_to_remove)) { atts <- atts[-which(names(atts) %in% atts_to_remove)] } extra_atts <- ncatt_get(file_object, var_name) atts[names(extra_atts)] <- extra_atts units <- file_object$var[[var_name]]$units names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') } if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { if (units == 'seconds') { units <- 'secs' } else if (units == 'minutes') { units <- 'mins' } var_result[] <- paste(var_result, units) } else if (grepl(' since ', units)) { parts <- strsplit(units, ' since ')[[1]] units <- parts[1] if (units == 'seconds') { units <- 'secs' } else if (units == 'minutes') { units <- 'mins' } var_result <- seq(as.POSIXct(parts[2]), length = max(var_result, na.rm = TRUE) + 1, by = units)[var_result + 1] } if (!drop_var_dim && (length(vars_to_read_vector) == 1)) { dim(var_result) <- c(c(var = 1), dim(var_result)) } attr(var_result, 'variables') <- structure(list(atts), .Names = var_name) ## TODO: Take the general attributes out of atts and put them as ## global attributes. if (is.null(result)) { result <- var_result } else { new_attrs <- c(attr(result, 'variables'), attr(var_result, 'variables')) result <- .MergeArrays(result, var_result, 'var') attr(result, 'variables') <- new_attrs } } } if (is.null(result_list)) { if (length(vars_to_read) == 1 && unlist) { result_list <- result } else { if (length(vars_to_read_vector) == 1) { result_list <- structure(list(result), .Names = vars_to_read_vector) } else { result_list <- list(result) } } } else { if (length(vars_to_read_vector) == 1) { result_list <- do.call('[[<-', list(x = result_list, i = vars_to_read_vector, value = result)) } else { result_list <- do.call('[[<-', list(x = result_list, i = length(result_list) + 1, value = result)) } } } if (close) { file_closer(file_object) } result_list } # Parameter 'file_selectos' expects a named character vector of single # file dimension selectors. # Parameter 'inner_selectors' expects a named list of numeric vectors. .NcDataReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices, file_opener = NULL, file_closer = NULL) { file_dim_reader <- .NcDimReader #print("aa") close <- FALSE if (is.null(file_object)) { if (is.null(file_path)) { stop("Either 'file_path' or 'file_object' must be provided.") } if (is.null(file_opener) || is.null(file_closer)) { stop("Both 'file_opener' and 'file_closer' must be provided if ", "no 'file_object' is directly provided.") } else if (!is.function(file_opener) || !is.function(file_closer)) { stop("Parameters 'file_opener' and 'file_closer' must be functions.") } file_object <- file_opener(file_path) close <- !is.null(file_object) } else { file_path <- file_object$filename } result <- NULL if (!is.null(file_object)) { var_names <- NULL var_dim_name <- NULL if (any(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))) { var_dim_name <- c('var', 'variable')[which(c('var', 'variable') %in% c(names(file_selectors), names(inner_indices)))] if (var_dim_name %in% names(file_selectors)) { if (is.character(file_selectors[[var_dim_name]])) { var_names <- file_selectors[[var_dim_name]] } } else if (var_dim_name %in% names(inner_indices)) { var_names <- names(file_object$var) if (is.character(inner_indices[[var_dim_name]])) { var_names <- inner_indices[[var_dim_name]][which(inner_indices[[var_dim_name]] %in% var_names)] } else if (is.numeric(inner_indices[[var_dim_name]])) { if (any(inner_indices[[var_dim_name]] > length(var_names)) || any(inner_indices[[var_dim_name]] < 0)) { stop("Selectors out of range for '", var_dim_name, "'.") } var_names <- var_names[inner_indices[[var_dim_name]]] } else { stop("Selectors for '", var_dim_name, "' must be numeric or ", "character strings.") } } } if (is.null(var_names)) { if (length(file_object$var) == 1) { var_names <- names(file_object$var)[1] } else { stop(paste0("Could not disambiguate which variable in the file ", "is being requested. Either provide the parameter ", "'var'/'variable' or force the file to contain only ", "one variable.\n", file_path)) } } result <- try({ res <- NULL metadata <- NULL dims <- file_dim_reader(file_path, file_object, file_selectors, inner_indices) #print("bb") if (!all(names(inner_indices) %in% names(dims))) { stop("Missing dimensions in the file.\nExpected: ", paste(names(inner_indices), collapse = ', '), "\n", "Found: ", paste(names(dims), collapse = ', '), "\n", file_path) } extra_dims <- NULL if (length(dims) > length(inner_indices)) { common_dims <- which(names(dims) %in% names(inner_indices)) if (length(common_dims) > 0) { extra_dims <- dims[-common_dims] } else { extra_dims <- dims } if (any(extra_dims != 1)) { stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ", paste(names(inner_indices), collapse = ', '), "\n", "Found: ", paste(names(dims), collapse = ', '), "\n", file_path) } else { inner_indices[names(extra_dims)] <- rep(1, length(extra_dims)) } extra_dims <- names(extra_dims) } any_empty_selectors <- FALSE # Here we are allowing for indices out of range (simply discarding them). for (inner_dim in names(inner_indices)) { inds_out_of_range <- which(inner_indices[[inner_dim]] > dims[inner_dim]) if (length(inds_out_of_range) > 0) { inner_indices[[inner_dim]] <- inner_indices[[inner_dim]][-which(inner_indices[[inner_dim]] > dims[inner_dim])] } if (length(inner_indices[[inner_dim]]) == 0) { any_empty_selectors <- TRUE } if (any(inner_indices[[inner_dim]] < 0)) { stop("Invalid indices provided for '", inner_dim, "'.") } } if (!any_empty_selectors) { missing_dims <- NULL if (length(dims) < length(inner_indices)) { missing_dim_names <- names(inner_indices)[-which(names(inner_indices) %in% names(dims))] missing_dim_indices <- lapply(missing_dim_names, function(x) inner_indices[[x]]) if (any(!sapply(missing_dim_indices, identical, 1))) { stop("Could not find all expected dimensions in the file.\nExpected: ", paste(names(inner_indices), collapse = ', '), "\n", "Found: ", paste(names(dims), collapse = ', '), "\n", file_path) } else { original_dims <- sapply(inner_indices, length) names(original_dims) <- names(inner_indices) inner_indices <- inner_indices[-which(names(inner_indices) %in% missing_dim_names)] } missing_dims <- missing_dim_names } #print("cc") inner_dims <- names(inner_indices) reorder <- NULL if (any(names(dims) != inner_dims)) { reorder <- sapply(names(dims), function(x) which(inner_dims == x)) reorder_back <- sapply(inner_dims, function(x) which(names(dims) == x)) inner_indices <- inner_indices[reorder] } if (!is.null(var_dim_name) && (var_dim_name %in% names(inner_indices))) { inner_indices <- inner_indices[-which(names(inner_indices) == var_dim_name)] } for (var_name in var_names) { tmp <- do.call('[', c(list(ncvar_get(file_object, var_name, sapply(inner_indices, min), sapply(inner_indices, max) - sapply(inner_indices, min) + 1, collapse_degen = FALSE)), lapply(inner_indices, function(x) x - min(x) + 1), list(drop = FALSE))) if (!is.null(var_dim_name) && (var_dim_name %in% inner_dims)) { dim(tmp) <- c(dim(tmp)[which(1:length(dim(tmp)) < which(inner_dims == var_dim_name))], 1, dim(tmp)[which(1:length(dim(tmp)) >= which(inner_dims == var_dim_name))]) if (is.null(res)) { res <- tmp } else { res <- abind(res, tmp, along = which(inner_dims == var_dim_name)) } } else { res <- tmp } metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name)) } if (!is.null(reorder)) { res <- aperm(res, reorder_back) } if (!is.null(missing_dims)) { dim(res) <- original_dims } if (!is.null(extra_dims)) { dim(res) <- dim(res)[-which(inner_dims %in% extra_dims)] } attr(res, 'variables') <- metadata names(dim(res)) <- inner_dims ## TODO: Take the common parts in metadata and put them as ## global attributes. } #print("dd") res }) if ('try-error' %in% class(result)) { result <- NULL } } if (close) { file_closer(file_object) } result } nc2a <- NetCDFToArray