XlsOpener <- function(file_path) { ##### NULL ##### } XlsCloser <- function(file_object) { ##### NULL ##### } # Parameter 'file_selectors' expects a named character vector of single # file dimension selectors. # Parameter 'inner_indices' expects a named list of numeric or # character string vectors. ## TODO: Assuming colnames are always provided. XlsDimReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices = NULL, synonims) { ##### if (!is.null(file_object)) { file_to_read <- file_object } else if (!is.null(file_path)) { file_to_read <- file_path } else { stop("Either 'file_path' or 'file_object' must be provided.") } ##### ##### vars_in_file <- c(readxl::excel_sheets, 'col_names') ##### if (any(c('var', 'variable') %in% names(inner_indices))) { vars_to_read <- inner_indices[[which(names(inner_indices) %in% c('var', 'variable'))[1]]] var_tag <- names(inner_indices)[[which(names(inner_indices) %in% c('var', 'variable'))[1]]] } else if (any(c('var', 'variable') %in% names(file_selectors))) { vars_to_read <- file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]] var_tag <- names(file_selectors)[[which(names(file_selectors) %in% c('var', 'variable'))[1]]] } else if (length(vars_in_file) == 1) { vars_to_read <- vars_in_file file_selectors <- c(file_selectors, list(var = vars_in_file)) var_tag <- 'var' } else { ##### stop("XlsDimReader expected to find a requested 'var' or 'variable' in 'file_selectors'.") ##### } if ((length(vars_to_read) == 1) && (vars_to_read[1] == 'var_names')) { setNames(length(vars_in_file), var_tag) } else { vars_to_read <- sapply(vars_to_read, function(x) { if (x %in% names(synonims)) { x_in_file <- which(synonims[[x]] %in% vars_in_file) if (length(x_in_file) < 1) { stop("Could not find variable '", x, "' (or its synonims if ", "specified) in the file ", file_path) } if (length(x_in_file) > 1) { stop("Found more than one matches for the synonims of the ", "variable '", x, "' in the file ", file_path) } synonims[[x]][x_in_file] } else { if (is.character(x) && !(x %in% c('all', 'last', 'first'))) { if (!(x %in% vars_in_file)) { stop("Could not find variable '", x, "' (or its synonims if ", "specified) in the file ", file_path) } } x } }) vars_to_read <- SelectorChecker(vars_to_read, vars_in_file, return_indices = FALSE) ##### rows <- max(sapply(vars_to_read, function(x) { read_xls(file_to_read, x, cell_limits(c(1, 1), c(1, NA))) })) cols <- max(sapply(vars_to_read, function(x) { read_xls(file_to_read, x, cell_limits(c(1, 1), c(NA, 1))) })) read_dims <- c(var = length(vars_to_read), rows = rows, cols = cols) ##### if (any(c('var', 'variable') %in% names(inner_indices))) { names(read_dims)[which(names(read_dims) == 'var')] <- var_tag read_dims[var_tag] <- length(vars_in_file) } else { read_dims <- read_dims[-which(names(read_dims) == 'var')] } read_dims } } XlsVarReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, var_name = NULL, synonims) { ##### if (!is.null(file_object)) { file_to_read <- file_object file_path <- file_object } else if (!is.null(file_path)) { file_to_read <- file_path } else { stop("Either 'file_path' or 'file_object' must be provided.") } ##### if (var_name %in% c('var_names')) { ##### vars_in_file <- c(readxl::excel_sheets(file_to_read), 'col_names') ##### vars_in_file <- sapply(vars_in_file, function(x) { which_entry <- which(sapply(synonims, function(y) x %in% y)) if (length(which_entry) > 0) { names(synonims)[which_entry] } else { x } }) vars_in_file } else { ##### XlsDataReader(file_path, file_object, list(var = var_name), NULL, synonims) ##### } } # Parameter 'file_selectos' expects a named character vector of single # file dimension selectors. # Parameter 'inner_indices' expects a named list of numeric vectors. XlsDataReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices = NULL, synonims) { ##### if (!is.null(file_object)) { file_to_read <- file_object file_path <- file_object } else if (!is.null(file_path)) { file_to_read <- file_path } else { stop("Either 'file_path' or 'file_object' must be provided.") } ##### if (is.null(file_to_read)) { return(NULL) } # Flag to know whether a variable has been requested (take all dims detected) # or whether data has been requested (stop if no indices provided for any # dim). var_requested <- is.null(inner_indices) drop_var_dim <- FALSE if (any(c('var', 'variable') %in% names(file_selectors))) { if (!any(c('var', 'variable') %in% names(inner_indices))) { inner_indices <- c(inner_indices, list(var = file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]])) drop_var_dim <- TRUE } } ##### vars_in_file <- readxl::excel_sheets(file_to_read) ##### if (any(names(inner_indices) %in% c('var', 'variable'))) { position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1] } else if (length(vars_in_file) == 1) { inner_indices <- c(inner_indices, list(var = vars_in_file)) drop_var_dim <- TRUE position_of_var <- length(inner_indices) } else { stop("A 'var'/'variable' file dimension or inner dimension must be ", "requested for XlsDataReader() to read XLS files.") } inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]], function(x) { if (x %in% names(synonims)) { x_in_file <- which(synonims[[x]] %in% vars_in_file) if (length(x_in_file) < 1) { stop("Could not find variable '", x, "' (or its synonims if ", "specified) in the file ", file_path) } if (length(x_in_file) > 1) { stop("Found more than one matches for the synonims of the ", "variable '", x, "' in the file ", file_path) } synonims[[x]][x_in_file] } else { if (is.character(x) && !(x %in% c('all', 'first', 'last'))) { if (!(x %in% vars_in_file)) { stop("Could not find variable '", x, "' (or its synonims if ", "specified) in the file ", file_path) } } x } }) #inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file) ##### dims_in_file <- XlsDimReader(NULL, file_to_read, NULL, inner_indices[position_of_var], synonims) ##### names(inner_indices) <- sapply(names(inner_indices), function(x) { if (x %in% names(synonims)) { x_in_file <- which(synonims[[x]] %in% names(dims_in_file)) if (length(x_in_file) < 1) { stop("Could not find dimension '", x, "' (or its synonims if ", "specified) in the file ", file_path) } if (length(x_in_file) > 1) { stop("Found more than one matches for the synonims of the ", "dimension '", x, "' in the file ", file_path) } synonims[[x]][x_in_file] } else { if (!(x %in% names(dims_in_file))) { stop("Could not find dimension '", x, "' (or its synonims if ", "specified) in the file ", file_path) } x } }) if (drop_var_dim) { dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))] } singleton_unspecified_dims <- which((dims_in_file == 1) & !(names(dims_in_file) %in% names(inner_indices))) if (length(singleton_unspecified_dims) > 0) { dims_in_file <- dims_in_file[-singleton_unspecified_dims] } if (var_requested) { ##### if (var_name == 'col_names') { ## TODO: Add support for different col names for each sheet. ## TODO: If col_names specified in ... and == FALSE, stop. result <- readxl::read_xls(file_to_read, 1, cell_limits(c(1, 1), c(1, NA)), col_names = TRUE) } else { result <- readxl::read_xls(file_to_read, var_name, cell_limits(c(1, 1), c(NA, NA)), col_names = TRUE) } result <- array(unlist(result, use.names = FALSE), dim = c(var = 1, rows = dim(result)[1], cols = dim(result)[2])) if (drop_var_dim) { dim(result) <- dim(result)[-1] } ##result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim, ## expect_all_indices = FALSE, allow_out_of_range = TRUE) ##### } else { if (any(!(names(dims_in_file) %in% names(inner_indices)))) { expected_dim_names <- names(inner_indices) if (drop_var_dim) { expected_dim_names <- expected_dim_names[-position_of_var] } stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ", paste(expected_dim_names, collapse = ', '), "\n", "Found: ", paste(names(dims_in_file), collapse = ', '), "\n", file_path) } ##### for var_name in var_names compute min and max indices for each dimension #result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim, # expect_all_indices = TRUE, allow_out_of_range = TRUE) subset taking only indices in inner dims reorder dims according to inner_indices ##### } names(dim(result)) <- sapply(names(dim(result)), function(x) { which_entry <- which(sapply(synonims, function(y) x %in% y)) if (length(which_entry) > 0) { names(synonims)[which_entry] } else { x } }) names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')), function(x) { which_entry <- which(sapply(synonims, function(y) x %in% y)) if (length(which_entry) > 0) { names(synonims)[which_entry] } else { x } }) if (length(names(attr(result, 'variables'))) == 1) { var_name <- names(attr(result, 'variables')) units <- attr(result, 'variables')[[var_name]][['units']] if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { if (units == 'seconds') { units <- 'secs' } else if (units == 'minutes') { units <- 'mins' } result[] <- paste(result[], units) } else if (grepl(' since ', units)) { parts <- strsplit(units, ' since ')[[1]] units <- parts[1] if (units == 'seconds') { units <- 'secs' } else if (units == 'minutes') { units <- 'mins' } new_array <- seq(as.POSIXct(parts[2]), length = max(result, na.rm = TRUE) + 1, by = units)[result[] + 1] dim(new_array) <- dim(result) attr(new_array, 'variables') <- attr(result, 'variables') result <- new_array } } result }