Newer
Older
XlsOpener <- function(file_path) {
#####
NULL
#####
}
XlsCloser <- function(file_object) {
#####
NULL
#####
}
# Parameter 'file_selectors' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric or
# character string vectors.
## TODO: Assuming colnames are always provided.
XlsDimReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices = NULL,
#####
if (!is.null(file_object)) {
file_to_read <- file_object
} else if (!is.null(file_path)) {
file_to_read <- file_path
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
#####
#####
vars_in_file <- c(readxl::excel_sheets, 'col_names')
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#####
if (any(c('var', 'variable') %in% names(inner_indices))) {
vars_to_read <- inner_indices[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
var_tag <- names(inner_indices)[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
} else if (any(c('var', 'variable') %in% names(file_selectors))) {
vars_to_read <- file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
var_tag <- names(file_selectors)[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
} else if (length(vars_in_file) == 1) {
vars_to_read <- vars_in_file
file_selectors <- c(file_selectors, list(var = vars_in_file))
var_tag <- 'var'
} else {
#####
stop("XlsDimReader expected to find a requested 'var' or 'variable' in 'file_selectors'.")
#####
}
if ((length(vars_to_read) == 1) && (vars_to_read[1] == 'var_names')) {
setNames(length(vars_in_file), var_tag)
} else {
vars_to_read <- sapply(vars_to_read,
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% vars_in_file)
if (length(x_in_file) < 1) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"variable '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
if (is.character(x) && !(x %in% c('all', 'last', 'first'))) {
if (!(x %in% vars_in_file)) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
}
x
}
})
vars_to_read <- SelectorChecker(vars_to_read, vars_in_file,
return_indices = FALSE)
#####
rows <- max(sapply(vars_to_read,
function(x) {
read_xls(file_to_read, x, cell_limits(c(1, 1), c(1, NA)))
}))
cols <- max(sapply(vars_to_read,
function(x) {
read_xls(file_to_read, x, cell_limits(c(1, 1), c(NA, 1)))
}))
read_dims <- c(var = length(vars_to_read),
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#####
if (any(c('var', 'variable') %in% names(inner_indices))) {
names(read_dims)[which(names(read_dims) == 'var')] <- var_tag
read_dims[var_tag] <- length(vars_in_file)
} else {
read_dims <- read_dims[-which(names(read_dims) == 'var')]
}
read_dims
}
}
XlsVarReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, var_name = NULL,
synonims) {
#####
if (!is.null(file_object)) {
file_to_read <- file_object
file_path <- file_object
} else if (!is.null(file_path)) {
file_to_read <- file_path
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
#####
if (var_name %in% c('var_names')) {
#####
vars_in_file <- c(readxl::excel_sheets(file_to_read), 'col_names')
#####
vars_in_file <- sapply(vars_in_file,
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
vars_in_file
} else {
#####
XlsDataReader(file_path, file_object, list(var = var_name), NULL, synonims)
#####
}
}
# Parameter 'file_selectos' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric vectors.
XlsDataReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices = NULL,
synonims) {
#####
if (!is.null(file_object)) {
file_to_read <- file_object
} else if (!is.null(file_path)) {
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
# Flag to know whether a variable has been requested (take all dims detected)
# or whether data has been requested (stop if no indices provided for any
# dim).
if (any(c('var', 'variable') %in% names(file_selectors))) {
if (!any(c('var', 'variable') %in% names(inner_indices))) {
inner_indices <- c(inner_indices,
list(var = file_selectors[[which(names(file_selectors) %in%
c('var', 'variable'))[1]]]))
#####
vars_in_file <- readxl::excel_sheets(file_to_read)
#####
if (any(names(inner_indices) %in% c('var', 'variable'))) {
position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1]
} else if (length(vars_in_file) == 1) {
inner_indices <- c(inner_indices,
list(var = vars_in_file))
drop_var_dim <- TRUE
position_of_var <- length(inner_indices)
stop("A 'var'/'variable' file dimension or inner dimension must be ",
"requested for XlsDataReader() to read XLS files.")
inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]],
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% vars_in_file)
if (length(x_in_file) < 1) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"variable '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
if (is.character(x) && !(x %in% c('all', 'first', 'last'))) {
if (!(x %in% vars_in_file)) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
#inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file)
#####
dims_in_file <- XlsDimReader(NULL, file_to_read, NULL,
inner_indices[position_of_var], synonims)
#####
names(inner_indices) <- sapply(names(inner_indices),
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% names(dims_in_file))
if (length(x_in_file) < 1) {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"dimension '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
x
}
})
dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))]
singleton_unspecified_dims <- which((dims_in_file == 1) &
!(names(dims_in_file) %in% names(inner_indices)))
dims_in_file <- dims_in_file[-singleton_unspecified_dims]
if (var_name == 'col_names') {
## TODO: Add support for different col names for each sheet.
## TODO: If col_names specified in ... and == FALSE, stop.
result <- readxl::read_xls(file_to_read, 1, cell_limits(c(1, 1), c(1, NA)), col_names = TRUE)
} else {
result <- readxl::read_xls(file_to_read, var_name, cell_limits(c(1, 1), c(NA, NA)), col_names = TRUE)
}
result <- array(unlist(result, use.names = FALSE),
dim = c(var = 1, rows = dim(result)[1], cols = dim(result)[2]))
if (drop_var_dim) {
dim(result) <- dim(result)[-1]
}
##result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
## expect_all_indices = FALSE, allow_out_of_range = TRUE)
#####
if (any(!(names(dims_in_file) %in% names(inner_indices)))) {
expected_dim_names <- names(inner_indices)
if (drop_var_dim) {
expected_dim_names <- expected_dim_names[-position_of_var]
}
stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
paste(expected_dim_names, collapse = ', '), "\n",
"Found: ", paste(names(dims_in_file), collapse = ', '), "\n",
file_path)
}
for var_name in var_names
compute min and max indices for each dimension
#result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
# expect_all_indices = TRUE, allow_out_of_range = TRUE)
subset taking only indices in inner dims
reorder dims according to inner_indices
names(dim(result)) <- sapply(names(dim(result)),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
if (length(names(attr(result, 'variables'))) == 1) {
var_name <- names(attr(result, 'variables'))
units <- attr(result, 'variables')[[var_name]][['units']]
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
result[] <- paste(result[], units)
} else if (grepl(' since ', units)) {
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
new_array <- seq(as.POSIXct(parts[2]), length = max(result, na.rm = TRUE) + 1, by = units)[result[] + 1]
dim(new_array) <- dim(result)
attr(new_array, 'variables') <- attr(result, 'variables')
result <- new_array