Newer
Older
XlsOpener <- function(file_path) {
#####
NULL
#####
}
XlsCloser <- function(file_object) {
#####
NULL
#####
}
# Parameter 'file_selectors' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric or
# character string vectors.
## TODO: Assuming colnames are always provided.
XlsDimReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices = NULL,
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#####
if (!is.null(file_object)) {
file_to_read <- file_object
} else if (!is.null(file_path)) {
file_to_read <- file_path
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
#####
#####
vars_in_file <- readxl::excel_sheets
#####
if (any(c('var', 'variable') %in% names(inner_indices))) {
vars_to_read <- inner_indices[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
var_tag <- names(inner_indices)[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
} else if (any(c('var', 'variable') %in% names(file_selectors))) {
vars_to_read <- file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
var_tag <- names(file_selectors)[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
} else if (length(vars_in_file) == 1) {
vars_to_read <- vars_in_file
file_selectors <- c(file_selectors, list(var = vars_in_file))
var_tag <- 'var'
} else {
#####
stop("XlsDimReader expected to find a requested 'var' or 'variable' in 'file_selectors'.")
#####
}
if ((length(vars_to_read) == 1) && (vars_to_read[1] == 'var_names')) {
setNames(length(vars_in_file), var_tag)
} else {
vars_to_read <- sapply(vars_to_read,
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% vars_in_file)
if (length(x_in_file) < 1) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"variable '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
if (is.character(x) && !(x %in% c('all', 'last', 'first'))) {
if (!(x %in% vars_in_file)) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
}
x
}
})
vars_to_read <- SelectorChecker(vars_to_read, vars_in_file,
return_indices = FALSE)
#####
rows <- read_xls(file_to_read, vars_to_read[1],
cell_limits(c(1, 1), c(1, NA)))
cols <- read_xls(file_to_read, vars_to_read[1],
cell_limits(c(1, 1), c(NA, 1)))
read_dims <- c(var = length(vars_to_read),
rows = length(rows),
cols = length(cols))
#####
if (any(c('var', 'variable') %in% names(inner_indices))) {
names(read_dims)[which(names(read_dims) == 'var')] <- var_tag
read_dims[var_tag] <- length(vars_in_file)
} else {
read_dims <- read_dims[-which(names(read_dims) == 'var')]
}
read_dims
}
}
XlsVarReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, var_name = NULL,
synonims) {
#####
if (!is.null(file_object)) {
file_to_read <- file_object
file_path <- file_object
} else if (!is.null(file_path)) {
file_to_read <- file_path
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
#####
if (var_name %in% c('var_names')) {
#####
vars_in_file <- c(readxl::excel_sheets(file_to_read), 'col_names')
#####
vars_in_file <- sapply(vars_in_file,
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
vars_in_file
} else {
#####
XlsDataReader(file_path, file_object, list(var = var_name), NULL, synonims)
#####
}
}
# Parameter 'file_selectos' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric vectors.
XlsDataReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices = NULL,
synonims) {
#####
if (!is.null(file_object)) {
file_to_read <- file_object
} else if (!is.null(file_path)) {
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
if (any(c('var', 'variable') %in% names(file_selectors))) {
if (!any(c('var', 'variable') %in% names(inner_indices))) {
inner_indices <- c(inner_indices,
list(var = file_selectors[[which(names(file_selectors) %in%
c('var', 'variable'))[1]]]))
#####
vars_in_file <- readxl::excel_sheets(file_to_read)
#####
if (any(names(inner_indices) %in% c('var', 'variable'))) {
position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1]
} else if (length(vars_in_file) == 1) {
inner_indices <- c(inner_indices,
list(var = vars_in_file))
drop_var_dim <- TRUE
position_of_var <- length(inner_indices)
stop("A 'var'/'variable' file dimension or inner dimension must be ",
"requested for XlsDataReader() to read XLS files.")
inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]],
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% vars_in_file)
if (length(x_in_file) < 1) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"variable '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
if (is.character(x) && !(x %in% c('all', 'first', 'last'))) {
if (!(x %in% vars_in_file)) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
#inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file)
#####
dims_in_file <- XlsDimReader(NULL, file_to_read, NULL,
inner_indices[position_of_var], synonims)
#####
names(inner_indices) <- sapply(names(inner_indices),
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% names(dims_in_file))
if (length(x_in_file) < 1) {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"dimension '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
x
}
})
dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))]
singleton_unspecified_dims <- which((dims_in_file == 1) &
!(names(dims_in_file) %in% names(inner_indices)))
dims_in_file <- dims_in_file[-singleton_unspecified_dims]
#####
for var_name in var_names
if (var_name == 'col_names') load the first row.
else result <- readxl::read_xls(file_to_read, var_name, cell_limits(c(1, 1, ....), c(NA, NA, .....)), col_names = TRUE)
if drop_var_dim
drop
##result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
## expect_all_indices = FALSE, allow_out_of_range = TRUE)
#####
if (any(!(names(dims_in_file) %in% names(inner_indices)))) {
expected_dim_names <- names(inner_indices)
if (drop_var_dim) {
expected_dim_names <- expected_dim_names[-position_of_var]
}
stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
paste(expected_dim_names, collapse = ', '), "\n",
"Found: ", paste(names(dims_in_file), collapse = ', '), "\n",
file_path)
}
result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
expect_all_indices = TRUE, allow_out_of_range = TRUE)
names(dim(result)) <- sapply(names(dim(result)),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
if (length(names(attr(result, 'variables'))) == 1) {
var_name <- names(attr(result, 'variables'))
units <- attr(result, 'variables')[[var_name]][['units']]
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
result[] <- paste(result[], units)
} else if (grepl(' since ', units)) {
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
if (units == 'seconds') {
units <- 'secs'
} else if (units == 'minutes') {
units <- 'mins'
}
new_array <- seq(as.POSIXct(parts[2]), length = max(result, na.rm = TRUE) + 1, by = units)[result[] + 1]
dim(new_array) <- dim(result)
attr(new_array, 'variables') <- attr(result, 'variables')
result <- new_array