Newer
Older
#'NetCDF file data reader for 'startR'
#'
#'This is a data reader function for NetCDF files, intended for use as parameter
#'file_data_reader in a Start() call. This function complies with the
#'input/output interface required by Start() defined in the documentation for
#'This function uses the function NcToArray() in the package 'easyNCDF', which
#'in turn uses nc_var_get() in the package 'ncdf4'.
#'
#'@param file_path A character string indicating the path to the data file to
#' read. See details in the documentation of the parameter 'file_data_reader'
#' of the function Start(). The default value is NULL.
#'@param file_object An open connection to a NetCDF file, optionally with
#' additional header information. See details in the documentation of the
#' parameter 'file_data_reader' of the function Start(). The default value is
#' NULL.
#'@param file_selectors A named list containing the information of the path of
#' the file to read data from. It is automatically provided by Start(). See
#' details in the documentation of the parameter 'file_data_reader' of the
#' function Start(). The default value is NULL.
#'@param inner_indices A named list of numeric vectors indicating the indices
#' to take from each of the inner dimensions in the requested file. It is
#' automatically provided by Start(). See details in the documentation of the
#' parameter 'file_data_reader' of the function Start(). The default value is
#' NULL.
#'@param synonims A named list indicating the synonims for the dimension names
#' to look for in the requested file, exactly as provided in the parameter
#' 'synonims' in a Start() call. See details in the documentation of the
#' parameter 'file_data_reader' of the function Start().
#'
#'@return A multidimensional data array with the named dimensions and indices
#' requested in 'inner_indices', potentially with the attribute 'variables'
#' with additional auxiliary data. See details in the documentation of the
#' parameter 'file_data_reader' of the function Start().
#' data_path <- system.file('extdata', package = 'startR', mustWork = TRUE)
#' file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc')
#' file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011')
#' first_round_indices <- list(time = 1, latitude = 1:8, longitude = 1:16)
#' synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time',
#' latitude = 'latitude', longitude = 'longitude')
#' sub_array <- NcDataReader(file_to_open, NULL, file_selectors,
#' first_round_indices, synonims)
#'@seealso \code{\link{NcOpener}} \code{\link{NcDimReader}}
#' \code{\link{NcCloser}} \code{\link{NcVarReader}}
NcDataReader <- function(file_path = NULL, file_object = NULL,
file_selectors = NULL, inner_indices = NULL,
if (!is.null(file_object)) {
file_to_read <- file_object
} else if (!is.null(file_path)) {
} else {
stop("Either 'file_path' or 'file_object' must be provided.")
}
if (any(c('var', 'variable') %in% names(file_selectors))) {
if (!any(c('var', 'variable') %in% names(inner_indices))) {
inner_indices <- c(inner_indices,
list(var = file_selectors[[which(names(file_selectors) %in%
c('var', 'variable'))[1]]]))
vars_in_file <- easyNCDF::NcReadVarNames(file_to_read)
if (any(names(inner_indices) %in% c('var', 'variable'))) {
position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1]
} else if (length(vars_in_file) == 1) {
inner_indices <- c(inner_indices,
list(var = vars_in_file))
drop_var_dim <- TRUE
position_of_var <- length(inner_indices)
stop("A 'var'/'variable' file dimension or inner dimension must be ",
"requested for NcDataReader() to read NetCDF files with more than ",
"one variable.")
inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]],
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% vars_in_file)
if (length(x_in_file) < 1) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"variable '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
if (is.character(x) && !(x %in% c('all', 'first', 'last'))) {
if (!(x %in% vars_in_file)) {
stop("Could not find variable '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
#inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file)
dims_in_file <- NcDimReader(NULL, file_to_read, NULL,
inner_indices[position_of_var], synonims)
names(inner_indices) <- sapply(names(inner_indices),
function(x) {
if (x %in% names(synonims)) {
x_in_file <- which(synonims[[x]] %in% names(dims_in_file))
if (length(x_in_file) < 1) {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
if (length(x_in_file) > 1) {
stop("Found more than one matches for the synonims of the ",
"dimension '", x, "' in the file ", file_path)
}
synonims[[x]][x_in_file]
} else {
stop("Could not find dimension '", x, "' (or its synonims if ",
"specified) in the file ", file_path)
}
x
}
})
dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))]
singleton_unspecified_dims <- which((dims_in_file == 1) &
!(names(dims_in_file) %in% names(inner_indices)))
dims_in_file <- dims_in_file[-singleton_unspecified_dims]
if (var_requested) {
result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
expect_all_indices = FALSE, allow_out_of_range = TRUE)
} else {
if (any(!(names(dims_in_file) %in% names(inner_indices)))) {
expected_dim_names <- names(inner_indices)
if (drop_var_dim) {
expected_dim_names <- expected_dim_names[-position_of_var]
}
stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
paste(expected_dim_names, collapse = ', '), "\n",
"Found: ", paste(names(dims_in_file), collapse = ', '), "\n",
file_path)
}
result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
expect_all_indices = TRUE, allow_out_of_range = TRUE)
}
if (!is.null(dim(result))) {
names(dim(result)) <- sapply(names(dim(result)),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
}
if (!is.null(result)) {
names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')),
function(x) {
which_entry <- which(sapply(synonims, function(y) x %in% y))
if (length(which_entry) > 0) {
names(synonims)[which_entry]
} else {
x
}
})
if (length(names(attr(result, 'variables'))) == 1) {
var_name <- names(attr(result, 'variables'))
units <- attr(result, 'variables')[[var_name]][['units']]
if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
if (units == 'seconds') {
# units <- 'mins'
result <- result * 60 # min to sec
calendar <- attr(result, 'variables')[[var_name]]$calendar
if (calendar == 'standard') calendar <- 'gregorian'
parts <- strsplit(units, ' since ')[[1]]
units <- parts[1]
if (units %in% c('second', 'seconds')) {
} else if (units %in% c('minute', 'minutes')) {
# units <- 'mins'
result <- result * 60 # min to sec
} else if (units %in% c('hour', 'hours')) {
result <- result * 60 * 60 # hour to sec
} else if (units %in% c('day', 'days')) {
# units <- 'days'
result <- result * 24 * 60 * 60 # day to sec
} else if (units %in% c('month', 'months')) {
# define day in each month
leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Origin year and month
ori_year <- as.numeric(substr(parts[2], 1, 4))
ori_month <- as.numeric(substr(parts[2], 6, 7))
if (is.na(ori_month)) {
ori_month <- as.numeric(substr(parts[2], 6, 6))
}
if (!is.numeric(ori_year) | !is.numeric(ori_month)) {
stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ",
"Check the file or contact the maintainer."))
}
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
if (calendar == 'gregorian') {
# Find how many years + months
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
# Find the leap years we care
if (ori_month <= 2) {
leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear)))
} else {
leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear)))
}
total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet
if (month_left != 0) {
if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr
# Is the last year a leap year?
last_leap <- s2dv::LeapYear(ori_year + yr_num)
if (last_leap) {
total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)])
} else {
total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)])
}
} else { # the last month ends in the next yr
if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16
last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005
if (last_leap) {
total_days <- total_days + sum(leap_month_day[2:12])
} else {
total_days <- total_days + sum(no_leap_month_day[2:12])
}
} else { # e.g., 2005-04-16 + 11mth = 2006-03-16
last_leap <- s2dv::LeapYear(ori_year + yr_num + 1)
needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1))
if (last_leap) {
total_days <- total_days + sum(leap_month_day[needed_month])
} else {
total_days <- total_days + sum(no_leap_month_day[needed_month])
}
}
}
}
result <- total_days * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('365_day',' 365', 'noleap')) {
yr_num <- floor(result / 12)
month_left <- result - yr_num * 12
total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)])
result <- total_days * 24 * 60 * 60 # day to sec
} else if (calendar %in% c('360_day', '360')) {
result <- result * 30 * 24 * 60 * 60 # day to sec
} else { #old code. The calendar is not in any of the above.
result <- result * 30.5
result <- result * 24 * 60 * 60 # day to sec
}
new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[]
new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC"))
#new_array <- seq(as.POSIXct(parts[2]),
# length = max(result, na.rm = TRUE) + 1,
# by = units)[result[] + 1]
dim(new_array) <- dim(result)
attr(new_array, 'variables') <- attr(result, 'variables')
result <- new_array
if (close) {
NcCloser(file_to_read)
}