XlsPlugin.R 11.4 KB
Newer Older
XlsOpener <- function(file_path) {
  #####
  NULL
  #####
}

XlsCloser <- function(file_object) {
  #####
  NULL
  #####
}

# Parameter 'file_selectors' expects a named character vector of single 
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric or 
# character string vectors.
## TODO: Assuming colnames are always provided.
XlsDimReader <- function(file_path = NULL, file_object = NULL, 
                         file_selectors = NULL, inner_indices = NULL,
  #####
  if (!is.null(file_object)) {
    file_to_read <- file_object
  } else if (!is.null(file_path)) {
    file_to_read <- file_path
  } else {
    stop("Either 'file_path' or 'file_object' must be provided.")
  }
  #####

  #####
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  vars_in_file <- c(readxl::excel_sheets, 'col_names')
  #####
  if (any(c('var', 'variable') %in% names(inner_indices))) {
    vars_to_read <- inner_indices[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
    var_tag <- names(inner_indices)[[which(names(inner_indices) %in% c('var', 'variable'))[1]]]
  } else if (any(c('var', 'variable') %in% names(file_selectors))) {
    vars_to_read <- file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
    var_tag <- names(file_selectors)[[which(names(file_selectors) %in% c('var', 'variable'))[1]]]
  } else if (length(vars_in_file) == 1) {
    vars_to_read <- vars_in_file
    file_selectors <- c(file_selectors, list(var = vars_in_file))
    var_tag <- 'var'
  } else {
  #####
    stop("XlsDimReader expected to find a requested 'var' or 'variable' in 'file_selectors'.")
  #####
  }

  if ((length(vars_to_read) == 1) && (vars_to_read[1] == 'var_names')) {
    setNames(length(vars_in_file), var_tag)
  } else {
    vars_to_read <- sapply(vars_to_read, 
      function(x) {
        if (x %in% names(synonims)) {
          x_in_file <- which(synonims[[x]] %in% vars_in_file)
          if (length(x_in_file) < 1) {
            stop("Could not find variable '", x, "' (or its synonims if ",
                 "specified) in the file ", file_path)
          }
          if (length(x_in_file) > 1) {
            stop("Found more than one matches for the synonims of the ",
                 "variable '", x, "' in the file ", file_path)
          }
          synonims[[x]][x_in_file]
        } else {
          if (is.character(x) && !(x %in% c('all', 'last', 'first'))) {
            if (!(x %in% vars_in_file)) {
              stop("Could not find variable '", x, "' (or its synonims if ",
                   "specified) in the file ", file_path)
            }
          }
          x
        }
      })
    vars_to_read <- SelectorChecker(vars_to_read, vars_in_file, 
                                    return_indices = FALSE)
  #####
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
    rows <- max(sapply(vars_to_read, 
                       function(x) {
                         read_xls(file_to_read, x, cell_limits(c(1, 1), c(1, NA)))
                       }))
    cols <- max(sapply(vars_to_read, 
                       function(x) {
                         read_xls(file_to_read, x, cell_limits(c(1, 1), c(NA, 1)))
                       }))
    read_dims <- c(var = length(vars_to_read), 
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
                   rows = rows,
                   cols = cols)
  #####
    if (any(c('var', 'variable') %in% names(inner_indices))) {
      names(read_dims)[which(names(read_dims) == 'var')] <- var_tag
      read_dims[var_tag] <- length(vars_in_file)
    } else {
      read_dims <- read_dims[-which(names(read_dims) == 'var')]
    }
    read_dims
  }
}

XlsVarReader <- function(file_path = NULL, file_object = NULL, 
                         file_selectors = NULL, var_name = NULL,
                         synonims) {
  #####
  if (!is.null(file_object)) {
    file_to_read <- file_object
    file_path <- file_object
  } else if (!is.null(file_path)) {
    file_to_read <- file_path
  } else {
    stop("Either 'file_path' or 'file_object' must be provided.")
  }
  #####
  if (var_name %in% c('var_names')) {
  #####
    vars_in_file <- c(readxl::excel_sheets(file_to_read), 'col_names')
  #####
    vars_in_file <- sapply(vars_in_file,
      function(x) {
        which_entry <- which(sapply(synonims, function(y) x %in% y))
        if (length(which_entry) > 0) {
          names(synonims)[which_entry]
        } else {
          x
        }
      })
    vars_in_file
  } else {
  #####
    XlsDataReader(file_path, file_object, list(var = var_name), NULL, synonims)
  #####
  }
}

# Parameter 'file_selectos' expects a named character vector of single
# file dimension selectors.
# Parameter 'inner_indices' expects a named list of numeric vectors.
XlsDataReader <- function(file_path = NULL, file_object = NULL, 
                          file_selectors = NULL, inner_indices = NULL,
                          synonims) {
  #####
  if (!is.null(file_object)) {
    file_to_read <- file_object
    file_path <- file_object
  } else if (!is.null(file_path)) {
    file_to_read <- file_path
  } else {
    stop("Either 'file_path' or 'file_object' must be provided.")
  }
Nicolau Manubens's avatar
Nicolau Manubens committed
  if (is.null(file_to_read)) {
    return(NULL)
  }

Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  # Flag to know whether a variable has been requested (take all dims detected)
  # or whether data has been requested (stop if no indices provided for any 
  # dim).
Nicolau Manubens's avatar
Nicolau Manubens committed
  var_requested <- is.null(inner_indices)
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  drop_var_dim <- FALSE
  if (any(c('var', 'variable') %in% names(file_selectors))) {
    if (!any(c('var', 'variable') %in% names(inner_indices))) {
Nicolau Manubens's avatar
Nicolau Manubens committed
      inner_indices <- c(inner_indices,
                         list(var = file_selectors[[which(names(file_selectors) %in% 
                                                    c('var', 'variable'))[1]]]))
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
      drop_var_dim <- TRUE
  #####
  vars_in_file <- readxl::excel_sheets(file_to_read)
  #####
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  if (any(names(inner_indices) %in% c('var', 'variable'))) {
    position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1]
Nicolau Manubens's avatar
Nicolau Manubens committed
  } else if (length(vars_in_file) == 1) {
    inner_indices <- c(inner_indices,
                       list(var = vars_in_file))
    drop_var_dim <- TRUE
    position_of_var <- length(inner_indices)
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
    stop("A 'var'/'variable' file dimension or inner dimension must be ",
         "requested for XlsDataReader() to read XLS files.")
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  }

  inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]],
    function(x) {
      if (x %in% names(synonims)) {
        x_in_file <- which(synonims[[x]] %in% vars_in_file)
        if (length(x_in_file) < 1) {
          stop("Could not find variable '", x, "' (or its synonims if ",
               "specified) in the file ", file_path)
        }
        if (length(x_in_file) > 1) {
          stop("Found more than one matches for the synonims of the ",
               "variable '", x, "' in the file ", file_path)
        }
        synonims[[x]][x_in_file]
      } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
        if (is.character(x) && !(x %in% c('all', 'first', 'last'))) {
          if (!(x %in% vars_in_file)) {
            stop("Could not find variable '", x, "' (or its synonims if ",
                 "specified) in the file ", file_path)
          }
Nicolau Manubens's avatar
Nicolau Manubens committed
  #inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file)
  #####
  dims_in_file <- XlsDimReader(NULL, file_to_read, NULL, 
                               inner_indices[position_of_var], synonims)
  #####
  names(inner_indices) <- sapply(names(inner_indices), 
    function(x) {
      if (x %in% names(synonims)) {
Nicolau Manubens's avatar
Nicolau Manubens committed
        x_in_file <- which(synonims[[x]] %in% names(dims_in_file))
        if (length(x_in_file) < 1) {
          stop("Could not find dimension '", x, "' (or its synonims if ",
               "specified) in the file ", file_path)
        }
        if (length(x_in_file) > 1) {
          stop("Found more than one matches for the synonims of the ",
               "dimension '", x, "' in the file ", file_path)
        }
        synonims[[x]][x_in_file]
      } else {
Nicolau Manubens's avatar
Nicolau Manubens committed
        if (!(x %in% names(dims_in_file))) {
          stop("Could not find dimension '", x, "' (or its synonims if ",
               "specified) in the file ", file_path)
        }
        x
      }
    })
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  if (drop_var_dim) {
    dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))]
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  }
  singleton_unspecified_dims <- which((dims_in_file == 1) & 
                                      !(names(dims_in_file) %in% names(inner_indices)))
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  if (length(singleton_unspecified_dims) > 0) {
    dims_in_file <- dims_in_file[-singleton_unspecified_dims]
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
  }
  if (var_requested) {
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
    if (var_name == 'col_names') {
      ## TODO: Add support for different col names for each sheet.
      ## TODO: If col_names specified in ... and == FALSE, stop.
      result <- readxl::read_xls(file_to_read, 1, cell_limits(c(1, 1), c(1, NA)), col_names = TRUE)
    } else {
      result <- readxl::read_xls(file_to_read, var_name, cell_limits(c(1, 1), c(NA, NA)), col_names = TRUE)
    } 
    result <- array(unlist(result, use.names = FALSE), 
                    dim = c(var = 1, rows = dim(result)[1], cols = dim(result)[2]))
    if (drop_var_dim) {
      dim(result) <- dim(result)[-1]
    }
    ##result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
    ##                              expect_all_indices = FALSE, allow_out_of_range = TRUE)
  #####
Nicolau Manubens's avatar
Nicolau Manubens committed
    if (any(!(names(dims_in_file) %in% names(inner_indices)))) {
      expected_dim_names <- names(inner_indices)
      if (drop_var_dim) {
        expected_dim_names <- expected_dim_names[-position_of_var]
      }
      stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ",
           paste(expected_dim_names, collapse = ', '), "\n",
           "Found: ", paste(names(dims_in_file), collapse = ', '), "\n",
           file_path)
    }
Nicolau Manubens Gil's avatar
Nicolau Manubens Gil committed
    for var_name in var_names
    compute min and max indices for each dimension
    #result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim,
    #                              expect_all_indices = TRUE, allow_out_of_range = TRUE)
    subset taking only indices in inner dims
    reorder dims according to inner_indices
  names(dim(result)) <- sapply(names(dim(result)),
    function(x) {
      which_entry <- which(sapply(synonims, function(y) x %in% y))
      if (length(which_entry) > 0) {
        names(synonims)[which_entry]
      } else {
        x
      }
    })
  names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')),
    function(x) {
      which_entry <- which(sapply(synonims, function(y) x %in% y))
      if (length(which_entry) > 0) {
        names(synonims)[which_entry]
      } else {
        x
      }
    })
Nicolau Manubens's avatar
Nicolau Manubens committed
  if (length(names(attr(result, 'variables'))) == 1) {
    var_name <- names(attr(result, 'variables'))
    units <- attr(result, 'variables')[[var_name]][['units']]
    if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) {
      if (units == 'seconds') {
        units <- 'secs'
      } else if (units == 'minutes') {
        units <- 'mins'
      }
      result[] <- paste(result[], units)
    } else if (grepl(' since ', units)) {
      parts <- strsplit(units, ' since ')[[1]]
      units <- parts[1]
      if (units == 'seconds') {
        units <- 'secs'
      } else if (units == 'minutes') {
        units <- 'mins'
      }
      new_array <- seq(as.POSIXct(parts[2]), length = max(result, na.rm = TRUE) + 1, by = units)[result[] + 1]
      dim(new_array) <- dim(result)
      attr(new_array, 'variables') <- attr(result, 'variables')
      result <- new_array