Commit 482a523a authored by Nicolau Manubens Gil's avatar Nicolau Manubens Gil
Browse files

Fixes to support character variables.

parent 8a48c79e
...@@ -44,6 +44,11 @@ NcReadDims <- function(file_to_read, var_names = NULL) { ...@@ -44,6 +44,11 @@ NcReadDims <- function(file_to_read, var_names = NULL) {
} }
found_dims <- file_object$var[[var_name]]$size found_dims <- file_object$var[[var_name]]$size
names(found_dims) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') names(found_dims) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
# Support for character strings
if ((file_object$var[[var_name]]$prec == 'char') &&
(length(file_object$var[[var_name]][['dim']]) > 1)) {
found_dims <- found_dims[-1]
}
new_dim <- c(var = 1) new_dim <- c(var = 1)
found_dims <- c(new_dim, found_dims) found_dims <- c(new_dim, found_dims)
if (!is.null(dims)) { if (!is.null(dims)) {
......
...@@ -212,8 +212,21 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, ...@@ -212,8 +212,21 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
} }
start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x)) start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x))
count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1) count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1)
# Support for character strings
if ((file_object[['var']][[var_name]][['prec']] == 'char') &&
(length(file_object[['var']][[var_name]][['dim']]) > 1)) {
start <- c(1, start)
count <- c(-1, count)
original_ncvar_get_inner <- ncdf4:::ncvar_get_inner
assignInNamespace('ncvar_get_inner', .ncvar_get_inner, 'ncdf4')
}
var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)), var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)),
lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE))) lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE)))
# Support for character strings
if ((file_object[['var']][[var_name]][['prec']] == 'char') &&
(length(file_object[['var']][[var_name]][['dim']]) > 1)) {
assignInNamespace('ncvar_get_inner', original_ncvar_get_inner, 'ncdf4')
}
#metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name)) #metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
## TODO: Crop dimensions in attributes ## TODO: Crop dimensions in attributes
if (!is.null(reorder_back)) { if (!is.null(reorder_back)) {
......
...@@ -203,3 +203,322 @@ ...@@ -203,3 +203,322 @@
names(dim(array1)) <- names(dim(array2)) names(dim(array1)) <- names(dim(array2))
array1 array1
} }
# This function is a copy of the function ncvar_get_inner in 'ncdf4' package
# with a bugfix. When ncdf4 addresses this bug, this function will be removed
# from this package.
.ncvar_get_inner <- function( ncid, varid, missval, addOffset=0., scaleFact=1.0, start=NA, count=NA, verbose=FALSE, signedbyte=TRUE,
collapse_degen=TRUE, raw_datavals=FALSE ) {
if( ! is.numeric(ncid))
stop("Error, first arg passed to ncvar_get_inner (ncid) must be a simple C-style integer that is passed directly to the C api")
if( ! is.numeric(varid))
stop("Error, second arg passed to ncvar_get_inner (varid) must be a simple C-style integer that is passed directly to the C api")
if( verbose ) {
print(paste("ncvar_get_inner: entering with (C-STYLE INTEGER ONLY) ncid=", ncid,
"varid=", varid ))
print(paste("ncvar_get_inner: following line is collapse_degen:"))
print(collapse_degen)
}
tmp_typename = c('short', 'int', 'float', 'double', 'char', 'byte' )
have_start = (length(start)>1) || ((length(start)==1) && (!is.na(start)))
have_count = (length(count)>1) || ((length(count)==1) && (!is.na(count)))
sm <- storage.mode(start)
if( (sm != "double") && (sm != "integer") && (sm != "logical"))
stop(paste("passed a start argument of storage mode",sm,"; can only handle double or integer"))
sm <- storage.mode(count)
if( (sm != "double") && (sm != "integer") && (sm != "logical"))
stop(paste("passed a 'count' argument with storage mode '",sm,"'; can only handle double or integer", sep=''))
if( signedbyte )
byte_style = 1 # 1=signed
else
byte_style = 2 # 2=unsigned
varsize <- ncdf4:::ncvar_size ( ncid, varid )
ndims <- ncdf4:::ncvar_ndims( ncid, varid )
if( verbose ) {
print(paste("ndims:",ndims))
print("ncvar_get: varsize:")
print(varsize)
}
#------------------------------
# Fix up start and count to use
#------------------------------
if( ndims == 0 ) {
start <- 1
count <- 1
}
else
{
if( ! have_start )
start <- rep(1,ndims) # Note: use R convention for now
if( ! have_count )
count <- varsize - start + 1
else
{
#------------------
# Take care of -1's
#------------------
count <- ifelse( (count == -1), varsize-start+1, count)
}
}
if( verbose ) {
print("ncvar_get: start:")
print(start)
print("ncvar_get: count:")
print(count)
}
if( ndims > 0 ) {
if( length(start) != ndims )
stop(paste("Error: variable has",ndims,"dims, but start has",length(start),"entries. They must match!"))
if( length(count) != ndims )
stop(paste("Error: variable has",ndims,"dims, but count has",length(count),"entries. They must match!"))
}
#----------------------------------------
# Need to know how much space to allocate
#----------------------------------------
totvarsize <- prod(count)
if( verbose )
print(paste("ncvar_get: totvarsize:",totvarsize))
#--------------------------------------------------
# Switch from R to C convention for start and count
#--------------------------------------------------
c.start <- start[ ndims:1 ] - 1
c.count <- count[ ndims:1 ]
rv <- list()
rv$error <- -1
#---------------------------------
# Get the correct type of variable
#---------------------------------
precint <- ncdf4:::ncvar_type( ncid, varid ) # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte, 7=ubyte, 8=ushort, 9=uint, 10=int64, 11=uint64, 12=string
if( verbose )
print(paste("ncvar_get_inner: getting var of type",tmp_typename[precint], 'id=', precint))
if( (precint == 1) || (precint == 2) || (precint == 6) || (precint == 7) || (precint == 8)) {
#--------------------------------
# Short, Int, Byte, UByte, UShort
#--------------------------------
rv <- .Call("Rsx_nc4_get_vara_int",
as.integer(ncid),
as.integer(varid),
as.integer(c.start), # Already switched to C convention...
as.integer(c.count), # Already switched to C convention...
as.integer(byte_style), # 1=signed, 2=unsigned
PACKAGE="ncdf4")
if( rv$error != 0 )
stop("C function Rsx_nc4_get_var_int returned error")
data = rv$data
}
else if( (precint == 3) || (precint == 4)) {
#----------------------------------------------------------------
# Float, double where we have the C routine fix the missing value
# 'imvstate' is: 0 if we do not have a missing value (it is NULL),
# 1 if the missing value is NA, 2 if the missing value is present
# and not NULL and not NA. These codes are used by the C routine
#----------------------------------------------------------------
if( is.null( missval )) {
passed_missval = 0.0
imvstate = as.integer(0)
}
else if( is.na(missval)) {
passed_missval = 0.0
imvstate = as.integer(1)
}
else
{
passed_missval = missval
imvstate = as.integer(2)
}
if( raw_datavals )
fixmiss = as.integer(0) # setting 'raw_datavalues' to TRUE gives the actual raw numbers from the file, not processed at all
else
fixmiss = as.integer(1)
if( verbose ) print('about to call Rsx_nc4_get_vara_double...')
rv <- .Call("Rsx_nc4_get_vara_double",
as.integer(ncid),
as.integer(varid),
as.integer(c.start), # Already switched to C convention...
as.integer(c.count), # Already switched to C convention...
fixmiss,
imvstate,
as.double(passed_missval),
PACKAGE="ncdf4")
if( rv$error != 0 )
stop("C function R_nc4_get_vara_double returned error")
if( verbose ) print('back from call to Rsx_nc4_get_vara_double...')
data = rv$data
}
else if( (precint == 9) || (precint == 10) || (precint == 11)) {
#---------------------------------------------
# uint, int64, uint64
# Thanks to Tom Hilinski of Colorado State for
# fix to uint here
#---------------------------------------------
rv$data <- double(totvarsize)
fixmiss = as.integer(0)
rv <- .Call("Rsx_nc4_get_vara_double",
as.integer(ncid),
as.integer(varid),
as.integer(c.start), # Already switched to C convention...
as.integer(c.count), # Already switched to C convention...
fixmiss,
as.integer(-1), # The 'imvstate' arg is unused in this call since no fixmiss
as.double(0.0), # the passed missing value is not used in this call since no fixmiss
PACKAGE="ncdf4")
if( rv$error != 0 )
stop("C function R_nc4_get_vara_double returned error")
data = rv$data
}
else if( precint == 5 ) {
#-----
# Char
#-----
strndims <- ndims - 1
strlen <- count[1] + 1
strdim <- 1
if( strndims >= 1 ) {
strdim <- count[2:ndims]
nstr <- prod(strdim)
}
else
nstr <- 1
if(verbose)
print(paste("ndims:",ndims,"strndims:",strndims,"strlen:",strlen,"nstr:",nstr))
#----------------------------------------------
# Make a character string of the specified size
#----------------------------------------------
stor <- ncdf4:::blankstring_ncdf4( totvarsize )
stordata <- ncdf4:::blankstring_ncdf4(strlen)
if( verbose )
print(paste("length of stor string:",nchar(stor)))
rv$tempstore <- stor
rv$data <- array(stordata, dim=strdim)
rv <- .C("R_nc4_get_vara_text",
as.integer(ncid),
as.integer(varid),
as.integer(c.start), # Already switched to C convention...
as.integer(c.count), # Already switched to C convention...
tempstore=as.character(rv$tempstore),
data=as.character(rv$data),
error=as.integer(rv$error),
PACKAGE="ncdf4")
if( rv$error != 0 )
stop("C function R_nc4_get_var_text returned error")
dim(rv$data) <- strdim
}
else if( precint == 12 ) {
#-----------------------------
# netcdf version 4 String type
#-----------------------------
rv <- .Call( "R_nc4_get_vara_string",
as.integer(ncid),
as.integer(varid),
as.integer(c.start), # Already switched to C convention...
as.integer(c.count), # Already switched to C convention...
PACKAGE="ncdf4" )
}
else
{
stop(paste("Trying to get variable of an unhandled type code: ",precint, "(", ncdf4:::ncvar_type_to_string(precint), ")"))
}
if( verbose ) {
print(paste("ncvar_get_inner: C call returned",rv$error))
print(paste("ncvar_get_inner: dim of directly returned array:"))
print(dim(rv$data))
}
#--------------------------------------------------------
# Set our dims...but collapse degenerate dimensions first
#--------------------------------------------------------
if( ndims > 0 ) {
if( collapse_degen ) {
count.nodegen <- vector()
foundone <- 0
for( i in 1:ndims )
if( count[i] > 1 ) {
count.nodegen <- append(count.nodegen, count[i])
foundone <- 1
}
if( foundone == 0 )
dim(rv$data) <- (1)
else
{
if( verbose )
print(paste("count.nodegen:",count.nodegen," Length of data:",length(rv$data)))
if( precint != 5 )
dim(rv$data) <- count.nodegen
}
}
else if (precint != 5)
dim(rv$data) = count
if( verbose ) {
print("ncvar_get: final dims of returned array:")
print(dim(rv$data))
}
}
#----------------------------------------------------------
# Change missing values to "NA"s. Note that 'varid2Rindex'
# is NOT filled out for dimvars, so skip this if a dimvar
# 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte
# NOTE: if type is 3 or 4 (float or double), the missing
# value was already set by the C routine.
#----------------------------------------------------------
if( (!raw_datavals) && (precint != 5) && (precint != 3) && (precint != 4) ) { # not char, float, or double
if( verbose ) print("ncvar_get: setting missing values to NA")
if( (precint==1) || (precint==2) || (precint==6) || (precint==7) || (precint==8) || (precint==9)) {
#--------------------------------------
# Short, Int, Byte, UByte, UShort, UInt
#--------------------------------------
if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, " to NA", sep=''))
if( ! is.na(missval) )
rv$data[rv$data==missval] <- NA
}
else if( (precint==10) || (precint==11)) {
#--------------------------------
# 8-byte int, unsigned 8-byte int
#--------------------------------
if( ! is.na(missval) ) {
tol <- abs(missval*1.e-5)
if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval,
" (w/tolerance ", tol,") to NA", sep=''))
rv$data[abs(rv$data-missval)<tol] <- NA
}
}
}
#--------------------------------------
# Implement add_offset and scale_factor
#--------------------------------------
if( ! raw_datavals ) {
if( (scaleFact != 1.0) || (addOffset != 0.0) ) {
if( verbose )
print(paste("ncvar_get: implementing add_offset=", addOffset, " and scaleFact=", scaleFact ))
rv$data <- rv$data * scaleFact + addOffset
}
}
return(rv$data)
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment