diff --git a/R/NcReadDims.R b/R/NcReadDims.R index ce4a0956a2b8f8b6cc59b8b93b33a13593c282c9..660b4efe20e64d9194a22100846d7bb6831bfe5c 100644 --- a/R/NcReadDims.R +++ b/R/NcReadDims.R @@ -44,6 +44,11 @@ NcReadDims <- function(file_to_read, var_names = NULL) { } found_dims <- file_object$var[[var_name]]$size names(found_dims) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') + # Support for character strings + if ((file_object$var[[var_name]]$prec == 'char') && + (length(file_object$var[[var_name]][['dim']]) > 1)) { + found_dims <- found_dims[-1] + } new_dim <- c(var = 1) found_dims <- c(new_dim, found_dims) if (!is.null(dims)) { diff --git a/R/NcToArray.R b/R/NcToArray.R index 45c32891db8176230fc4939b71feaa3a87ad5c2d..27b7099ad80f8911e81d1e6bdb6b3ab607e0d2ab 100644 --- a/R/NcToArray.R +++ b/R/NcToArray.R @@ -72,7 +72,6 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, # if (!is.character(vars_to_read) && !is.numeric(vars_to_read)) { # stop("Parameter 'vars_to_read' must be a numeric vector or vector of character strings.") # } - result_list <- NULL for (vars_to_read_vector in vars_to_read) { result <- NULL @@ -94,12 +93,11 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, } #file_object$var[extra_dimvars] <- extra_dimvars_list #file_object$nvars <- file_object$nvars + length(extra_dimvars) - nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector)) - if (length(nmv) > 0) { - if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) { + if (is.numeric(vars_to_read_vector)) { + if (any(vars_to_read_vector > (length(file_object$var) + length(extra_dimvars)))) { stop("Provided numerical variable indices out of bounds in 'vars_to_read'.") } - vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]] + vars_to_read_vector <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector] } for (var_name in vars_to_read_vector) { if (var_name %in% extra_dimvars) { @@ -214,8 +212,21 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, } start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x)) count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1) + # Support for character strings + if ((file_object[['var']][[var_name]][['prec']] == 'char') && + (length(file_object[['var']][[var_name]][['dim']]) > 1)) { + start <- c(1, start) + count <- c(-1, count) + original_ncvar_get_inner <- ncdf4:::ncvar_get_inner + assignInNamespace('ncvar_get_inner', .ncvar_get_inner, 'ncdf4') + } var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)), lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE))) + # Support for character strings + if ((file_object[['var']][[var_name]][['prec']] == 'char') && + (length(file_object[['var']][[var_name]][['dim']]) > 1)) { + assignInNamespace('ncvar_get_inner', original_ncvar_get_inner, 'ncdf4') + } #metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name)) ## TODO: Crop dimensions in attributes if (!is.null(reorder_back)) { @@ -246,7 +257,7 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, #names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') } if (!is.null(var_result)) { - if (!drop_var_dim || (length(vars_to_read_vector) == 1)) { + if (!(drop_var_dim && (length(vars_to_read_vector) == 1))) { dim(var_result) <- c(setNames(1, var_tag), dim(var_result)) } attr(var_result, 'variables') <- structure(list(atts), .Names = var_name) diff --git a/R/Utils.R b/R/Utils.R index 82c2fb6b8ad9a476c25efd90c06a07306e3392e5..27be8dde22465598faf7f17b32815f49b75cb1e1 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -203,3 +203,322 @@ names(dim(array1)) <- names(dim(array2)) array1 } + +# This function is a copy of the function ncvar_get_inner in 'ncdf4' package +# with a bugfix. When ncdf4 addresses this bug, this function will be removed +# from this package. +.ncvar_get_inner <- function( ncid, varid, missval, addOffset=0., scaleFact=1.0, start=NA, count=NA, verbose=FALSE, signedbyte=TRUE, + collapse_degen=TRUE, raw_datavals=FALSE ) { + + if( ! is.numeric(ncid)) + stop("Error, first arg passed to ncvar_get_inner (ncid) must be a simple C-style integer that is passed directly to the C api") + if( ! is.numeric(varid)) + + stop("Error, second arg passed to ncvar_get_inner (varid) must be a simple C-style integer that is passed directly to the C api") + + if( verbose ) { + print(paste("ncvar_get_inner: entering with (C-STYLE INTEGER ONLY) ncid=", ncid, + "varid=", varid )) + print(paste("ncvar_get_inner: following line is collapse_degen:")) + print(collapse_degen) + } + + tmp_typename = c('short', 'int', 'float', 'double', 'char', 'byte' ) + + have_start = (length(start)>1) || ((length(start)==1) && (!is.na(start))) + have_count = (length(count)>1) || ((length(count)==1) && (!is.na(count))) + + sm <- storage.mode(start) + if( (sm != "double") && (sm != "integer") && (sm != "logical")) + stop(paste("passed a start argument of storage mode",sm,"; can only handle double or integer")) + sm <- storage.mode(count) + if( (sm != "double") && (sm != "integer") && (sm != "logical")) + stop(paste("passed a 'count' argument with storage mode '",sm,"'; can only handle double or integer", sep='')) + + if( signedbyte ) + byte_style = 1 # 1=signed + else + byte_style = 2 # 2=unsigned + + varsize <- ncdf4:::ncvar_size ( ncid, varid ) + ndims <- ncdf4:::ncvar_ndims( ncid, varid ) + if( verbose ) { + print(paste("ndims:",ndims)) + print("ncvar_get: varsize:") + print(varsize) + } + + #------------------------------ + # Fix up start and count to use + #------------------------------ + if( ndims == 0 ) { + start <- 1 + count <- 1 + } + else + { + if( ! have_start ) + start <- rep(1,ndims) # Note: use R convention for now + if( ! have_count ) + count <- varsize - start + 1 + else + { + #------------------ + # Take care of -1's + #------------------ + count <- ifelse( (count == -1), varsize-start+1, count) + } + } + if( verbose ) { + print("ncvar_get: start:") + print(start) + print("ncvar_get: count:") + print(count) + } + + if( ndims > 0 ) { + if( length(start) != ndims ) + stop(paste("Error: variable has",ndims,"dims, but start has",length(start),"entries. They must match!")) + if( length(count) != ndims ) + stop(paste("Error: variable has",ndims,"dims, but count has",length(count),"entries. They must match!")) + } + + #---------------------------------------- + # Need to know how much space to allocate + #---------------------------------------- + totvarsize <- prod(count) + if( verbose ) + print(paste("ncvar_get: totvarsize:",totvarsize)) + + #-------------------------------------------------- + # Switch from R to C convention for start and count + #-------------------------------------------------- + c.start <- start[ ndims:1 ] - 1 + c.count <- count[ ndims:1 ] + + rv <- list() + rv$error <- -1 + + #--------------------------------- + # Get the correct type of variable + #--------------------------------- + precint <- ncdf4:::ncvar_type( ncid, varid ) # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte, 7=ubyte, 8=ushort, 9=uint, 10=int64, 11=uint64, 12=string + if( verbose ) + print(paste("ncvar_get_inner: getting var of type",tmp_typename[precint], 'id=', precint)) + + if( (precint == 1) || (precint == 2) || (precint == 6) || (precint == 7) || (precint == 8)) { + #-------------------------------- + # Short, Int, Byte, UByte, UShort + #-------------------------------- + rv <- .Call("Rsx_nc4_get_vara_int", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + as.integer(byte_style), # 1=signed, 2=unsigned + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function Rsx_nc4_get_var_int returned error") + data = rv$data + } + + else if( (precint == 3) || (precint == 4)) { + #---------------------------------------------------------------- + # Float, double where we have the C routine fix the missing value + # 'imvstate' is: 0 if we do not have a missing value (it is NULL), + # 1 if the missing value is NA, 2 if the missing value is present + # and not NULL and not NA. These codes are used by the C routine + #---------------------------------------------------------------- + if( is.null( missval )) { + passed_missval = 0.0 + imvstate = as.integer(0) + } + else if( is.na(missval)) { + passed_missval = 0.0 + imvstate = as.integer(1) + } + else + { + passed_missval = missval + imvstate = as.integer(2) + } + + if( raw_datavals ) + fixmiss = as.integer(0) # setting 'raw_datavalues' to TRUE gives the actual raw numbers from the file, not processed at all + else + fixmiss = as.integer(1) + + if( verbose ) print('about to call Rsx_nc4_get_vara_double...') + rv <- .Call("Rsx_nc4_get_vara_double", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + fixmiss, + imvstate, + as.double(passed_missval), + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_vara_double returned error") + if( verbose ) print('back from call to Rsx_nc4_get_vara_double...') + data = rv$data + } + + else if( (precint == 9) || (precint == 10) || (precint == 11)) { + #--------------------------------------------- + # uint, int64, uint64 + # Thanks to Tom Hilinski of Colorado State for + # fix to uint here + #--------------------------------------------- + rv$data <- double(totvarsize) + fixmiss = as.integer(0) + rv <- .Call("Rsx_nc4_get_vara_double", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + fixmiss, + as.integer(-1), # The 'imvstate' arg is unused in this call since no fixmiss + as.double(0.0), # the passed missing value is not used in this call since no fixmiss + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_vara_double returned error") + data = rv$data + } + + else if( precint == 5 ) { + #----- + # Char + #----- + strndims <- ndims - 1 + strlen <- count[1] + 1 + strdim <- 1 + if( strndims >= 1 ) { + strdim <- count[2:ndims] + nstr <- prod(strdim) + } + else + nstr <- 1 + if(verbose) + print(paste("ndims:",ndims,"strndims:",strndims,"strlen:",strlen,"nstr:",nstr)) + + #---------------------------------------------- + # Make a character string of the specified size + #---------------------------------------------- + stor <- ncdf4:::blankstring_ncdf4( totvarsize ) + stordata <- ncdf4:::blankstring_ncdf4(strlen) + if( verbose ) + print(paste("length of stor string:",nchar(stor))) + rv$tempstore <- stor + rv$data <- array(stordata, dim=strdim) + + rv <- .C("R_nc4_get_vara_text", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + tempstore=as.character(rv$tempstore), + data=as.character(rv$data), + error=as.integer(rv$error), + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_var_text returned error") + + dim(rv$data) <- strdim + } + + else if( precint == 12 ) { + #----------------------------- + # netcdf version 4 String type + #----------------------------- + rv <- .Call( "R_nc4_get_vara_string", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + PACKAGE="ncdf4" ) + } + else + { + stop(paste("Trying to get variable of an unhandled type code: ",precint, "(", ncdf4:::ncvar_type_to_string(precint), ")")) + } + if( verbose ) { + print(paste("ncvar_get_inner: C call returned",rv$error)) + print(paste("ncvar_get_inner: dim of directly returned array:")) + print(dim(rv$data)) + } + + #-------------------------------------------------------- + # Set our dims...but collapse degenerate dimensions first + #-------------------------------------------------------- + if( ndims > 0 ) { + if( collapse_degen ) { + count.nodegen <- vector() + foundone <- 0 + for( i in 1:ndims ) + if( count[i] > 1 ) { + count.nodegen <- append(count.nodegen, count[i]) + foundone <- 1 + } + if( foundone == 0 ) + dim(rv$data) <- (1) + else + { + if( verbose ) + print(paste("count.nodegen:",count.nodegen," Length of data:",length(rv$data))) + if( precint != 5 ) + dim(rv$data) <- count.nodegen + } + } + else if (precint != 5) + dim(rv$data) = count + + if( verbose ) { + print("ncvar_get: final dims of returned array:") + print(dim(rv$data)) + } + } + + #---------------------------------------------------------- + # Change missing values to "NA"s. Note that 'varid2Rindex' + # is NOT filled out for dimvars, so skip this if a dimvar + # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte + # NOTE: if type is 3 or 4 (float or double), the missing + # value was already set by the C routine. + #---------------------------------------------------------- + if( (!raw_datavals) && (precint != 5) && (precint != 3) && (precint != 4) ) { # not char, float, or double + if( verbose ) print("ncvar_get: setting missing values to NA") + if( (precint==1) || (precint==2) || (precint==6) || (precint==7) || (precint==8) || (precint==9)) { + #-------------------------------------- + # Short, Int, Byte, UByte, UShort, UInt + #-------------------------------------- + if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, " to NA", sep='')) + if( ! is.na(missval) ) + rv$data[rv$data==missval] <- NA + } + else if( (precint==10) || (precint==11)) { + #-------------------------------- + # 8-byte int, unsigned 8-byte int + #-------------------------------- + if( ! is.na(missval) ) { + tol <- abs(missval*1.e-5) + if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, + " (w/tolerance ", tol,") to NA", sep='')) + rv$data[abs(rv$data-missval)