From c04846f086c62206c0b6dfb2f71664e0a4b15658 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Sun, 9 Apr 2017 18:04:44 +0200 Subject: [PATCH 1/4] Bug in nc2a, not dropping var_dim. --- R/NcToArray.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/NcToArray.R b/R/NcToArray.R index 45c3289..1945ff7 100644 --- a/R/NcToArray.R +++ b/R/NcToArray.R @@ -246,7 +246,7 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, #names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') } if (!is.null(var_result)) { - if (!drop_var_dim || (length(vars_to_read_vector) == 1)) { + if (!(drop_var_dim && (length(vars_to_read_vector) == 1))) { dim(var_result) <- c(setNames(1, var_tag), dim(var_result)) } attr(var_result, 'variables') <- structure(list(atts), .Names = var_name) -- GitLab From ded36c38e462a49acdd7c5f21e0e05873c04194d Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Sun, 9 Apr 2017 18:58:49 +0200 Subject: [PATCH 2/4] Doc enhancement. --- man/NcToArray.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/NcToArray.Rd b/man/NcToArray.Rd index 855d344..ec48517 100644 --- a/man/NcToArray.Rd +++ b/man/NcToArray.Rd @@ -5,7 +5,7 @@ Read a NetCDF File Into an R Array } \description{ -Reads one or a set of variables together with metadata items from a NetCDF file into an R array. Indices to retrieve (not necessarily consecutive) can be specified for each of the dimensions. Depending on the format of the request, the variables will be merged in into a single extended array or returned in a list with an array for each variable. The different variables in the file are considered to be stored along a dimension called 'var', so reading a variable 'foo' with dimensions 'lat' and 'lon' would result in an array with the dimensions c('var' = 1, 'lat' = n_lats, 'lon' = n_lons). +Reads one or a set of variables together with metadata items from a single NetCDF file into an R array (see package 'startR' to read data from multiple files/data sets). Indices to retrieve (not necessarily consecutive) can be specified for each of the dimensions. Depending on the format of the request, the variables will be merged in into a single extended array or returned in a list with an array for each variable. The different variables in the file are considered to be stored along a dimension called 'var', so reading a variable 'foo' with dimensions 'lat' and 'lon' would result in an array with the dimensions c('var' = 1, 'lat' = n_lats, 'lon' = n_lons). } \usage{ NcToArray(file_to_read, dim_indices = NULL, vars_to_read = NULL, @@ -17,7 +17,7 @@ nc2a(file_to_read, dim_indices = NULL, vars_to_read = NULL, } \arguments{ \item{file_to_read}{ -Path to the file to be read or a NetCDF object as returned by \code{easyNCDF::NcOpen} or \code{ncdf4::nc_open}. +Path to the file to be read or a NetCDF object as returned by \code{easyNCDF::NcOpen} or \code{ncdf4::nc_open}. See package 'startR' if need to read data from multiple files/data sets. } \item{dim_indices}{ Named list with numeric vectors of indices to take for each dimension. The names should correspond to the dimension names which to take the indices for. Non-consecutive indices can be specified. If \code{expect_all_indices = FALSE} (default), it is not mandatory to specify the indices for all (or even any of) the dimensions. In that case all the indices along such dimensions will be read in. If \code{expect_all_indices = TRUE}, then indices for all the dimensions have to be specified for the function to return a data array. In that case, \code{NA} can be used to request all indices for a dimension if desired. -- GitLab From 8a48c79e5e3e871849c5270342dff28c0f7dcb2d Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 11 Apr 2017 19:56:35 +0200 Subject: [PATCH 3/4] Minor fixes. --- R/NcToArray.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/NcToArray.R b/R/NcToArray.R index 1945ff7..30561c0 100644 --- a/R/NcToArray.R +++ b/R/NcToArray.R @@ -72,7 +72,6 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, # if (!is.character(vars_to_read) && !is.numeric(vars_to_read)) { # stop("Parameter 'vars_to_read' must be a numeric vector or vector of character strings.") # } - result_list <- NULL for (vars_to_read_vector in vars_to_read) { result <- NULL @@ -94,12 +93,11 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, } #file_object$var[extra_dimvars] <- extra_dimvars_list #file_object$nvars <- file_object$nvars + length(extra_dimvars) - nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector)) - if (length(nmv) > 0) { - if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) { + if (is.numeric(vars_to_read_vector)) { + if (any(vars_to_read_vector > (length(file_object$var) + length(extra_dimvars)))) { stop("Provided numerical variable indices out of bounds in 'vars_to_read'.") } - vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]] + vars_to_read_vector <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector] } for (var_name in vars_to_read_vector) { if (var_name %in% extra_dimvars) { -- GitLab From 482a523a7f7bdf53117dc5ae5e1a2d2f25645ec5 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Thu, 13 Apr 2017 02:32:20 +0200 Subject: [PATCH 4/4] Fixes to support character variables. --- R/NcReadDims.R | 5 + R/NcToArray.R | 13 ++ R/Utils.R | 319 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 337 insertions(+) diff --git a/R/NcReadDims.R b/R/NcReadDims.R index ce4a095..660b4ef 100644 --- a/R/NcReadDims.R +++ b/R/NcReadDims.R @@ -44,6 +44,11 @@ NcReadDims <- function(file_to_read, var_names = NULL) { } found_dims <- file_object$var[[var_name]]$size names(found_dims) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name') + # Support for character strings + if ((file_object$var[[var_name]]$prec == 'char') && + (length(file_object$var[[var_name]][['dim']]) > 1)) { + found_dims <- found_dims[-1] + } new_dim <- c(var = 1) found_dims <- c(new_dim, found_dims) if (!is.null(dims)) { diff --git a/R/NcToArray.R b/R/NcToArray.R index 30561c0..27b7099 100644 --- a/R/NcToArray.R +++ b/R/NcToArray.R @@ -212,8 +212,21 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL, } start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x)) count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1) + # Support for character strings + if ((file_object[['var']][[var_name]][['prec']] == 'char') && + (length(file_object[['var']][[var_name]][['dim']]) > 1)) { + start <- c(1, start) + count <- c(-1, count) + original_ncvar_get_inner <- ncdf4:::ncvar_get_inner + assignInNamespace('ncvar_get_inner', .ncvar_get_inner, 'ncdf4') + } var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)), lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE))) + # Support for character strings + if ((file_object[['var']][[var_name]][['prec']] == 'char') && + (length(file_object[['var']][[var_name]][['dim']]) > 1)) { + assignInNamespace('ncvar_get_inner', original_ncvar_get_inner, 'ncdf4') + } #metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name)) ## TODO: Crop dimensions in attributes if (!is.null(reorder_back)) { diff --git a/R/Utils.R b/R/Utils.R index 82c2fb6..27be8dd 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -203,3 +203,322 @@ names(dim(array1)) <- names(dim(array2)) array1 } + +# This function is a copy of the function ncvar_get_inner in 'ncdf4' package +# with a bugfix. When ncdf4 addresses this bug, this function will be removed +# from this package. +.ncvar_get_inner <- function( ncid, varid, missval, addOffset=0., scaleFact=1.0, start=NA, count=NA, verbose=FALSE, signedbyte=TRUE, + collapse_degen=TRUE, raw_datavals=FALSE ) { + + if( ! is.numeric(ncid)) + stop("Error, first arg passed to ncvar_get_inner (ncid) must be a simple C-style integer that is passed directly to the C api") + if( ! is.numeric(varid)) + + stop("Error, second arg passed to ncvar_get_inner (varid) must be a simple C-style integer that is passed directly to the C api") + + if( verbose ) { + print(paste("ncvar_get_inner: entering with (C-STYLE INTEGER ONLY) ncid=", ncid, + "varid=", varid )) + print(paste("ncvar_get_inner: following line is collapse_degen:")) + print(collapse_degen) + } + + tmp_typename = c('short', 'int', 'float', 'double', 'char', 'byte' ) + + have_start = (length(start)>1) || ((length(start)==1) && (!is.na(start))) + have_count = (length(count)>1) || ((length(count)==1) && (!is.na(count))) + + sm <- storage.mode(start) + if( (sm != "double") && (sm != "integer") && (sm != "logical")) + stop(paste("passed a start argument of storage mode",sm,"; can only handle double or integer")) + sm <- storage.mode(count) + if( (sm != "double") && (sm != "integer") && (sm != "logical")) + stop(paste("passed a 'count' argument with storage mode '",sm,"'; can only handle double or integer", sep='')) + + if( signedbyte ) + byte_style = 1 # 1=signed + else + byte_style = 2 # 2=unsigned + + varsize <- ncdf4:::ncvar_size ( ncid, varid ) + ndims <- ncdf4:::ncvar_ndims( ncid, varid ) + if( verbose ) { + print(paste("ndims:",ndims)) + print("ncvar_get: varsize:") + print(varsize) + } + + #------------------------------ + # Fix up start and count to use + #------------------------------ + if( ndims == 0 ) { + start <- 1 + count <- 1 + } + else + { + if( ! have_start ) + start <- rep(1,ndims) # Note: use R convention for now + if( ! have_count ) + count <- varsize - start + 1 + else + { + #------------------ + # Take care of -1's + #------------------ + count <- ifelse( (count == -1), varsize-start+1, count) + } + } + if( verbose ) { + print("ncvar_get: start:") + print(start) + print("ncvar_get: count:") + print(count) + } + + if( ndims > 0 ) { + if( length(start) != ndims ) + stop(paste("Error: variable has",ndims,"dims, but start has",length(start),"entries. They must match!")) + if( length(count) != ndims ) + stop(paste("Error: variable has",ndims,"dims, but count has",length(count),"entries. They must match!")) + } + + #---------------------------------------- + # Need to know how much space to allocate + #---------------------------------------- + totvarsize <- prod(count) + if( verbose ) + print(paste("ncvar_get: totvarsize:",totvarsize)) + + #-------------------------------------------------- + # Switch from R to C convention for start and count + #-------------------------------------------------- + c.start <- start[ ndims:1 ] - 1 + c.count <- count[ ndims:1 ] + + rv <- list() + rv$error <- -1 + + #--------------------------------- + # Get the correct type of variable + #--------------------------------- + precint <- ncdf4:::ncvar_type( ncid, varid ) # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte, 7=ubyte, 8=ushort, 9=uint, 10=int64, 11=uint64, 12=string + if( verbose ) + print(paste("ncvar_get_inner: getting var of type",tmp_typename[precint], 'id=', precint)) + + if( (precint == 1) || (precint == 2) || (precint == 6) || (precint == 7) || (precint == 8)) { + #-------------------------------- + # Short, Int, Byte, UByte, UShort + #-------------------------------- + rv <- .Call("Rsx_nc4_get_vara_int", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + as.integer(byte_style), # 1=signed, 2=unsigned + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function Rsx_nc4_get_var_int returned error") + data = rv$data + } + + else if( (precint == 3) || (precint == 4)) { + #---------------------------------------------------------------- + # Float, double where we have the C routine fix the missing value + # 'imvstate' is: 0 if we do not have a missing value (it is NULL), + # 1 if the missing value is NA, 2 if the missing value is present + # and not NULL and not NA. These codes are used by the C routine + #---------------------------------------------------------------- + if( is.null( missval )) { + passed_missval = 0.0 + imvstate = as.integer(0) + } + else if( is.na(missval)) { + passed_missval = 0.0 + imvstate = as.integer(1) + } + else + { + passed_missval = missval + imvstate = as.integer(2) + } + + if( raw_datavals ) + fixmiss = as.integer(0) # setting 'raw_datavalues' to TRUE gives the actual raw numbers from the file, not processed at all + else + fixmiss = as.integer(1) + + if( verbose ) print('about to call Rsx_nc4_get_vara_double...') + rv <- .Call("Rsx_nc4_get_vara_double", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + fixmiss, + imvstate, + as.double(passed_missval), + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_vara_double returned error") + if( verbose ) print('back from call to Rsx_nc4_get_vara_double...') + data = rv$data + } + + else if( (precint == 9) || (precint == 10) || (precint == 11)) { + #--------------------------------------------- + # uint, int64, uint64 + # Thanks to Tom Hilinski of Colorado State for + # fix to uint here + #--------------------------------------------- + rv$data <- double(totvarsize) + fixmiss = as.integer(0) + rv <- .Call("Rsx_nc4_get_vara_double", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + fixmiss, + as.integer(-1), # The 'imvstate' arg is unused in this call since no fixmiss + as.double(0.0), # the passed missing value is not used in this call since no fixmiss + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_vara_double returned error") + data = rv$data + } + + else if( precint == 5 ) { + #----- + # Char + #----- + strndims <- ndims - 1 + strlen <- count[1] + 1 + strdim <- 1 + if( strndims >= 1 ) { + strdim <- count[2:ndims] + nstr <- prod(strdim) + } + else + nstr <- 1 + if(verbose) + print(paste("ndims:",ndims,"strndims:",strndims,"strlen:",strlen,"nstr:",nstr)) + + #---------------------------------------------- + # Make a character string of the specified size + #---------------------------------------------- + stor <- ncdf4:::blankstring_ncdf4( totvarsize ) + stordata <- ncdf4:::blankstring_ncdf4(strlen) + if( verbose ) + print(paste("length of stor string:",nchar(stor))) + rv$tempstore <- stor + rv$data <- array(stordata, dim=strdim) + + rv <- .C("R_nc4_get_vara_text", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + tempstore=as.character(rv$tempstore), + data=as.character(rv$data), + error=as.integer(rv$error), + PACKAGE="ncdf4") + if( rv$error != 0 ) + stop("C function R_nc4_get_var_text returned error") + + dim(rv$data) <- strdim + } + + else if( precint == 12 ) { + #----------------------------- + # netcdf version 4 String type + #----------------------------- + rv <- .Call( "R_nc4_get_vara_string", + as.integer(ncid), + as.integer(varid), + as.integer(c.start), # Already switched to C convention... + as.integer(c.count), # Already switched to C convention... + PACKAGE="ncdf4" ) + } + else + { + stop(paste("Trying to get variable of an unhandled type code: ",precint, "(", ncdf4:::ncvar_type_to_string(precint), ")")) + } + if( verbose ) { + print(paste("ncvar_get_inner: C call returned",rv$error)) + print(paste("ncvar_get_inner: dim of directly returned array:")) + print(dim(rv$data)) + } + + #-------------------------------------------------------- + # Set our dims...but collapse degenerate dimensions first + #-------------------------------------------------------- + if( ndims > 0 ) { + if( collapse_degen ) { + count.nodegen <- vector() + foundone <- 0 + for( i in 1:ndims ) + if( count[i] > 1 ) { + count.nodegen <- append(count.nodegen, count[i]) + foundone <- 1 + } + if( foundone == 0 ) + dim(rv$data) <- (1) + else + { + if( verbose ) + print(paste("count.nodegen:",count.nodegen," Length of data:",length(rv$data))) + if( precint != 5 ) + dim(rv$data) <- count.nodegen + } + } + else if (precint != 5) + dim(rv$data) = count + + if( verbose ) { + print("ncvar_get: final dims of returned array:") + print(dim(rv$data)) + } + } + + #---------------------------------------------------------- + # Change missing values to "NA"s. Note that 'varid2Rindex' + # is NOT filled out for dimvars, so skip this if a dimvar + # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte + # NOTE: if type is 3 or 4 (float or double), the missing + # value was already set by the C routine. + #---------------------------------------------------------- + if( (!raw_datavals) && (precint != 5) && (precint != 3) && (precint != 4) ) { # not char, float, or double + if( verbose ) print("ncvar_get: setting missing values to NA") + if( (precint==1) || (precint==2) || (precint==6) || (precint==7) || (precint==8) || (precint==9)) { + #-------------------------------------- + # Short, Int, Byte, UByte, UShort, UInt + #-------------------------------------- + if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, " to NA", sep='')) + if( ! is.na(missval) ) + rv$data[rv$data==missval] <- NA + } + else if( (precint==10) || (precint==11)) { + #-------------------------------- + # 8-byte int, unsigned 8-byte int + #-------------------------------- + if( ! is.na(missval) ) { + tol <- abs(missval*1.e-5) + if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, + " (w/tolerance ", tol,") to NA", sep='')) + rv$data[abs(rv$data-missval)