From c04846f086c62206c0b6dfb2f71664e0a4b15658 Mon Sep 17 00:00:00 2001
From: Nicolau Manubens <nicolau.manubens@bsc.es>
Date: Sun, 9 Apr 2017 18:04:44 +0200
Subject: [PATCH 1/4] Bug in nc2a, not dropping var_dim.

---
 R/NcToArray.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/NcToArray.R b/R/NcToArray.R
index 45c3289..1945ff7 100644
--- a/R/NcToArray.R
+++ b/R/NcToArray.R
@@ -246,7 +246,7 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
           #names(dim(var_result)) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
         }
         if (!is.null(var_result)) {
-          if (!drop_var_dim || (length(vars_to_read_vector) == 1)) {
+          if (!(drop_var_dim && (length(vars_to_read_vector) == 1))) {
             dim(var_result) <- c(setNames(1, var_tag), dim(var_result))
           }
           attr(var_result, 'variables') <- structure(list(atts), .Names = var_name)
-- 
GitLab


From ded36c38e462a49acdd7c5f21e0e05873c04194d Mon Sep 17 00:00:00 2001
From: Nicolau Manubens <nicolau.manubens@bsc.es>
Date: Sun, 9 Apr 2017 18:58:49 +0200
Subject: [PATCH 2/4] Doc enhancement.

---
 man/NcToArray.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/NcToArray.Rd b/man/NcToArray.Rd
index 855d344..ec48517 100644
--- a/man/NcToArray.Rd
+++ b/man/NcToArray.Rd
@@ -5,7 +5,7 @@
 Read a NetCDF File Into an R Array
 }
 \description{
-Reads one or a set of variables together with metadata items from a NetCDF file into an R array. Indices to retrieve (not necessarily consecutive) can be specified for each of the dimensions. Depending on the format of the request, the variables will be merged in into a single extended array or returned in a list with an array for each variable. The different variables in the file are considered to be stored along a dimension called 'var', so reading a variable 'foo' with dimensions 'lat' and 'lon' would result in an array with the dimensions c('var' = 1, 'lat' = n_lats, 'lon' = n_lons).
+Reads one or a set of variables together with metadata items from a single NetCDF file into an R array (see package 'startR' to read data from multiple files/data sets). Indices to retrieve (not necessarily consecutive) can be specified for each of the dimensions. Depending on the format of the request, the variables will be merged in into a single extended array or returned in a list with an array for each variable. The different variables in the file are considered to be stored along a dimension called 'var', so reading a variable 'foo' with dimensions 'lat' and 'lon' would result in an array with the dimensions c('var' = 1, 'lat' = n_lats, 'lon' = n_lons).
 }
 \usage{
 NcToArray(file_to_read, dim_indices = NULL, vars_to_read = NULL,
@@ -17,7 +17,7 @@ nc2a(file_to_read, dim_indices = NULL, vars_to_read = NULL,
 }
 \arguments{
   \item{file_to_read}{
-Path to the file to be read or a NetCDF object as returned by \code{easyNCDF::NcOpen} or \code{ncdf4::nc_open}.
+Path to the file to be read or a NetCDF object as returned by \code{easyNCDF::NcOpen} or \code{ncdf4::nc_open}. See package 'startR' if need to read data from multiple files/data sets.
   }
   \item{dim_indices}{
 Named list with numeric vectors of indices to take for each dimension. The names should correspond to the dimension names which to take the indices for. Non-consecutive indices can be specified. If \code{expect_all_indices = FALSE} (default), it is not mandatory to specify the indices for all (or even any of) the dimensions. In that case all the indices along such dimensions will be read in. If \code{expect_all_indices = TRUE}, then indices for all the dimensions have to be specified for the function to return a data array. In that case, \code{NA} can be used to request all indices for a dimension if desired.
-- 
GitLab


From 8a48c79e5e3e871849c5270342dff28c0f7dcb2d Mon Sep 17 00:00:00 2001
From: Nicolau Manubens <nicolau.manubens@ic3.cat>
Date: Tue, 11 Apr 2017 19:56:35 +0200
Subject: [PATCH 3/4] Minor fixes.

---
 R/NcToArray.R | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/R/NcToArray.R b/R/NcToArray.R
index 1945ff7..30561c0 100644
--- a/R/NcToArray.R
+++ b/R/NcToArray.R
@@ -72,7 +72,6 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
 #  if (!is.character(vars_to_read) && !is.numeric(vars_to_read)) {
 #    stop("Parameter 'vars_to_read' must be a numeric vector or vector of character strings.")
 #  }
-
   result_list <- NULL
   for (vars_to_read_vector in vars_to_read) {
     result <- NULL
@@ -94,12 +93,11 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
       }
       #file_object$var[extra_dimvars] <- extra_dimvars_list
       #file_object$nvars <- file_object$nvars + length(extra_dimvars)
-      nmv <- numeric_var_indices <- which(is.numeric(vars_to_read_vector))
-      if (length(nmv) > 0) {
-        if (any(vars_to_read_vector[nmv] > (length(file_object$var) + length(extra_dimvars)))) {
+      if (is.numeric(vars_to_read_vector)) {
+        if (any(vars_to_read_vector > (length(file_object$var) + length(extra_dimvars)))) {
           stop("Provided numerical variable indices out of bounds in 'vars_to_read'.")
         }
-        vars_to_read_vector[nmv] <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector[nmv]]
+        vars_to_read_vector <- c(sapply(file_object$var, '[[', 'name'), extra_dimvars)[vars_to_read_vector]
       }
       for (var_name in vars_to_read_vector) {
         if (var_name %in% extra_dimvars) {
-- 
GitLab


From 482a523a7f7bdf53117dc5ae5e1a2d2f25645ec5 Mon Sep 17 00:00:00 2001
From: Nicolau Manubens <nicolau.manubens@bsc.es>
Date: Thu, 13 Apr 2017 02:32:20 +0200
Subject: [PATCH 4/4] Fixes to support character variables.

---
 R/NcReadDims.R |   5 +
 R/NcToArray.R  |  13 ++
 R/Utils.R      | 319 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+)

diff --git a/R/NcReadDims.R b/R/NcReadDims.R
index ce4a095..660b4ef 100644
--- a/R/NcReadDims.R
+++ b/R/NcReadDims.R
@@ -44,6 +44,11 @@ NcReadDims <- function(file_to_read, var_names = NULL) {
       }
       found_dims <- file_object$var[[var_name]]$size
       names(found_dims) <- sapply(file_object$var[[var_name]]$dim, '[[', 'name')
+      # Support for character strings
+      if ((file_object$var[[var_name]]$prec == 'char') && 
+          (length(file_object$var[[var_name]][['dim']]) > 1)) {
+        found_dims <- found_dims[-1]
+      }
       new_dim <- c(var = 1)
       found_dims <- c(new_dim, found_dims)
       if (!is.null(dims)) {
diff --git a/R/NcToArray.R b/R/NcToArray.R
index 30561c0..27b7099 100644
--- a/R/NcToArray.R
+++ b/R/NcToArray.R
@@ -212,8 +212,21 @@ NcToArray <- function(file_to_read, dim_indices = NULL, vars_to_read = NULL,
             }
             start <- sapply(indices_to_take, function(x) if (is_single_na(x)) 1 else min(x))
             count <- sapply(indices_to_take, function(x) if (is_single_na(x)) -1 else max(x) - min(x) + 1)
+            # Support for character strings
+            if ((file_object[['var']][[var_name]][['prec']] == 'char') && 
+                (length(file_object[['var']][[var_name]][['dim']]) > 1)) {
+              start <- c(1, start)
+              count <- c(-1, count)
+              original_ncvar_get_inner <- ncdf4:::ncvar_get_inner
+              assignInNamespace('ncvar_get_inner', .ncvar_get_inner, 'ncdf4')
+            }
             var_result <- do.call('[', c(list(ncvar_get(file_object, var_name, start, count, collapse_degen = FALSE)),
                                          lapply(indices_to_take, function(x) if (is_single_na(x)) TRUE else x - min(x) + 1), list(drop = FALSE)))
+            # Support for character strings
+            if ((file_object[['var']][[var_name]][['prec']] == 'char') && 
+                (length(file_object[['var']][[var_name]][['dim']]) > 1)) {
+              assignInNamespace('ncvar_get_inner', original_ncvar_get_inner, 'ncdf4')
+            }
             #metadata <- c(metadata, structure(list(file_object$var[[var_name]]), .Names = var_name))
             ## TODO: Crop dimensions in attributes
             if (!is.null(reorder_back)) {
diff --git a/R/Utils.R b/R/Utils.R
index 82c2fb6..27be8dd 100644
--- a/R/Utils.R
+++ b/R/Utils.R
@@ -203,3 +203,322 @@
   names(dim(array1)) <- names(dim(array2))
   array1
 }
+
+# This function is a copy of the function ncvar_get_inner in 'ncdf4' package 
+# with a bugfix. When ncdf4 addresses this bug, this function will be removed
+# from this package.
+.ncvar_get_inner <- function( ncid, varid, missval, addOffset=0., scaleFact=1.0, start=NA, count=NA, verbose=FALSE, signedbyte=TRUE, 
+		collapse_degen=TRUE, raw_datavals=FALSE ) {
+
+	if( ! is.numeric(ncid))
+		stop("Error, first arg passed to ncvar_get_inner (ncid) must be a simple C-style integer that is passed directly to the C api")
+	if( ! is.numeric(varid))
+
+		stop("Error, second arg passed to ncvar_get_inner (varid) must be a simple C-style integer that is passed directly to the C api")
+
+	if( verbose ) {
+		print(paste("ncvar_get_inner: entering with (C-STYLE INTEGER ONLY) ncid=", ncid, 
+			"varid=", varid ))
+		print(paste("ncvar_get_inner: following line is collapse_degen:"))
+		print(collapse_degen)
+		}
+
+	tmp_typename = c('short', 'int', 'float', 'double', 'char', 'byte' )
+
+	have_start = (length(start)>1) || ((length(start)==1) && (!is.na(start)))
+	have_count = (length(count)>1) || ((length(count)==1) && (!is.na(count)))
+
+	sm <- storage.mode(start)
+	if( (sm != "double") && (sm != "integer") && (sm != "logical"))
+		stop(paste("passed a start argument of storage mode",sm,"; can only handle double or integer"))
+	sm <- storage.mode(count)
+	if( (sm != "double") && (sm != "integer") && (sm != "logical"))
+		stop(paste("passed a 'count' argument with storage mode '",sm,"'; can only handle double or integer", sep=''))
+
+	if( signedbyte )
+		byte_style = 1	# 1=signed
+	else
+		byte_style = 2	# 2=unsigned
+
+	varsize <- ncdf4:::ncvar_size ( ncid, varid )
+	ndims   <- ncdf4:::ncvar_ndims( ncid, varid )
+	if( verbose ) {
+		print(paste("ndims:",ndims))
+		print("ncvar_get: varsize:")
+		print(varsize)
+		}
+
+	#------------------------------
+	# Fix up start and count to use
+	#------------------------------
+	if( ndims == 0 ) {
+		start <- 1
+		count <- 1
+		}
+	else
+		{
+		if( ! have_start )
+			start <- rep(1,ndims)	# Note: use R convention for now
+		if( ! have_count )
+			count <- varsize - start + 1	
+		else
+			{
+			#------------------
+			# Take care of -1's
+			#------------------
+			count <- ifelse( (count == -1), varsize-start+1, count)
+			}
+		}
+	if( verbose ) {
+		print("ncvar_get: start:")
+		print(start)
+		print("ncvar_get: count:")
+		print(count)
+		}
+
+	if( ndims > 0 ) {
+		if( length(start) != ndims ) 
+			stop(paste("Error: variable has",ndims,"dims, but start has",length(start),"entries.  They must match!"))
+		if( length(count) != ndims ) 
+			stop(paste("Error: variable has",ndims,"dims, but count has",length(count),"entries.  They must match!"))
+		}
+
+	#----------------------------------------
+	# Need to know how much space to allocate
+	#----------------------------------------
+	totvarsize <- prod(count)
+	if( verbose )
+		print(paste("ncvar_get: totvarsize:",totvarsize))
+	
+	#--------------------------------------------------
+	# Switch from R to C convention for start and count
+	#--------------------------------------------------
+	c.start <- start[ ndims:1 ] - 1
+	c.count <- count[ ndims:1 ]
+
+	rv <- list()
+	rv$error <- -1
+
+	#---------------------------------
+	# Get the correct type of variable
+	#---------------------------------
+	precint <- ncdf4:::ncvar_type( ncid, varid ) # 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte, 7=ubyte, 8=ushort, 9=uint, 10=int64, 11=uint64, 12=string
+	if( verbose )
+		print(paste("ncvar_get_inner: getting var of type",tmp_typename[precint], 'id=', precint))
+
+	if( (precint == 1) || (precint == 2) || (precint == 6) || (precint == 7) || (precint == 8)) {
+		#--------------------------------
+		# Short, Int, Byte, UByte, UShort
+		#--------------------------------
+		rv <- .Call("Rsx_nc4_get_vara_int", 
+			as.integer(ncid),
+			as.integer(varid),	
+			as.integer(c.start),	# Already switched to C convention...
+			as.integer(c.count),	# Already switched to C convention...
+			as.integer(byte_style), # 1=signed, 2=unsigned
+			PACKAGE="ncdf4")
+		if( rv$error != 0 ) 
+			stop("C function Rsx_nc4_get_var_int returned error")
+		data = rv$data
+		}
+
+	else if( (precint == 3) || (precint == 4)) {
+		#----------------------------------------------------------------
+		# Float, double where we have the C routine fix the missing value
+		# 'imvstate' is: 0 if we do not have a missing value (it is NULL),
+		# 1 if the missing value is NA, 2 if the missing value is present
+		# and not NULL and not NA. These codes are used by the C routine
+		#----------------------------------------------------------------
+		if( is.null( missval )) {
+			passed_missval = 0.0
+			imvstate = as.integer(0)
+			}
+		else if( is.na(missval)) {
+			passed_missval = 0.0
+			imvstate = as.integer(1)
+			}
+		else
+			{
+			passed_missval = missval
+			imvstate = as.integer(2)
+			}
+
+		if( raw_datavals )
+			fixmiss = as.integer(0)	# setting 'raw_datavalues' to TRUE gives the actual raw numbers from the file, not processed at all
+		else
+			fixmiss = as.integer(1)
+
+		if( verbose ) print('about to call Rsx_nc4_get_vara_double...')
+		rv <- .Call("Rsx_nc4_get_vara_double", 
+			as.integer(ncid),
+			as.integer(varid),
+			as.integer(c.start),	# Already switched to C convention...
+			as.integer(c.count),	# Already switched to C convention...
+			fixmiss,
+			imvstate,
+			as.double(passed_missval),
+			PACKAGE="ncdf4")
+		if( rv$error != 0 ) 
+			stop("C function R_nc4_get_vara_double returned error")
+		if( verbose ) print('back from call to Rsx_nc4_get_vara_double...')
+		data = rv$data
+		}
+
+	else if( (precint == 9) || (precint == 10) || (precint == 11)) {
+		#---------------------------------------------
+		# uint, int64, uint64
+		# Thanks to Tom Hilinski of Colorado State for
+		# fix to uint here
+		#---------------------------------------------
+		rv$data  <- double(totvarsize)
+		fixmiss = as.integer(0)
+		rv <- .Call("Rsx_nc4_get_vara_double", 
+			as.integer(ncid),
+			as.integer(varid),
+			as.integer(c.start),	# Already switched to C convention...
+			as.integer(c.count),	# Already switched to C convention...
+			fixmiss,
+			as.integer(-1),		# The 'imvstate' arg is unused in this call since no fixmiss
+			as.double(0.0),		# the passed missing value is not used in this call since no fixmiss
+			PACKAGE="ncdf4")
+		if( rv$error != 0 ) 
+			stop("C function R_nc4_get_vara_double returned error")
+		data = rv$data
+		}
+
+	else if( precint == 5 ) {
+		#-----
+		# Char 
+		#-----
+		strndims <- ndims - 1
+		strlen   <- count[1] + 1
+		strdim   <- 1
+		if( strndims >= 1 ) {
+			strdim <- count[2:ndims]
+			nstr   <- prod(strdim)
+			}
+		else
+			nstr <- 1
+		if(verbose)
+			print(paste("ndims:",ndims,"strndims:",strndims,"strlen:",strlen,"nstr:",nstr))
+
+		#----------------------------------------------
+		# Make a character string of the specified size
+		#----------------------------------------------
+		stor     <- ncdf4:::blankstring_ncdf4( totvarsize )
+		stordata <- ncdf4:::blankstring_ncdf4(strlen)
+		if( verbose )
+			print(paste("length of stor string:",nchar(stor)))
+		rv$tempstore <- stor
+		rv$data      <- array(stordata, dim=strdim)
+
+		rv <- .C("R_nc4_get_vara_text", 
+			as.integer(ncid),
+			as.integer(varid),
+			as.integer(c.start),	# Already switched to C convention...
+			as.integer(c.count),	# Already switched to C convention...
+			tempstore=as.character(rv$tempstore),
+			data=as.character(rv$data),
+			error=as.integer(rv$error),
+			PACKAGE="ncdf4")
+		if( rv$error != 0 ) 
+			stop("C function R_nc4_get_var_text returned error")
+
+		dim(rv$data) <- strdim
+		}
+
+	else if( precint == 12 ) {
+		#-----------------------------
+		# netcdf version 4 String type
+		#-----------------------------
+		rv <- .Call( "R_nc4_get_vara_string",
+			as.integer(ncid),
+			as.integer(varid),
+			as.integer(c.start),    # Already switched to C convention...
+			as.integer(c.count),    # Already switched to C convention...
+			PACKAGE="ncdf4" )
+		}
+	else
+		{
+		stop(paste("Trying to get variable of an unhandled type code: ",precint, "(", ncdf4:::ncvar_type_to_string(precint), ")"))
+		}
+	if( verbose ) {
+		print(paste("ncvar_get_inner: C call returned",rv$error))
+		print(paste("ncvar_get_inner: dim of directly returned array:"))
+		print(dim(rv$data))
+		}
+
+	#--------------------------------------------------------
+	# Set our dims...but collapse degenerate dimensions first
+	#--------------------------------------------------------
+	if( ndims > 0 ) {
+		if( collapse_degen ) {
+			count.nodegen <- vector()
+			foundone <- 0
+			for( i in 1:ndims )
+				if( count[i] > 1 ) {
+					count.nodegen <- append(count.nodegen, count[i])
+					foundone <- 1
+					}
+			if( foundone == 0 ) 
+				dim(rv$data) <- (1)
+			else
+				{
+				if( verbose )
+					print(paste("count.nodegen:",count.nodegen,"   Length of data:",length(rv$data)))
+				if( precint != 5 )
+					dim(rv$data) <- count.nodegen
+				}
+			}
+		else if (precint != 5)
+			dim(rv$data) = count
+
+		if( verbose ) {
+			print("ncvar_get: final dims of returned array:")
+			print(dim(rv$data))
+			}
+		}
+
+	#----------------------------------------------------------
+	# Change missing values to "NA"s.  Note that 'varid2Rindex'
+	# is NOT filled out for dimvars, so skip this if a dimvar
+	# 1=short, 2=int, 3=float, 4=double, 5=char, 6=byte
+	# NOTE: if type is 3 or 4 (float or double), the missing
+	# value was already set by the C routine.
+	#----------------------------------------------------------
+	if( (!raw_datavals) && (precint != 5) && (precint != 3) && (precint != 4) ) {	# not char, float, or double
+		if( verbose ) print("ncvar_get: setting missing values to NA")
+		if( (precint==1) || (precint==2) || (precint==6) || (precint==7) || (precint==8) || (precint==9)) {
+			#--------------------------------------
+			# Short, Int, Byte, UByte, UShort, UInt
+			#--------------------------------------
+			if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, " to NA", sep=''))
+			if( ! is.na(missval) ) 
+				rv$data[rv$data==missval] <- NA
+			}
+		else if( (precint==10) || (precint==11)) {
+			#--------------------------------
+			# 8-byte int, unsigned 8-byte int
+			#--------------------------------
+			if( ! is.na(missval) ) {
+				tol <- abs(missval*1.e-5)
+				if( verbose ) print(paste("ncvar_get_inner: setting ", tmp_typename[precint],"-type missing value of ", missval, 
+					" (w/tolerance ", tol,") to NA", sep=''))
+				rv$data[abs(rv$data-missval)<tol] <- NA
+				}
+			}
+		}
+
+	#--------------------------------------
+	# Implement add_offset and scale_factor
+	#--------------------------------------
+	if( ! raw_datavals ) {
+		if( (scaleFact != 1.0) || (addOffset != 0.0) ) {
+			if( verbose ) 
+				print(paste("ncvar_get: implementing add_offset=", addOffset, " and scaleFact=", scaleFact ))
+			rv$data <- rv$data * scaleFact + addOffset
+			}
+		}
+
+	return(rv$data)
+}
-- 
GitLab