From 412b01203e5928559da7f0c97a06c04b2e4d3614 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 30 May 2023 18:30:01 +0200 Subject: [PATCH] Remove ClimProjDiags::Subset dependency; Add unit test; fix examples --- DESCRIPTION | 3 +- NAMESPACE | 1 - R/ArrayToNc.R | 58 +++++++--- R/Utils.R | 48 +++++++++ man/ArrayToNc.Rd | 53 ++++++++-- tests/testthat.R | 4 + tests/testthat/test-ArrayToNc.R | 181 ++++++++++++++++++++++++++++++++ 7 files changed, 324 insertions(+), 24 deletions(-) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-ArrayToNc.R diff --git a/DESCRIPTION b/DESCRIPTION index e1d0bcc..131fa1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,10 +13,9 @@ Depends: R (>= 3.2.0) Imports: ncdf4, - ClimProjDiags, abind License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/easyNCDF/-/wikis/home BugReports: https://earth.bsc.es/gitlab/es/easyNCDF/-/issues SystemRequirements: netcdf development libraries -RoxygenNote: 7.0.1 +RoxygenNote: 7.2.0 diff --git a/NAMESPACE b/NAMESPACE index 4a7fb54..b13c64c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,5 +7,4 @@ export(NcReadDims) export(NcReadVarNames) export(NcToArray) import(ncdf4) -importFrom(ClimProjDiags,Subset) importFrom(stats,setNames) diff --git a/R/ArrayToNc.R b/R/ArrayToNc.R index e880f8e..6b4e477 100644 --- a/R/ArrayToNc.R +++ b/R/ArrayToNc.R @@ -1,16 +1,26 @@ #'Save multidimensional R arrays into NetCDF files #' #'@author N. Manubens \email{nicolau.manubens@bsc.es} -#'@description This function takes as input one or a list of multidimensional R arrays and stores them in a NetCDF file, using the \code{ncdf4} package. The full path and name of the resulting file must be specified. Metadata can be attached to the arrays and propagated into the NetCDF file in 3 possible ways:\cr +#'@description This function takes as input one or a list of multidimensional R +#'arrays and stores them in a NetCDF file, using the \code{ncdf4} package. 
The +#'full path and name of the resulting file must be specified. Metadata can be +#'attached to the arrays and propagated into the NetCDF file in 3 possible +#'ways:\cr #' \itemize{ - #' \item{Via the list names if a list of arrays is provided:}{Each name in the input list, corresponding to one multidimensional array, will be interpreted as the name of the variable it contains.\cr + #' \item{Via the list names if a list of arrays is provided:}{Each name in +#' the input list, corresponding to one multidimensional array, will be +#' interpreted as the name of the variable it contains.\cr #'E.g:\cr #' \code{ #'ArrayToNc(arrays = list(temperature = array(1:9, c(3, 3))), -#' file_path = 'example.nc') +#' file_path = 'example.nc') #' } #' } -#' \item{Via the dimension names of each provided array:}{The dimension names of each of the provided arrays will be interpreted as names for the dimensions of the NetCDF files. Read further for special dimension names that will trigger special behaviours, such as 'time' and 'var'.\cr +#' \item{Via the dimension names of each provided array:}{The dimension names +#' of each of the provided arrays will be interpreted as names for the +#' dimensions of the NetCDF files. Read further for special dimension +#' names that will trigger special behaviours, such as 'time' and 'var'. +#'\cr #'E.g:\cr #' \code{ #'temperature <- array(rnorm(100 * 50 * 10), dim = c(100, 50, 10)) @@ -18,7 +28,14 @@ #'ArrayToNc(list(temperature = temperature), file_path = 'example.nc') #' } #' } -#' \item{Via the attribute 'variables' of each provided array:}{The arrays can be provided with metadata in an attribute named 'variables', which is expected to be a named list of named lists, where the names of the container list are the names of the variables present in the provided array, and where each sub-list contains metadata for each of the variables. 
The attribute names and values supported in the sub-lists must follow the same format the package \code{ncdf4} uses to represent the NetCDF file headers.\cr +#' \item{Via the attribute 'variables' of each provided array:}{The arrays +#' can be provided with metadata in an attribute named 'variables', which +#' is expected to be a named list of named lists, where the names of the +#' container list are the names of the variables present in the provided +#' array, and where each sub-list contains metadata for each of the +#' variables. The attribute names and values supported in the sub-lists +#' must follow the same format the package \code{ncdf4} uses to represent +#' the NetCDF file headers.\cr #'E.g:\cr #' \code{ #'a <- array(1:400, dim = c(5, 10, 4, 2)) @@ -26,6 +43,10 @@ #' tos = list(addOffset = 100, #' scaleFact = 10, #' dim = list(list(name = 'time', +#' unlim = FALSE))), +#' tas = list(addOffset = 100, +#' scaleFact = 10, +#' dim = list(list(name = 'time', #' unlim = FALSE))) #' ) #'attr(a, 'variables') <- metadata @@ -35,17 +56,29 @@ #' } #' } #'The special dimension names are 'var'/'variable' and 'time'.\cr -#'If a dimension is named 'var' or 'variable', \code{ArrayToNc} will interpret each array entry along such dimension corresponds to a separate new variable, hence will create a new variable inside the NetCDF file and will use it to store all the data in the provided array for the corresponding entry along the 'var'/'variable' dimension.\cr -#'If a dimension is named 'time', by default it will be interpreted and built as an unlimited dimension. The 'time' dimension must be the last dimension of the array (the right-most). If a 'var'/'variable' dimension is present, the 'time' dimension can be also placed on its left (i.e. the one before the last dimension). 
The default behaviour of creating the 'time' as unlimited dimension can be disabled by setting manually the attribute \code{unlim = FALSE}, as shown in the previous example.\cr\cr +#'If a dimension is named 'var' or 'variable', \code{ArrayToNc} will interpret +#'each array entry along such dimension as a separate new variable, +#'hence will create a new variable inside the NetCDF file and will use it to +#'store all the data in the provided array for the corresponding entry along the +#''var'/'variable' dimension.\cr +#'If a dimension is named 'time', by default it will be interpreted and built as +#'an unlimited dimension. The 'time' dimension must be the last dimension of the +#'array (the right-most). If a 'var'/'variable' dimension is present, the 'time' +#'dimension can be also placed on its left (i.e. the one before the last +#'dimension). The default behaviour of creating the 'time' as unlimited +#'dimension can be disabled by setting manually the attribute +#'\code{unlim = FALSE}, as shown in the previous example.\cr\cr #'\code{a2nc} is an alias of \code{ArrayToNc}. #' -#'@param arrays One or a list of multidimensional data arrays. The list can be provided with names, which will be interpreted as variable names. The arrays can be provided with dimension names. The arrays can be provided with metadata in the attribute 'variables' (read section Description for details). +#'@param arrays One or a list of multidimensional data arrays. The list can be +#' provided with names, which will be interpreted as variable names. The arrays +#' can be provided with dimension names. The arrays can be provided with +#' metadata in the attribute 'variables' (read section Description for details). #'@param file_path Path and name of the NetCDF file to be created. #' #'@return This function returns NULL. 
#' #'@import ncdf4 -#'@importFrom ClimProjDiags Subset #'@importFrom stats setNames #'@examples #' \dontrun{ @@ -74,7 +107,8 @@ #'names(dim(a)) <- c('lat', 'lon', 'time', 'var') #'ArrayToNc(a, 'tmp.nc') #' -#'# The dimension 'var'/'variable' can be in any position and can have any length +#'# The dimension 'var'/'variable' can be in any position and can have any +#'# length. #'a <- array(1:1600, dim = c(10, 20, 4, 2)) #'names(dim(a)) <- c('lat', 'var', 'lon', 'time') #'ArrayToNc(a, 'tmp.nc') @@ -478,11 +512,11 @@ ArrayToNc <- function(arrays, file_path) { } else { if (scale_factor != 1 || add_offset != 0) { ncvar_put(ncdf_object, defined_vars[[var_counter]]$name, - (Subset(arrays[[i]], var_dim, j, drop = 'selected') - add_offset) / scale_factor, + (.subset(arrays[[i]], var_dim, j, drop = 'selected') - add_offset) / scale_factor, count = dim(arrays[[i]])[-var_dim]) } else { ncvar_put(ncdf_object, defined_vars[[var_counter]]$name, - Subset(arrays[[i]], var_dim, j, drop = 'selected'), + .subset(arrays[[i]], var_dim, j, drop = 'selected'), count = dim(arrays[[i]])[-var_dim]) } } diff --git a/R/Utils.R b/R/Utils.R index 15e56c1..c49df2f 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -216,3 +216,51 @@ } array1 } + +# This function also exists in startR::Utils. It serves the same functionality as ClimProjDiags::Subset. 
+.subset <- function(x, along, indices, drop = FALSE) { + # x: array with dimension names + # along: a vector of all characters or all numerics + # indices: a list of indices + # drop: F/"selected" + + if (any(is.character(along))) { + along <- match(along, names(dim(x))) + if (!all(is.numeric(along))) { + stop("Check parameter 'along'.") + } + } + if (!is.list(indices)) { + if (length(along) == 1) { + indices <- list(indices) + } else { + stop("Parameter 'indices' should be a list.") + } + } + if (length(indices) != length(along)) { + stop("Parameter 'along' and 'indices' should have the same length.") + } + if (!drop %in% c(FALSE, 'selected')) { + stop("Parameter 'drop' can only be FALSE or 'selected'.") + } + + # Save attributes except 'dim' + saved_attr <- attributes(x)[-which(names(attributes(x)) == 'dim')] + + # Take the subset + index <- as.list(rep(TRUE, length(dim(x)))) + index[along] <- indices + subset <- eval(as.call(c(as.name("["), as.name("x"), index, drop = F))) + + if (drop == 'selected') { + need_remove <- dim(subset)[along] == 1 # T,F + if (any(need_remove)) { + dim(subset) <- dim(subset)[-along[which(need_remove)]] + } + } + + # Add attributes back + attributes(subset)[names(saved_attr)] <- saved_attr + + return(subset) +} diff --git a/man/ArrayToNc.Rd b/man/ArrayToNc.Rd index c97f4ac..e51880e 100644 --- a/man/ArrayToNc.Rd +++ b/man/ArrayToNc.Rd @@ -10,7 +10,10 @@ ArrayToNc(arrays, file_path) a2nc(arrays, file_path) } \arguments{ -\item{arrays}{One or a list of multidimensional data arrays. The list can be provided with names, which will be interpreted as variable names. The arrays can be provided with dimension names. The arrays can be provided with metadata in the attribute 'variables' (read section Description for details).} +\item{arrays}{One or a list of multidimensional data arrays. The list can be +provided with names, which will be interpreted as variable names. The arrays +can be provided with dimension names. 
The arrays can be provided with +metadata in the attribute 'variables' (read section Description for details).} \item{file_path}{Path and name of the NetCDF file to be created.} } @@ -18,16 +21,26 @@ a2nc(arrays, file_path) This function returns NULL. } \description{ -This function takes as input one or a list of multidimensional R arrays and stores them in a NetCDF file, using the \code{ncdf4} package. The full path and name of the resulting file must be specified. Metadata can be attached to the arrays and propagated into the NetCDF file in 3 possible ways:\cr +This function takes as input one or a list of multidimensional R +arrays and stores them in a NetCDF file, using the \code{ncdf4} package. The +full path and name of the resulting file must be specified. Metadata can be +attached to the arrays and propagated into the NetCDF file in 3 possible +ways:\cr \itemize{ - \item{Via the list names if a list of arrays is provided:}{Each name in the input list, corresponding to one multidimensional array, will be interpreted as the name of the variable it contains.\cr + \item{Via the list names if a list of arrays is provided:}{Each name in + the input list, corresponding to one multidimensional array, will be + interpreted as the name of the variable it contains.\cr E.g:\cr \code{ ArrayToNc(arrays = list(temperature = array(1:9, c(3, 3))), - file_path = 'example.nc') + file_path = 'example.nc') } } - \item{Via the dimension names of each provided array:}{The dimension names of each of the provided arrays will be interpreted as names for the dimensions of the NetCDF files. Read further for special dimension names that will trigger special behaviours, such as 'time' and 'var'.\cr + \item{Via the dimension names of each provided array:}{The dimension names + of each of the provided arrays will be interpreted as names for the + dimensions of the NetCDF files. Read further for special dimension + names that will trigger special behaviours, such as 'time' and 'var'. 
+\cr E.g:\cr \code{ temperature <- array(rnorm(100 * 50 * 10), dim = c(100, 50, 10)) @@ -35,12 +48,23 @@ names(dim(temperature)) <- c('longitude', 'latitude', 'time') ArrayToNc(list(temperature = temperature), file_path = 'example.nc') } } - \item{Via the attribute 'variables' of each provided array:}{The arrays can be provided with metadata in an attribute named 'variables', which is expected to be a named list of named lists, where the names of the container list are the names of the variables present in the provided array, and where each sub-list contains metadata for each of the variables. The attribute names and values supported in the sub-lists must follow the same format the package \code{ncdf4} uses to represent the NetCDF file headers.\cr + \item{Via the attribute 'variables' of each provided array:}{The arrays + can be provided with metadata in an attribute named 'variables', which + is expected to be a named list of named lists, where the names of the + container list are the names of the variables present in the provided + array, and where each sub-list contains metadata for each of the + variables. 
The attribute names and values supported in the sub-lists + must follow the same format the package \code{ncdf4} uses to represent + the NetCDF file headers.\cr E.g:\cr \code{ a <- array(1:400, dim = c(5, 10, 4, 2)) metadata <- list( tos = list(addOffset = 100, + scaleFact = 10, + dim = list(list(name = 'time', + unlim = FALSE))), + tas = list(addOffset = 100, scaleFact = 10, dim = list(list(name = 'time', unlim = FALSE))) @@ -52,8 +76,18 @@ ArrayToNc(a, 'tmp.nc') } } The special dimension names are 'var'/'variable' and 'time'.\cr -If a dimension is named 'var' or 'variable', \code{ArrayToNc} will interpret each array entry along such dimension corresponds to a separate new variable, hence will create a new variable inside the NetCDF file and will use it to store all the data in the provided array for the corresponding entry along the 'var'/'variable' dimension.\cr -If a dimension is named 'time', by default it will be interpreted and built as an unlimited dimension. The 'time' dimension must be the last dimension of the array (the right-most). If a 'var'/'variable' dimension is present, the 'time' dimension can be also placed on its left (i.e. the one before the last dimension). The default behaviour of creating the 'time' as unlimited dimension can be disabled by setting manually the attribute \code{unlim = FALSE}, as shown in the previous example.\cr\cr +If a dimension is named 'var' or 'variable', \code{ArrayToNc} will interpret +each array entry along such dimension as a separate new variable, +hence will create a new variable inside the NetCDF file and will use it to +store all the data in the provided array for the corresponding entry along the +'var'/'variable' dimension.\cr +If a dimension is named 'time', by default it will be interpreted and built as +an unlimited dimension. The 'time' dimension must be the last dimension of the +array (the right-most). 
If a 'var'/'variable' dimension is present, the 'time' +dimension can be also placed on its left (i.e. the one before the last +dimension). The default behaviour of creating the 'time' as unlimited +dimension can be disabled by setting manually the attribute +\code{unlim = FALSE}, as shown in the previous example.\cr\cr \code{a2nc} is an alias of \code{ArrayToNc}. } \examples{ @@ -83,7 +117,8 @@ a <- array(1:1600, dim = c(10, 20, 4, 2)) names(dim(a)) <- c('lat', 'lon', 'time', 'var') ArrayToNc(a, 'tmp.nc') -# The dimension 'var'/'variable' can be in any position and can have any length +# The dimension 'var'/'variable' can be in any position and can have any +# length. a <- array(1:1600, dim = c(10, 20, 4, 2)) names(dim(a)) <- c('lat', 'var', 'lon', 'time') ArrayToNc(a, 'tmp.nc') diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..2b16ba9 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(easyNCDF) + +test_check("easyNCDF") diff --git a/tests/testthat/test-ArrayToNc.R b/tests/testthat/test-ArrayToNc.R new file mode 100644 index 0000000..4a660d0 --- /dev/null +++ b/tests/testthat/test-ArrayToNc.R @@ -0,0 +1,181 @@ + +out_dir <- "./tests/" + +################################### +test_that("1. 
dat1", { + +out_file <- paste0(out_dir, 'test_ArrayToNc_1.nc') + +tos <- array(1:400, dim = c(5, 10, 4)) +metadata <- list(tos = list(units = 'K')) +attr(tos, 'variables') <- metadata +names(dim(tos)) <- c('lat', 'lon', 'time') + +lon <- seq(0, 360 - 360 / 10, length.out = 10) +dim(lon) <- c(lon = 10) +metadata <- list(lon = list(units = 'degrees_east')) +attr(lon, 'variables') <- metadata + +lat <- seq(-90, 90, length.out = 5) +dim(lat) <- c(lat = 5) +metadata <- list(lat = list(units = 'degrees_north')) +attr(lat, 'variables') <- metadata + +#time_attr <- as.POSIXct(paste0("1970-03-", 3:6, " 12:00:00"), tz = 'UTC') +time_attr <- 3:6 +dim(time_attr) <- c(time = 4) +metadata <- list(time = list(units = 'days since 1970-03-01 12:00:00', + calendar = 'standard')) +attr(time_attr, 'variables') <- metadata + +ArrayToNc(list(tos, lon, lat, time_attr), out_file) + +dat1 <- NcToArray(out_file, vars_to_read = 'tos') +lon1 <- NcToArray(out_file, vars_to_read = 'lon') +lat1 <- NcToArray(out_file, vars_to_read = 'lat') + +expect_equal( +c(tos), +c(dat1) +) +expect_equal( +names(attributes(dat1)$variables$tos), +c("prec", "units", "dim", "unlim", "make_missing_value", "missval", "hasAddOffset", "hasScaleFact") +) +expect_equal( +attributes(dat1)$variables$tos$prec, +"int" +) +expect_equal( +attributes(dat1)$variables$tos$units, +"K" +) +expect_equal( +length(attributes(dat1)$variables$tos$dim), +3 +) +expect_equal( +sapply(attributes(dat1)$variables$tos$dim, '[[', 1), +c("lat", "lon", "time") +) +expect_equal( +c(lon), +c(lon1) +) +expect_equal( +attributes(lon1)$variables$lon$units, +"degrees_east" +) +expect_equal( +c(lat), +c(lat1) +) +expect_equal( +attributes(lat1)$variables$lat$units, +"degrees_north" +) + +# Delete files +unlink(out_file, recursive = TRUE) + +}) + +################################### +test_that("2. 
var", { + +out_file <- paste0(out_dir, 'test_ArrayToNc_2.nc') + +a <- array(1:27, dim = c(3, 3, 3)) +names(dim(a)) <- c('lon', 'lat', 'var') +ArrayToNc(a, out_file) + +dat2_1 <- NcToArray(out_file, vars_to_read = 'var1_1') +dat2_2 <- NcToArray(out_file, vars_to_read = 'var1_2') +dat2_3 <- NcToArray(out_file, vars_to_read = 'var1_3') + +expect_equal( +c(a), +c(c(dat2_1), c(dat2_2), c(dat2_3)) +) +expect_equal( +names(attributes(dat2_1)$variables$var1_1), +c("prec", "units", "dim", "unlim", "make_missing_value", "missval", "hasAddOffset", "hasScaleFact") +) +expect_equal( +names(attributes(dat2_2)$variables$var1_2), +names(attributes(dat2_1)$variables$var1_1) +) +expect_equal( +names(attributes(dat2_3)$variables$var1_3), +names(attributes(dat2_1)$variables$var1_1) +) +expect_equal( +dim(dat2_1), +c(var = 1, lon = 3, lat = 3) +) +expect_equal( +dim(dat2_2), +dim(dat2_1) +) +expect_equal( +dim(dat2_3), +dim(dat2_1) +) + +# Delete files +unlink(out_file, recursive = TRUE) + +}) + +test_that("3. 
var & addOffset, scaleFactor", { + +out_file <- paste0(out_dir, 'test_ArrayToNc_3.nc') + +a <- array(1:400, dim = c(5, 10, 4, 2)) +metadata <- list( + tos = list(addOffset = 100, + scaleFact = 10), + tas = list(addOffset = 100, + scaleFact = 10)) +attr(a, 'variables') <- metadata +names(dim(a)) <- c('lat', 'lon', 'time', 'var') +ArrayToNc(a, out_file) + +dat3_1 <- NcToArray(out_file, vars_to_read = 'tos') +dat3_2 <- NcToArray(out_file, vars_to_read = 'tas') + +expect_equal( +c(dat3_1), +c(rep(seq(10, 90, 10), each = 10), rep(100, 19), rep(seq(110, 190, 10), each = 10), 200) +) +expect_equal( +c(dat3_2), +c(rep(200, 9), rep(seq(210, 390, 10), each = 10), 400) +) +expect_equal( +dim(dat3_1), +c(var = 1, lat = 5, lon = 10, time = 4) +) +expect_equal( +names(attributes(dat3_1)$variables$tos), +c("prec", "units", "dim", "unlim", "make_missing_value", "missval", "hasAddOffset", "addOffset", "hasScaleFact", "scaleFact", "scale_factor", "add_offset") +) +expect_equal( +names(attributes(dat3_2)$variables$tas), +c("prec", "units", "dim", "unlim", "make_missing_value", "missval", "hasAddOffset", "addOffset", "hasScaleFact", "scaleFact", "scale_factor", "add_offset") +) +expect_equal( +(attributes(dat3_2)$variables$tas$add_offset), +100 +) +expect_equal( +attributes(dat3_2)$variables$tas$hasAddOffset, +TRUE +) + +# Delete files +unlink(out_file, recursive = TRUE) + +}) + + -- GitLab