From 009a83327c20e4bf00a527e0e0b1bb2e63b8ee2b Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 18 Aug 2020 19:32:51 +0200 Subject: [PATCH 1/5] Consider the case that metadata_dims = 'var' and var is more than one --- R/Start.R | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/R/Start.R b/R/Start.R index 899d6ba..e9540a9 100644 --- a/R/Start.R +++ b/R/Start.R @@ -4116,14 +4116,33 @@ Start <- function(..., # dim = indices/selectors, .message("Successfully retrieved data.") } -# NOTE: The original var_backup saves only the first of the list (i.e., 1st dat). -# If there is more than one dat, it should be put under each dat rather than $common. -# Question: If there is only one dat, should it be under $common or $dat1? -# It is under $common now. - if (length(attr(data_array, 'Variables')) > 1) { # more than 1 dat. Put under each dat +#NOTE: The metadata of variables can be saved in one of the three different structures. +# (1) matadata_dims = 'xxx', which the length of 'xxx' is 1: the metadata is saved +# under $common. +# If the length of 'xxx' > 1, +# (2) metadata_dims = 'dat' (i.e., pattern_dims): the metadata is saved under +# each dat, and only the first variable can be saved. +# (3) metadata_dims != 'dat' (e.g., 'var'): the metadata is saved only under dat1, +# and all the variables can be saved. +#TODO: Enable metadata_dims to have more than one item. For example, there are 2 dats +# and 2 vars. The metadata of the vars should be saved under each dat. That is, +# each data has two vars. + + if (length(attr(data_array, 'Variables')) > 1) { # metadata_dims length is more than 1. (2) or (3) var_backup <- attr(data_array, 'Variables') - for (kk in 1:length(var_backup)) { - picked_vars[[kk]][[names(var_backup[[kk]])]] <- var_backup[[kk]][[1]] + + if (metadata_dims == pattern_dims) { # 'dat'. 
(2) + for (kk in 1:length(var_backup)) { + if (!is.null(var_backup[[kk]])) { # if file does not exist + picked_vars[[kk]][[names(var_backup[[kk]])]] <- var_backup[[kk]][[1]] + } + } + } else { # metadata_dims = 'var' or other file dim. (3) + for (kk in 1:length(var_backup)) { + if (!is.null(var_backup[[kk]])) { # if file does not exist + picked_vars[[1]][[metadata_dims]][[names(var_backup[[kk]])]] <- var_backup[[kk]][[1]] + } + } } var_backup <- NULL -- GitLab From 32dca61cc330ecbb3d58d2400638bf48d30d5fda Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 20 Aug 2020 11:17:46 +0200 Subject: [PATCH 2/5] Bugfix for metadata_dims when multiple dats and vars are required --- NEWS.md | 3 ++ R/Start.R | 97 ++++++++++++++++++++++++++++++++++------------------ man/Start.Rd | 4 +-- 3 files changed, 69 insertions(+), 35 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4ebe2d6..2a122b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # startR v2.0.1 (Release date: 2020-08-) - Bugfix for metadata in the condition that reorder or transform is applied and 'return_vars' is NULL. +- Bugfix for the parameter 'metadata_dims'. It did not work correctly for the cases other than +'1 data set, 1 variable'. For 1 data set case, all the variables should be listed under $common in +the attributes; for more than 1 data set case, the variables should be listed under each $dat. - Bugfix for the missing first file case. It showed an error before when the first file is not found but now it works. # startR v2.0.0 (Release date: 2020-08-06) diff --git a/R/Start.R b/R/Start.R index e9540a9..990d8e7 100644 --- a/R/Start.R +++ b/R/Start.R @@ -611,8 +611,8 @@ #' auxiliary data read for only the first file of each source (or data set) in #' the pattern dimension (see \code{\dots} for info on what the pattern #' dimension is). However it can be configured to return the metadata for all -#' the files along any set of file dimensions. 
The parameter 'metadata_dims' -#' allows to configure this level of granularity of the returned metadata. +#' the files along any set of file dimensions. The default value is NULL, and +#' it will be assigned automatically as parameter 'pattern_dims'. #'@param selector_checker A function used internaly by Start() to #' translate a set of selectors (values for a dimension associated to a #' coordinate variable) into a set of numeric indices. It takes by default @@ -1080,7 +1080,7 @@ Start <- function(..., # dim = indices/selectors, # Check metadata_dims if (!is.null(metadata_dims)) { - if (is.na(metadata_dims)) { + if (any(is.na(metadata_dims))) { metadata_dims <- NULL } else if (!is.character(metadata_dims) || (length(metadata_dims) < 1)) { stop("Parameter 'metadata' dims must be a vector of at least one character string.") @@ -1089,6 +1089,21 @@ Start <- function(..., # dim = indices/selectors, metadata_dims <- pattern_dims } + # Check if pattern_dims is the first item in metadata_dims + if ((pattern_dims %in% metadata_dims) & metadata_dims[1] != pattern_dims) { + metadata_dims <- c(metadata_dims[-1], metadata_dims[1]) + } + # Check if metadata_dims has more than 2 elements + if ((metadata_dims[1] == pattern_dims & length(metadata_dims) > 2)) { + .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", + "function. Keep '", metadata_dims[1], "' and '", metadata_dims[2], "' only.")) + metadata_dims <- metadata_dims[1:2] + } else if (!(pattern_dims %in% metadata_dims) & length(metadata_dims) > 1) { + .warning(paste0("Parameter 'metadata_dims' has too many elements which serve repetitive ", + "function. Keep '", metadata_dims[1], "' only.")) + metadata_dims <- metadata_dims[1] + } + # Once the pattern dimension with dataset specifications is found, # the variable 'dat' is mounted with the information of each # dataset. 
@@ -4049,9 +4064,35 @@ Start <- function(..., # dim = indices/selectors, loaded_metadata_files <- list.files(metadata_folder) loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) unlink(metadata_folder, recursive = TRUE) - return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) - return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata - dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + +#NOTE: Here, metadata can be saved in one of two ways: one for $common and the other for $dat +# for $common, it is a list of metadata length. For $dat, it is a list of dat length, +# and each sublist has the metadata for each dat. + dim_of_metadata <- dim(array_of_metadata_flags)[metadata_dims] + if (!any(names(dim_of_metadata) == pattern_dims) | + (any(names(dim_of_metadata) == pattern_dims) & + dim_of_metadata[pattern_dims] == 1)) { # put under $common; old code + return_metadata <- vector('list', + length = prod(dim(array_of_metadata_flags)[metadata_dims])) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + + } else { # put under $dat. 
metadata_dims has 'dat' and dat length > 1 + return_metadata <- vector('list', + length = dim_of_metadata[pattern_dims]) + names(return_metadata) <- dat_names + for (kk in 1:length(return_metadata)) { + return_metadata[[kk]] <- vector('list', length = prod(dim_of_metadata[-1])) # 1 is dat + } + loaded_metadata_count <- 1 + for (kk in 1:length(return_metadata)) { + for (jj in 1:length(return_metadata[[kk]])) { + return_metadata[[kk]][jj] <- loaded_metadata[[loaded_metadata_count]] + names(return_metadata[[kk]])[jj] <- names(loaded_metadata[[loaded_metadata_count]]) + loaded_metadata_count <- loaded_metadata_count + 1 + } + } + } attr(data_array, 'Variables') <- return_metadata # TODO: Try to infer data type from loaded_metadata # as.integer(data_array) @@ -4116,38 +4157,28 @@ Start <- function(..., # dim = indices/selectors, .message("Successfully retrieved data.") } -#NOTE: The metadata of variables can be saved in one of the three different structures. -# (1) matadata_dims = 'xxx', which the length of 'xxx' is 1: the metadata is saved -# under $common. -# If the length of 'xxx' > 1, -# (2) metadata_dims = 'dat' (i.e., pattern_dims): the metadata is saved under -# each dat, and only the first variable can be saved. -# (3) metadata_dims != 'dat' (e.g., 'var'): the metadata is saved only under dat1, -# and all the variables can be saved. -#TODO: Enable metadata_dims to have more than one item. For example, there are 2 dats -# and 2 vars. The metadata of the vars should be saved under each dat. That is, -# each data has two vars. - - if (length(attr(data_array, 'Variables')) > 1) { # metadata_dims length is more than 1. (2) or (3) +#NOTE: The metadata of variables can be saved in one of the two different structures. +# (1) metadata_dims != 'dat', or (metadata_dims == 'dat' & length(dat) == 1): +# put under $common +# (2) (metadata_dims == 'dat' & length(dat) > 1): +# put under $dat1, $dat2, .... 
Put it in picked_vars list +#TODO: The current (2) uses the inefficient method. Should define the list structure first +# then fill the list, rather than expand it in the for loop. + if (any(metadata_dims == pattern_dims) & length(dat) > 1) { # (2) var_backup <- attr(data_array, 'Variables') - - if (metadata_dims == pattern_dims) { # 'dat'. (2) - for (kk in 1:length(var_backup)) { - if (!is.null(var_backup[[kk]])) { # if file does not exist - picked_vars[[kk]][[names(var_backup[[kk]])]] <- var_backup[[kk]][[1]] - } - } - } else { # metadata_dims = 'var' or other file dim. (3) - for (kk in 1:length(var_backup)) { - if (!is.null(var_backup[[kk]])) { # if file does not exist - picked_vars[[1]][[metadata_dims]][[names(var_backup[[kk]])]] <- var_backup[[kk]][[1]] - } + for (kk in 1:length(var_backup)) { + sublist_names <- lapply(var_backup, names)[[kk]] + for (jj in 1:length(sublist_names)) { + picked_vars[[kk]][[sublist_names[jj]]] <- var_backup[[kk]][[jj]] } } var_backup <- NULL - } else { #old code - var_backup <- attr(data_array, 'Variables')[[1]] + } else { #(1) + var_backup <- attr(data_array, 'Variables') + var_backup_name <- lapply(var_backup, names) + var_backup <- lapply(var_backup, '[[', 1) + names(var_backup) <- unlist(var_backup_name) } attr(data_array, 'Variables') <- NULL diff --git a/man/Start.Rd b/man/Start.Rd index d69562c..f80f3bb 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -295,8 +295,8 @@ the data reader can optionally return auxiliary data via the attribute auxiliary data read for only the first file of each source (or data set) in the pattern dimension (see \code{\dots} for info on what the pattern dimension is). However it can be configured to return the metadata for all -the files along any set of file dimensions. The parameter 'metadata_dims' - allows to configure this level of granularity of the returned metadata.} +the files along any set of file dimensions. 
The default value is NULL, and +it will be assigned automatically as parameter 'pattern_dims'.} \item{selector_checker}{A function used internaly by Start() to translate a set of selectors (values for a dimension associated to a -- GitLab From fd11d85486b50034fa463f7cb9fcc1e232c66822 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 20 Aug 2020 14:51:39 +0200 Subject: [PATCH 3/5] Add unit test, faq, and use case for metadata_dims --- inst/doc/faq.md | 28 +++ inst/doc/usecase.md | 4 + inst/doc/usecase/ex1_10_metadata_dims.R | 180 +++++++++++++++++++ tests/testthat/test-Start-metadata_dims.R | 207 +++++++++++++++++++++- 4 files changed, 413 insertions(+), 6 deletions(-) create mode 100644 inst/doc/usecase/ex1_10_metadata_dims.R diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 4ad94a0..c5be8aa 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -22,6 +22,7 @@ This document intends to be the first reference for any doubts that you may have 16. [Use parameter 'return_vars' in Start()](#16-use-parameter-return_vars-in-start) 17. [Use parameter 'split_multiselected_dims' in Start()](#17-use-parameter-split_multiselected_dims-in-start) 18. [Use glob expression '*' to define the file path](#18-use-glob-expression-to-define-the-file-path) + 19. [Use 'metadata_dims' to retrieve variable metadata](#19-use-metadata_dims-to-retrieve-variable-metadata) 2. **Something goes wrong...** @@ -692,6 +693,33 @@ This is different from the common definition of glob expression that tries to ex There is a parameter 'path_glob_permissive' in Start(). If set it to TRUE, the '*' in the filename itself will remain (i.e., as the common definition), while the ones in the path to the filename will still be replaced by the pattern in the first found file. +### 19. Use 'metadata_dims' to retrieve variable metadata +In addition to retrieve the data value, Start() can retrieve the auxiliary data as well. 
+The parameter 'metadata_dims' is for the variable whose values you want to retrieve (e.g., 'tas'), +and the parameter 'return_vars' is for other variables in the netCDF file (e.g., 'lat', 'lon', 'time'). + +The definition of 'metadata_dims' is: +> A vector of character strings with the names of the file dimensions which to return metadata for. +Start() by default returns the auxiliary data read for only the first file of each source (or data set) in the pattern dimension. + However, it can be configured to return the metadata for all the files along any set of file dimensions. + +By default, 'metadata_dims' is equal to 'pattern_dims', which we usually assign as 'dat'. +By this means, the variable auxiliary data will be collected from the first file of each data set. +If you only have one variable to be retrieved, you have no problem with the default. +However, what if the data set number and/or the variable number is more than 1? You need to adjust this parameter to get the complete metadata. + +Here are some common cases and the corresponding 'metadata_dims' to be used: +- One dat, one var: 'dat' (or default) +- One dat, two vars: 'var' +- Two dats, one var: 'dat' (or default) +- Two dats, two vars: c('dat', 'var') + +If there are two variables to be retrieved but metadata_dims doesn't have "var", only the first +variable's metadata will be retrieved. If there are two data sets but metadata_dims doesn't have "dat", +only the first data set will have the variable's metadata. + +Please find the relevant use cases in [ex1_10](inst/doc/usecase/ex1_10_metadta_dims.R). + # Something goes wrong... diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 65dbd2d..00eae6f 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -38,6 +38,10 @@ The problem may occur when the dimension number of the splitted selector is more 8. 
[Loading tas and tos from Decadal Predictions performed with the EC-Earth model](inst/doc/usecase/ex1_8_tasandtos.R) Some climate indices needs to be computed loading 'tas' (air temperature at 2m) over land and 'tos' (ocean surface temperature) over sea. Using **startR**, you can load these data in a unique **Start** call or with multiple calls separately for each variable. + 10. [Use 'metadata_dims' to retrieve complete variable metadata](inst/doc/usecase/ex1_10_metadta_dims.R) + This script tells you how to use the parameter 'metadata_dims' in Start() to get the complete variable metadata. +You will see four difference cases and learn the rules. +You can find more explanation in FAQ [How-to-19](inst/doc/faq.md#19-use-metadata_dims-to-retrieve-variable-metadata). 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_10_metadata_dims.R b/inst/doc/usecase/ex1_10_metadata_dims.R new file mode 100644 index 0000000..d49e314 --- /dev/null +++ b/inst/doc/usecase/ex1_10_metadata_dims.R @@ -0,0 +1,180 @@ +#--------------------------------------------------------------------- +# This script tells you how to use the parameter 'metadata_dims' in Start() +# to get the complete variable metadata. You will see four different cases +# and there are some rules that can be followed: +# (1) If the length of 'dat'/'var' is more than 1, you need to assign that +# file dimension in 'metadata_dims' +# (2) If there is more than one 'dat', the metadata will be stored under +# $<dat_name> in the attributes. +# (3) If there is only one 'dat', or 'dat' is not in 'metadata_dims', the +# metadata will be stored under $common in the attributes. +# You can find more explanation in FAQ How-to-19. 
+ +#--------------------------------------------------------------------- +library(startR) + +# Case 1: One dat, one var + + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + data <- Start(dat = list(list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = T + ) + +str(attr(data, 'Variables')) +# 'tas' is under $common +List of 2 + $ common :List of 2 + ..$ time: POSIXct[1:1], format: "2017-02-01" + ..$ tas :List of 12 + .. .. + .. .. + .. .. + $ system5_m1:List of 2 + ..$ longitude: num [1:10(1d)] 0 0.278 0.556 0.833 1.111 ... + .. .. + .. .. + .. .. + ..$ latitude : num [1:10(1d)] 89.8 89.5 89.2 88.9 88.7 ... + .. .. + .. .. + + +# Case 2: One dat, two vars + repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', + '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + var <- c('tas', 'clt') + sdate <- '20170101' + + data <- Start(dat = repos, + var = var, + time = indices(1), + lat = indices(1:10), + lon = indices(10:19), + return_vars = list(lat = NULL, lon = NULL), + metadata_dims = 'var', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + retrieve = TRUE + ) + +str(attr(data, 'Variables')) +# 'tas' and 'clt' are under $common +List of 2 + $ common:List of 4 + ..$ lat: num [1:10(1d)] -89.5 -88.8 -88.1 -87.4 -86.7 ... + .. .. + ..$ lon: num [1:10(1d)] 6.33 7.03 7.73 8.44 9.14 ... + .. .. + ..$ tas:List of 17 + .. .. + ..$ clt:List of 16 + .. .. 
+ $ dat1 : NULL + +# NOTE: If the above script is changed to metadata_dims = 'dat', you can only find +# 'tas' under $common + + +# Case 3: Two dats, one var + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = T + ) + +str(attr(data, 'Variables')) +# 'tas' is under both $system4_m1 and $system5_m1 +List of 3 + $ common :List of 1 + ..$ time: POSIXct[1:1], format: "2017-02-01" + $ system4_m1:List of 3 + ..$ longitude: num [1:10(1d)] 0 0.703 1.406 2.109 2.812 ... + .. .. + ..$ latitude : num [1:10(1d)] 89.5 88.8 88.1 87.4 86.7 ... + .. .. + ..$ tas :List of 11 + .. .. + $ system5_m1:List of 3 + ..$ longitude: num [1:10(1d)] 0 0.278 0.556 0.833 1.111 ... + .. .. + ..$ latitude : num [1:10(1d)] 89.8 89.5 89.2 88.9 88.7 ... + .. .. + ..$ tas :List of 12 + .. .. 
+ + +# Case 4: Two dats, two vars + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T + ) + +str(attr(data, 'Variables')) +# 'tas' and 'sfcWind' are under both $system4_m1 and $system5_m1 +List of 3 + $ common :List of 1 + ..$ time: POSIXct[1:1], format: "2017-02-01" + $ system4_m1:List of 3 + ..$ longitude: num [1:10(1d)] 0 0.703 1.406 2.109 2.812 ... + .. .. + ..$ latitude : num [1:10(1d)] 89.5 88.8 88.1 87.4 86.7 ... + .. .. + ..$ tas :List of 11 + .. .. + ..$ sfcWind :List of 11 + .. .. + $ system5_m1:List of 4 + ..$ longitude: num [1:10(1d)] 0 0.278 0.556 0.833 1.111 ... + .. .. + ..$ latitude : num [1:10(1d)] 89.8 89.5 89.2 88.9 88.7 ... + .. .. + ..$ tas :List of 12 + .. .. + ..$ sfcWind :List of 12 + .. .. + +#NOTE: If metadata_dims = 'dat', only 'tas' is under each dat. +# If metadata_dims = 'var', 'tas' and 'sfcWind' will be under $common. + diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index a771f98..00edd5d 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,6 +1,6 @@ context("Start() metadata_dims check") -test_that("1. One data set", { +test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" data <- Start(dat = list(list(name = 'system5_m1', path = repos)), var = 'tas', @@ -39,10 +39,10 @@ test_that("1. 
One data set", { 12 ) -} +}) -test_that("2. Two data sets", { +test_that("2. Two data sets, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" @@ -92,9 +92,204 @@ test_that("2. Two data sets", { 11 ) -} +}) -test_that("3. Specify metadata_dims with another file dimension", { +test_that("3. One data set, two vars", { + repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', + '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + var <- c('tas', 'clt') + sdate <- '20170101' + + data <- Start(dat = repos, + var = var, + time = indices(1), + lat = indices(1:10), + lon = indices(10:19), + return_vars = list(lat = NULL, lon = NULL), + metadata_dims = 'var', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + retrieve = TRUE + ) + + expect_equal( + length(attr(data, 'Variables')), + 2 + ) + expect_equal( + names(attr(data, 'Variables')), + c("common", "dat1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + c('lat', 'lon', 'tas', 'clt') + ) + expect_equal( + is.null(attr(data, 'Variables')$dat1), + TRUE + ) + expect_equal( + length(attr(data, 'Variables')$common$tas), + 17 + ) + expect_equal( + length(attr(data, 'Variables')$common$clt), + 16 + ) + +}) + +test_that("4. 
Two data sets, two vars", { + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = 'dat', + retrieve = T + ) + + expect_equal( + length(attr(data, 'Variables')), + 3 + ) + expect_equal( + names(attr(data, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(data, 'Variables')$system4_m1), + c("longitude", "latitude", "tas") + ) + expect_equal( + names(attr(data, 'Variables')$system5_m1), + c("longitude", "latitude", "tas") + ) + expect_equal( + length(attr(data, 'Variables')$system5_m1$tas), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1$tas), + 11 + ) + + + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T + ) + expect_equal( + length(attr(data, 'Variables')), + 3 + ) + expect_equal( + names(attr(data, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(data, 'Variables')$system4_m1), + 
c("longitude", "latitude", "tas", "sfcWind") + ) + expect_equal( + names(attr(data, 'Variables')$system5_m1), + c("longitude", "latitude", "tas", "sfcWind") + ) + expect_equal( + length(attr(data, 'Variables')$system5_m1$tas), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system5_m1$sfcWind), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1$tas), + 11 + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1$sfcWind), + 11 + ) + +}) + +test_that("5. Specify metadata_dims with another file dimension", { + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = 'sdate', + retrieve = T + ) + expect_equal( + length(attr(data, 'Variables')), + 3 + ) + expect_equal( + names(attr(data, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + c('time', 'tas', 'tas', 'tas') + ) + expect_equal( + names(attr(data, 'Variables')$system4_m1), + c("longitude", "latitude") + ) + expect_equal( + names(attr(data, 'Variables')$system5_m1), + c("longitude", "latitude") + ) + expect_equal( + length(attr(data, 'Variables')$common$tas), + 12 + ) -} +}) -- GitLab From 20d85683f5473f4b3e2aba0802de933eeec199de Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 20 Aug 2020 14:55:37 +0200 Subject: [PATCH 4/5] Typo fix for how-to-19 --- inst/doc/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/faq.md 
b/inst/doc/faq.md index c5be8aa..7121ca1 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -718,7 +718,7 @@ If there are two variables to be retrieved but metadata_dims does'nt have "var", variable's metadata will be retrieved. If there are two data sets but metadata_dims doesn't have "dat", only the first data set will have the variable's metadata. -Please find the relevant use cases in [ex1_10](inst/doc/usecase/ex1_10_metadta_dims.R). +Please find the relevant use cases in [ex1_10](inst/doc/usecase/ex1_10_metadata_dims.R). # Something goes wrong... -- GitLab From d286d76480322b5f3aa4f928a64b8aa6aade6a6e Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 20 Aug 2020 14:57:29 +0200 Subject: [PATCH 5/5] typo fix for ex1_10 --- inst/doc/usecase.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 00eae6f..1d3e877 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -38,7 +38,7 @@ The problem may occur when the dimension number of the splitted selector is more 8. [Loading tas and tos from Decadal Predictions performed with the EC-Earth model](inst/doc/usecase/ex1_8_tasandtos.R) Some climate indices needs to be computed loading 'tas' (air temperature at 2m) over land and 'tos' (ocean surface temperature) over sea. Using **startR**, you can load these data in a unique **Start** call or with multiple calls separately for each variable. - 10. [Use 'metadata_dims' to retrieve complete variable metadata](inst/doc/usecase/ex1_10_metadta_dims.R) + 10. [Use 'metadata_dims' to retrieve complete variable metadata](inst/doc/usecase/ex1_10_metadata_dims.R) This script tells you how to use the parameter 'metadata_dims' in Start() to get the complete variable metadata. You will see four difference cases and learn the rules. You can find more explanation in FAQ [How-to-19](inst/doc/faq.md#19-use-metadata_dims-to-retrieve-variable-metadata). -- GitLab