From f53ad8f54236fa0857f14950de829f5550c4042b Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Mar 2021 12:48:23 +0100 Subject: [PATCH 1/4] Revise the check for synomins return_vars name --- R/Start.R | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/R/Start.R b/R/Start.R index 5139ac5..850bc7d 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1400,32 +1400,27 @@ Start <- function(..., # dim = indices/selectors, } } -#///////////////////////////////////////////////////// +#================================================== # Check if return_vars name is inner dim name. If it is synonim, change back to # inner dim name and return a warning. dim_params_found_file_dims <- dim_params[found_file_dims[[i]]] - file_dims_array_dim_names <- unlist(lapply(lapply(dim_params_found_file_dims, dim), - names)) - if (any(!names(return_vars) %in% expected_inner_dims[[i]] & - !names(return_vars) %in% unlist(var_params) & - !names(return_vars) %in% file_dims_array_dim_names)) { - not_inner_dimname_ind <- which(!names(return_vars) %in% expected_inner_dims[[i]]) - for (not_inner_dim in 1:length(not_inner_dimname_ind)) { - wrong_name_return_vars <- names(return_vars)[not_inner_dimname_ind[not_inner_dim]] - if (!wrong_name_return_vars %in% unlist(synonims)) { - stop(paste0("Could not find variable '", wrong_name_return_vars, "' (or its ", - "synonims if specified) in the inner dimension names. The expected ", - "name should be ", paste(paste0("'", expected_inner_dims[[i]], "'"), collapse = ', '), ".")) - } else { - inner_dim_name <- names(unlist(lapply(lapply(synonims, '%in%', wrong_name_return_vars), which))) - names(return_vars)[not_inner_dimname_ind[not_inner_dim]] <- inner_dim_name - .warning(paste0("The name '", wrong_name_return_vars, "' in parameter 'return_vars' ", - "is synonim. Change it back to the inner dimension name, '", - inner_dim_name, "'.")) - } + if (any(names(return_vars) %in% unlist(synonims) & + !names(return_vars) %in% names(synonims))) { + use_syn_names <- which(names(return_vars) %in% unlist(synonims) & + !names(return_vars) %in% names(synonims)) + for (use_syn_name in use_syn_names) { + wrong_name <- names(return_vars)[use_syn_name] + names(return_vars)[use_syn_name] <- names(unlist( + lapply(lapply(synonims, '%in%', + names(return_vars)[use_syn_name]), + which))) + .warning(paste0("The name '", wrong_name, + "' in parameter 'return_vars' in synonim. ", + "Change it back to the inner dimension name, '", + names(return_vars)[use_syn_name], "'.")) } } -#///////////////////////////////////////////////////// +#===================================================== ## (Check the *_var parameters). if (any(!(unlist(var_params) %in% names(return_vars)))) { -- GitLab From 83c8e6dbcc2f428bf6eaff9d4d77c7df3f692636 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Mar 2021 13:09:59 +0100 Subject: [PATCH 2/4] Revise unit tests about implicit inner dim and split file dim --- .../testthat/test-Start-implicit_inner_dim.R | 62 +++---------------- tests/testthat/test-Start-split-merge.R | 56 ++++++++++++++--- 2 files changed, 58 insertions(+), 60 deletions(-) diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 621fe5b..26e3ce7 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,20 +1,15 @@ context("Start() implicit inner dimension") -# The unit test is for the implicitly defined inner dimension. If a file dimension selector -# is an array with named dimensions, and 'split_multiselected_dims' is used, then the file -# dim can be split into multiple dimensions that may contain inner dimensions. - -# merge_across_dims + split_multiselected_dims + implicit inner dim???? - +# The unit test is for the implicit inner dimension. If the inner dimension length is 1, +# startR allows it not to be specified in the call. Users can still define it in +# 'return_vars'. #--------------------------------------------------------------- -test_that("1. Split into inner dimension", { +test_that("1. time = 1", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" variable <- "prlr" -dates_file <- c("201311","201312","201411","201412") -dim(dates_file) <- c(time = 2, syear = 2) +dates_file <- c("201311","201312") -# (1) suppressWarnings( obs <- Start(dat = obs.path, var = variable, @@ -25,8 +20,8 @@ obs <- Start(dat = obs.path, longitude_reorder = CircularSort(-180, 180), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', + return_vars = list(latitude = NULL, + longitude = NULL, time = 'file_date'), split_multiselected_dims = TRUE, retrieve = FALSE) @@ -34,11 +29,11 @@ obs <- Start(dat = obs.path, expect_equal( attr(obs, 'Dimensions'), -c(dat = 1, var = 1, time = 2, syear = 2, latitude = 18, longitude = 81) +c(dat = 1, var = 1, file_date = 2, latitude = 18, longitude = 81) ) expect_equal( dim(attr(obs, 'Variables')$common$time), -c(file_date = 4, time = 1) +c(file_date = 2, time = 1) ) expect_equal( attr(obs, 'Variables')$common$time[1, 1], @@ -48,42 +43,3 @@ as.POSIXct('2013-11-15 23:30:00', tz = 'UTC') }) -test_that("2. Split into file dimension", { - -obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" -variable <- "prlr" -dates_file <- c("201311","201312","201411","201412") -dim(dates_file) <- c(smonth = 2, syear = 2) - -suppressWarnings( -obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - time = 'all', - latitude = values(list(35.6, 40)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(-10, 10)), - longitude_reorder = CircularSort(-180, 180), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = FALSE) -) - -expect_equal( -attr(obs, 'Dimensions'), -c(dat = 1, var = 1, smonth = 2, syear = 2, time = 1, latitude = 18, longitude = 81) -) -expect_equal( -dim(attr(obs, 'Variables')$common$time), -c(file_date = 4, time = 1) -) -expect_equal( -attr(obs, 'Variables')$common$time[1, 1], -as.POSIXct('2013-11-15 23:30:00', tz = 'UTC') -) - -}) diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index e998844..10e8ab6 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -4,7 +4,7 @@ context("Start() split + merge dim and value check") var_name <- 'sfcWind' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc' - +suppressWarnings( hcst <- Start(dat = path.exp, var = var_name, sdate = c('20160704', '20161222'), @@ -19,6 +19,7 @@ hcst <- Start(dat = path.exp, time = c('sdate', 'syear')), retrieve = F, silent = TRUE) +) dates <- attr(hcst, 'Variables')$common$time file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) @@ -27,7 +28,7 @@ path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' test_that("1. split + merge + narm", { -skip_on_cran() +suppressWarnings( obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -44,7 +45,7 @@ obs <- Start(dat = path.obs, longitude = 'dat', time = 'file_date'), retrieve = T) - +) expect_equal( dim(obs), c(dat = 1, var = 1, latitude = 10, longitude = 10, sdate = 2, syear = 3, time = 12) @@ -67,7 +68,7 @@ obs <- Start(dat = path.obs, test_that("2. no split + merge + narm", { -skip_on_cran() +suppressWarnings( obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -84,7 +85,7 @@ obs <- Start(dat = path.obs, longitude = 'dat', time = 'file_date'), retrieve = T) - +) expect_equal( dim(obs), c(dat = 1, var = 1, latitude = 10, longitude = 10, time = 72) @@ -107,7 +108,7 @@ obs <- Start(dat = path.obs, test_that("3. no split + merge + no narm", { -skip_on_cran() +suppressWarnings( obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -124,7 +125,7 @@ obs <- Start(dat = path.obs, longitude = 'dat', time = 'file_date'), retrieve = T) - +) expect_equal( dim(obs), c(dat = 1, var = 1, latitude = 10, longitude = 10, time = 108) @@ -145,3 +146,44 @@ obs <- Start(dat = path.obs, ) }) +test_that("4. split only", { + +obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +variable <- "prlr" +dates_file <- c("201311","201312","201411","201412") +dim(dates_file) <- c(smonth = 2, syear = 2) + +suppressWarnings( +obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + time = 'all', + latitude = values(list(35.6, 40)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(-10, 10)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = FALSE) +) + +expect_equal( +attr(obs, 'Dimensions'), +c(dat = 1, var = 1, smonth = 2, syear = 2, time = 1, latitude = 18, longitude = 81) +) +expect_equal( +dim(attr(obs, 'Variables')$common$time), +c(file_date = 4, time = 1) +) +expect_equal( +attr(obs, 'Variables')$common$time[1, 1], +as.POSIXct('2013-11-15 23:30:00', tz = 'UTC') +) + + + +}) -- GitLab From 06950a03cc272e9d1c4f916ebc31c90c0e324bed Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Mar 2021 13:28:23 +0100 Subject: [PATCH 3/4] Revise split_multiselected_dims --- inst/doc/faq.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 24e7acb..3b6d5fa 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -611,7 +611,30 @@ List of 1 The selectors can be not only vectors, but also multidimensional array. For instance, the 'time' dimension can be assigned by a two-dimensional array `[sdate = 12, time = 31]`, which is 31 timesteps for 12 start dates. You may want to have both 'sdate' and 'time' in the output dimension, even though 'sdate' is not explicitly specified in Start(). -The parameter 'split_multiselected_dims' is for this goal. It is common in the case that uses experimental data attributes to get the corresponding observational data. +The parameter 'split_multiselected_dims' is for this goal. It can be used to reshape the +file dimensions, and it is also common in the case that experimental data attributes are +used to define observational data inner dimensions, so we can get the corresponding observational data in the same dimension structure. + +Here is a simple example. By defining the selector of the file dimension 'file_date' as a +two-dimensional array, we can reshape this dimension into 'month' and 'year'. + +```r +obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +file_date <- c("201311","201312","201411","201412") +dim(file_date) <- c(month = 2, year = 2) + +obs <- Start(dat = obs.path, + var = 'prlr', + file_date = file_date, + time = 'all', + lat = indices(1:10), + lon = indices(1:10), + return_vars = list(lat = NULL, + lon = NULL, + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) +``` The following script is part of the use case [ex1_2_exp_obs_attr.R](inst/doc/usecase/ex1_2_exp_obs_attr.R). The time selector for observational data comes from experimental data above (neglected here). The dimension number of the selector is two. -- GitLab From bf1b0cf090e4bdcc8f8e06ff081e7c364a553f85 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Mar 2021 14:12:36 +0100 Subject: [PATCH 4/4] Stop if merge_across_dims and split_multiselected_dims are both used but this file dim selector is multi-dim array --- R/Start.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index 850bc7d..3d82d57 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3636,6 +3636,7 @@ Start <- function(..., # dim = indices/selectors, # final_dims will be used for collocation of data, whereas final_dims_fake # will be used for shaping the final array to be returned to the user. final_dims_fake <- final_dims + if (merge_across_dims) { if (!is.null(inner_dims_across_files)) { for (file_dim_across in names(inner_dims_across_files)) { @@ -3666,7 +3667,15 @@ Start <- function(..., # dim = indices/selectors, 1:length(split_dims)) } old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param]) - + + # If merge_across_dims and split_multiselected_dims are both used, + # on one file dim, and this file dim is multi-dim, it doesn't work. + if (identical(old_dim_pos, integer(0))) { + stop(paste0("The dimension '", names(dim_params)[dim_param], + "' to be split cannot be found after 'merge_across_dims' ", + "is used. Check if the reshape parameters are used appropriately.")) + } + # NOTE: Three steps to create new dims. # 1st: Put in the dims before split_dim. # 2nd: Replace the old_dim with split_dims. @@ -3684,6 +3693,7 @@ Start <- function(..., # dim = indices/selectors, } } } + if (merge_across_dims_narm) { # only merge_across_dims -> the 'time' dim length needs to be adjusted across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? -- GitLab