From 06f6910279bbe5a187793ef7e639f41a73604839 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 6 Oct 2020 15:47:13 +0200 Subject: [PATCH 1/5] Set the result of nchar() to 0 if it is integer(0) --- R/Utils.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index a4255c1..3a6f6ea 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -203,8 +203,12 @@ if (!is.null(left)) { left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) match_len <- attr(left_match, 'match.length') - left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, - left_match + match_len - 1 - nchar(clean(right_known))) + + right_known_nchar <- nchar(clean(right_known)) + if (identical(right_known_nchar, integer(0))) right_known_nchar <- 0 + left_match_limits <- c(left_match + match_len - 1 - right_known_nchar - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - right_known_nchar) + if (!(left_match < 1)) { match_limits <- left_match_limits } @@ -213,8 +217,11 @@ if (!is.null(right)) { right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) match_len <- attr(right_match, 'match.length') - right_match_limits <- c(right_match + nchar(clean(left_known)), - right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + + left_known_nchar <- nchar(clean(left_known)) + if (identical(left_known_nchar, integer(0))) left_known_nchar <- 0 + right_match_limits <- c(right_match + left_known_nchar, + right_match + left_known_nchar + nchar(replace_values[[tag]]) - 1) if (is.null(match_limits) && !(right_match < 1)) { match_limits <- right_match_limits } -- GitLab From 03269a1163050a89ed7e8c6630b0304b4cd4f301 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 6 Oct 2020 19:52:36 +0200 Subject: [PATCH 2/5] Add unit test for new bugfix --- .../test-Start-path_glob_permissive.R | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff 
--git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index 9e14c02..b5b3208 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -88,3 +88,50 @@ data <- Start(dat = repos, ) }) + + +test_that("2. tag at the end", { +# Ignoring the layers that path_glob_permissive allows to contain '*', the last item in the path is a tag. In the example below, the path without the path_glob_permissive layer is +# "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/". The last item is "$sdate$". + +sdates.seq.thu <- format(seq(as.Date(paste(2020, 06, 11, sep = '-')), as.Date(paste(2020, 09, 17, sep = '-')), + by = 'weeks'), format='%Y%m%d') +path <- "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/$var$_$sdate$_*.nc" + +exp <- Start(dat = path, + var = "tas", + sdate = sdates.seq.thu, + time = 'all', + ensemble = "all", + latitude = indices(1:2), + longitude = indices(1:2), + path_glob_permissive = 1, # keep '*' in the last path layer (the file name) + retrieve = FALSE) + + asd <- as.list(attr(exp, 'ExpectedFiles')) + qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) # 9th '/'-separated component is the file name + files <- paste0('tas_', sdates.seq.thu, '_', 24:38, '.nc') + expect_equal( + qwe, files + ) + +exp <- Start(dat = path, + var = "tas", + sdate = sdates.seq.thu, + time = 'all', + ensemble = "all", + latitude = indices(1:2), + longitude = indices(1:2), + path_glob_permissive = FALSE, + retrieve = FALSE) + + asd <- as.list(attr(exp, 'ExpectedFiles')) + qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) + files <- paste0('tas_', sdates.seq.thu, '_', 24, '.nc') # all files are expected to share the same '*' match (24) + expect_equal( + qwe, files + ) + + +}) + -- GitLab From 3f33897ee86f2f28c0a614d1aded29a95ce24afd Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 7 Oct 2020 11:49:47 +0200 Subject: [PATCH 3/5] Add new developement to NEWS.md --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index c348dda..2c1eeb3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# 
startR v2.0.2 (Release date: 2020-10-) +- Bugfix for wildcard reading when parameter 'path_glob_permissive' is used. +- Bugfix for metadata_dims along non-dat dim. +- Solve the error 'No space left on device' which happened when the jobs are aborted. + # startR v2.0.1 (Release date: 2020-08-25) - Bugfix for the function .chunk(). Its name was chunk() before v2.0.0, and there are two parts were not renamed to .chunk() in v2.0.0. -- GitLab From d11dcd7ba7b1ba7f0a39486c9cb89a0ea7ad04ed Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 7 Oct 2020 12:39:52 +0200 Subject: [PATCH 4/5] Revise NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2c1eeb3..774623f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # startR v2.0.2 (Release date: 2020-10-) - Bugfix for wildcard reading when parameter 'path_glob_permissive' is used. - Bugfix for metadata_dims along non-dat dim. -- Solve the error 'No space left on device' which happened when the jobs are aborted. +- /dev/shm automatic cleaning on Compute(). Solve the error 'No space left on device' which happened when the jobs are aborted. # startR v2.0.1 (Release date: 2020-08-25) - Bugfix for the function .chunk(). Its name was chunk() before v2.0.0, and there are two parts -- GitLab From 963c14144683ff3cfe8b462584e5e6c02bb7a3ee Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 7 Oct 2020 14:45:09 +0200 Subject: [PATCH 5/5] Add new use case. --- inst/doc/usecase.md | 3 + .../doc/usecase/ex1_11_expid_member_version.R | 110 ++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 inst/doc/usecase/ex1_11_expid_member_version.R diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 4e44631..82e0bf7 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -47,6 +47,9 @@ You can also find information in [FAQ How-to-18](inst/doc/faq.md#18-use-glob-exp You will see four difference cases and learn the rules. 
You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata_dims-to-retrieve-variable-metadata). + 11. [Three methods to load experimental files with different member and version](inst/doc/usecase/ex1_11_expid_member_version.R) + This script shows three ways to load the data with different expid - member - version combinations. It is useful for climate prediction of multiple experiments. + 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_11_expid_member_version.R b/inst/doc/usecase/ex1_11_expid_member_version.R new file mode 100644 index 0000000..1accfff --- /dev/null +++ b/inst/doc/usecase/ex1_11_expid_member_version.R @@ -0,0 +1,110 @@ +# Author: An-Chi Ho +# Date: 7th Oct. 2020 +#--------------------------------------------------------------------- +# The script shows three ways to load the data with different expid - member - version +# combinations. It is useful for climate prediction of multiple experiments. +# In this case, the two datasets have the following combinations: +# | expid | member | version | +# |-------|----------|---------| +# | a1st | r7i1p1f1 |v20190302| +# | a1sx |r10i1p1f1 |v20190308| +# +# The three methods to load the data are: +# (1) dependencies +# (2) glob expression +# (3) dataset +#--------------------------------------------------------------------- + +library(startR) + +# (1) dependencies +# Because the three file dimensions 'expid', 'member', and 'version' depend +# on each other, we can use the parameter 'xxx_depends' to specify the relationship. 
+ +repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/$member$/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_$member$_gn_$year$.nc') +yrh1 <- 1960 +yrh2 <- 1961 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + expid = c('a1st', 'a1sx'), + member = 'all', + version = 'all', + member_depends = 'expid', + member_depends = 'version', + version_depends = 'expid', + version_depends = 'member', + year = years, + time = 'all', + region = 'all', + return_vars = list(time = NULL, region = NULL), + retrieve = TRUE) + +dim(data) +# dat var expid member version year time region +# 1 1 2 1 2 2 12 14 + + +# (2) glob expression +# The parameter 'path_glob_permissive' allows using '*' to define the path. +# Note that '*' can only represent one possibility. Because each expid only has +# one member and one version, we can use '*' to define the path. +# See Start() documentation for more details on 'path_glob_permissive'. + +repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', + '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') + +yrh1 <- 1960 +yrh2 <- 1961 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + expid = c('a1st', 'a1sx'), + year = years, + time = 'all', + region = 'all', + path_glob_permissive = 6, # to preserve * for the last 6 folder layers (6th is $member$ originally) + return_vars = list(time = NULL, region = NULL), + retrieve = TRUE) + +dim(data) +# dat var expid year time region +# 1 1 2 2 12 14 + + +# (3) dataset +# We can simply define the two expids as two datasets. Therefore, the member and version +# can be specified in each path directly. +# The following script is a bit different from the above two. It reads two versions +# for the first dataset. 
Therefore, the result has dimension 'version = 2', and the second +# dataset has NAs along [version = 2] since it only has one version. + +repos <- list(list(name = 'a1st', + path = paste0('/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r7i1p1f1/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_r7i1p1f1_gn_$year$.nc')), + list(name = 'a1sx', + path = paste0('/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r10i1p1f1/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_r10i1p1f1_gn_$year$.nc')) + ) +yrh1 <- 1960 +yrh2 <- 1961 #2014 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + year = years, + version = indices(1:2), #'all', + time = 'all', + region = 'all', + retrieve = TRUE) + + dim(data) +# dat var year version time region +# 2 1 2 2 12 14 -- GitLab