diff --git a/NEWS.md b/NEWS.md index ba586300e91d99e8648727deadeb6f77a519d38d..8b65f5efada6d88723a0be848243e1aab0f35580 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,12 @@ # startR v2.0.4 (Release date: ) - Bugfix for metadata retrieving when there are more than one dataset and one of them is missing. +- Bugfix for metadata_dims along non-dat dim. +- Bugfix for wildcard reading when parameter 'path_glob_permissive' is used. +- /dev/shm automatic cleaning on Compute(). Solve the error 'No space left on device' which happened when the jobs are aborted. # startR v2.0.1 (Release date: 2020-09-10) - /dev/shm automatic cleaning on Compute() + # startR v2.0.1 (Release date: 2020-08-25) - Bugfix for the function .chunk(). Its name was chunk() before v2.0.0, and there are two parts were not renamed to .chunk() in v2.0.0. diff --git a/R/Utils.R b/R/Utils.R index a4255c1790057b2328dba51c076842064514883c..3a6f6ea59feca0038389a4627aa9850bba170e4e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -203,8 +203,12 @@ if (!is.null(left)) { left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) match_len <- attr(left_match, 'match.length') - left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, - left_match + match_len - 1 - nchar(clean(right_known))) + + right_known_nchar <- nchar(clean(right_known)) + if (identical(right_known_nchar, integer(0))) right_known_nchar <- 0 + left_match_limits <- c(left_match + match_len - 1 - right_known_nchar - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - right_known_nchar) + if (!(left_match < 1)) { match_limits <- left_match_limits } @@ -213,8 +217,11 @@ if (!is.null(right)) { right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) match_len <- attr(right_match, 'match.length') - right_match_limits <- c(right_match + nchar(clean(left_known)), - right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + 
+ left_known_nchar <- nchar(clean(left_known)) + if (identical(left_known_nchar, integer(0))) left_known_nchar <- 0 + right_match_limits <- c(right_match + left_known_nchar, + right_match + left_known_nchar + nchar(replace_values[[tag]]) - 1) if (is.null(match_limits) && !(right_match < 1)) { match_limits <- right_match_limits } diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 4e446311af471ec13ba027335e9d383315e9ebc7..82e0bf73cc76924270c8ecd20c2377595ef956de 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -47,6 +47,9 @@ You can also find information in [FAQ How-to-18](inst/doc/faq.md#18-use-glob-exp You will see four difference cases and learn the rules. You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata_dims-to-retrieve-variable-metadata). + 11. [Three methods to load experimental files with different member and version](inst/doc/usecase/ex1_11_expid_member_version.R) + This script shows three ways to load the data with different expid - member - version combination. It is useful for climate prediction of multiple experiments. + 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_11_expid_member_version.R b/inst/doc/usecase/ex1_11_expid_member_version.R new file mode 100644 index 0000000000000000000000000000000000000000..1accfff126518b9c03be4f972c04828aa0ae857a --- /dev/null +++ b/inst/doc/usecase/ex1_11_expid_member_version.R @@ -0,0 +1,110 @@ +# Author: An-Chi Ho +# Date: 7th Oct. 2020 +#--------------------------------------------------------------------- +# The script shows three ways to load the data with different expid - member - version +# combination. It is useful for climate prediction of multiple experiments. 
+# In this case, the two datasets have the following combinations: +# | expid | member | version | +# |-------|----------|---------| +# | a1st | r7i1p1f1 |v20190302| +# | a1sx |r10i1p1f1 |v20190308| +# +# The three methods to load the data are: +# (1) dependencies +# (2) glob expression +# (3) dataset +#--------------------------------------------------------------------- + +library(startR) + +# (1) dependencies +# Because the three file dimensions 'expid', 'member', and 'version' depend +# on each other, we can use the parameters 'xxx_depends' to specify the relationship. + +repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/$member$/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_$member$_gn_$year$.nc') +yrh1 <- 1960 +yrh2 <- 1961 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + expid = c('a1st', 'a1sx'), + member = 'all', + version = 'all', + member_depends = 'expid', + member_depends = 'version', + version_depends = 'expid', + version_depends = 'member', + year = years, + time = 'all', + region = 'all', + return_vars = list(time = NULL, region = NULL), + retrieve = TRUE) + +dim(data) +# dat var expid member version year time region +# 1 1 2 1 2 2 12 14 + + +# (2) glob expression +# The parameter 'path_glob_permissive' allows using '*' to define the path. +# Note that '*' can only represent one possibility. Because each expid has only +# one member and one version, we can use '*' to define the path. +# See Start() documentation for more details of 'path_glob_permissive'. 
+ +repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', + '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') + +yrh1 <- 1960 +yrh2 <- 1961 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + expid = c('a1st', 'a1sx'), + year = years, + time = 'all', + region = 'all', + path_glob_permissive = 6, # to preserve * for the last 6 folder layers (6th is $member$ originally) + return_vars = list(time = NULL, region = NULL), + retrieve = TRUE) + +dim(data) +# dat var expid year time region +# 1 1 2 2 12 14 + + +# (3) dataset +# We can simply define the two expids as two datasets. Therefore, the member and version +# can be specified in each path directly. +# The following script is a bit different from the above two. It reads two versions +# for the first dataset. Therefore, the resulting dimension is 'version = 2', and the second +# dataset has NAs along [version = 2] since it only has one version. 
+ +repos <- list(list(name = 'a1st', + path = paste0('/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r7i1p1f1/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_r7i1p1f1_gn_$year$.nc')), + list(name = 'a1sx', + path = paste0('/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r10i1p1f1/Omon/$var$/gn/$version$/', + '$var$_Omon_EC-Earth3_historical_r10i1p1f1_gn_$year$.nc')) + ) +yrh1 <- 1960 +yrh2 <- 1961 #2014 +years <- paste0(c(yrh1 : yrh2), '01-', c(yrh1 : yrh2), '12') + +data <- Start(dat = repos, + var = 'tosmean', + year = years, + version = indices(1:2), #'all', + time = 'all', + region = 'all', + retrieve = TRUE) + + dim(data) +# dat var year version time region +# 2 1 2 2 12 14 diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index fecb69d39878f5a11334770062a78cf07fe788ec..ca6bbc03debf19b0b2f6201938070ad2e16e2f45 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -88,3 +88,50 @@ data <- Start(dat = repos, ) }) + + +test_that("2. tag at the end", { +# Without the layer that path_glob_permissive allows to contain *, the last item in the path is a tag. In the example below, the path without the path_glob_permissive layer is +# "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/". 
The last item is "$sdate$" + +sdates.seq.thu <- format(seq(as.Date(paste(2020, 06, 11, sep = '-')), as.Date(paste(2020, 09, 17, sep = '-')), + by = 'weeks'), format='%Y%m%d') +path <- "/esarchive/oper/S2S4E-data/weekly_statistics/S2S/$var$/$sdate$/$var$_$sdate$_*.nc" + +exp <- Start(dat = path, + var = "tas", + sdate = sdates.seq.thu, + time = 'all', + ensemble = "all", + latitude = indices(1:2), + longitude = indices(1:2), + path_glob_permissive = 1, + retrieve = FALSE) + + asd <- as.list(attr(exp, 'ExpectedFiles')) + qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) + files <- paste0('tas_', sdates.seq.thu, '_', 24:38, '.nc') + expect_equal( + qwe, files + ) + +exp <- Start(dat = path, + var = "tas", + sdate = sdates.seq.thu, + time = 'all', + ensemble = "all", + latitude = indices(1:2), + longitude = indices(1:2), + path_glob_permissive = FALSE, + retrieve = FALSE) + + asd <- as.list(attr(exp, 'ExpectedFiles')) + qwe <- sapply(sapply(asd, strsplit, '/'), '[[', 9) + files <- paste0('tas_', sdates.seq.thu, '_', 24, '.nc') + expect_equal( + qwe, files + ) + + +}) +