diff --git a/R/Start.R b/R/Start.R index 8efea29e1cb85013f90f6c2e0fe7a618295c835e..6bf9e9eb08635d37726e8af550107d5897c4c14b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -644,8 +644,8 @@ #'When specifying a path pattern for a dataset, it might contain shell glob #'experissions. For each dataset, the first file matching the path pattern is #'found, and the found file is used to work out fixed values for the glob -#'expressions that will be used for all the files of the dataset. However in -#'some cases the values of the shell glob expressions may not be constant for +#'expressions that will be used for all the files of the dataset. However, in +#'some cases, the values of the shell glob expressions may not be constant for #'all files in a dataset, and they need to be worked out for each file #'involved.\cr\cr #'For example, a path pattern could be as follows: \cr @@ -667,7 +667,16 @@ #' pattern with the original glob expressions in the 1st and 2nd levels (in the #' example, both asterisks would be preserved, thus would allow Start() #' to recognize files such as \cr -#' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}). +#' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr +#'Note that each glob expression can only represent one possibility (Start() +#'chooses the first). Because \code{*} is not a tag, it cannot +#'be a dimension of the output array; therefore, only one possibility can be +#'adopted. For example, if \cr +#'\code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr +#'has two matches:\cr +#'\code{'/path/to/dataset/precipitation_xxx/19901101_yyy_foo.nc'} and\cr +#'\code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'},\cr +#'only the first found file will be used. #'@param retrieve A logical value indicating whether to retrieve the data #' defined in the Start() call or to explore only its dimension lengths #' and names, and the values for the file and inner dimensions. 
The default @@ -1686,6 +1695,8 @@ Start <- function(..., # dim = indices/selectors, replace_values[[u_file_dim]] <- '*' depended_dim <- NULL depended_dim_values <- NA + +#NOTE: Here 'selectors' is always 1. Is it supposed to be like this? selectors <- dat_selectors[[u_file_dim]][[1]] if (u_file_dim %in% names(depending_file_dims)) { depended_dim <- depending_file_dims[[u_file_dim]] @@ -1787,6 +1798,20 @@ Start <- function(..., # dim = indices/selectors, sub_array_of_not_found_files[j] <- TRUE } else { file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + +#NOTE: After replacing tags, there is still * if path_glob_permissive is not FALSE. + if (grepl('\\*', file_path)) { + found_files <- Sys.glob(file_path) + file_path <- found_files[1] # choose only the first file. +#NOTE: Above line chooses only the first found file. Because * is not tags, which means +# it is not a dimension. So it cannot store more than one item. If use * to define +# the path, that * should only represent one possibility. + if (length(found_files) > 1) { + .warning("Using glob expression * to define the path, but more ", + "than one match is found. 
Choose the first match only.") + } + } + if (!(length(grep("^http", file_path)) > 0)) { if (grepl(file_path, '*', fixed = TRUE)) { file_path_full <- Sys.glob(file_path)[1] diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index 5f7a5a66e6db3a88f271acf28f06d5009724f5a9..95319aee359cc3fc462ad72aaeaef50a4a5d397f 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -69,7 +69,7 @@ data <- Start(dat = file, return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), - metadata_dims = c('file_date'), +# metadata_dims = c('file_date'), retrieve = T) expect_equal( @@ -87,7 +87,7 @@ data <- Start(dat = file, ) expect_equal( names(attr(data, 'Variables')$common), - c('time', 'tas') + c('time') ) }) @@ -112,7 +112,7 @@ data <- Start(dat = file, expect_equal( names(attr(data, 'Variables')$common), - c('latitude', 'longitude', 'time', 'tas') + c('latitude', 'longitude', 'time', 'tas', NA) ) expect_equal( as.vector(attr(data, 'NotFoundFiles')),