diff --git a/R/Utils.R b/R/Utils.R index 699468a059247a6a9f2b0c11e5d75ccaddcedf7a..8626b2a015b3e5fe90fb9c3af697b7ed7595c5b0 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -277,6 +277,9 @@ chunk <- function(chunk, n_chunks, selectors) { } .FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + + addition_warning = FALSE + if (!all(sapply(c(path_with_globs_and_tag, actual_path, tag), is.character))) { stop("All 'path_with_globs_and_tag', 'actual_path' and 'tag' must be character strings.") } @@ -308,34 +311,90 @@ chunk <- function(chunk, n_chunks, selectors) { clp <- chosen_left_part <- which.min(group_len_diffs)[1] left_expr <- paste(parts[1:clp], collapse = full_tag) + + #because ? means sth, use . (any char) to substitute ? left_expr <- gsub('?', '.', left_expr, fixed = TRUE) - # The .*? will force lazy evaluation (find the shortest match from the - # beginning of the actual_path). + test_left_expr <- left_expr + + # because * means zero or more char, use . to substitute *. + # And the * behind . means zero or more char. '?' for lazy evaluation. left_expr <- gsub('*', '.*?', left_expr, fixed = TRUE) left_expr <- gsub(full_tag, '.*?', left_expr, fixed = TRUE) + + # To test if the pattern matches only one... dont use lazy evaluation + test_left_expr <- gsub('*', '.*', test_left_expr, fixed = TRUE) + test_left_expr <- gsub(full_tag, '.*', test_left_expr, fixed = TRUE) + + # Find the match chars from left left_match <- regexec(left_expr, actual_path)[[1]] + test_left_match <- regexec(test_left_expr, actual_path)[[1]] + if (left_match < 0) { stop("Unexpected error in .FindTagValue.") } + if (attr(test_left_match, "match.length") != attr(left_match, "match.length")) { + addition_warning = TRUE + warning("Detect more than one possibility derived from the global expression of path.") + } + + #Cut down the left match part + actual_path_sub <- substr(actual_path, + attr(left_match, 'match.length') + 1, + nchar(actual_path)) + + #----------Search match chars from right right_expr <- paste(parts[(clp + 1):(length(parts))], collapse = full_tag) right_expr <- gsub('?', '.', right_expr, fixed = TRUE) + + test_right_expr <- right_expr # For lazy evaulation to work, pattern and string have to be reversed. right_expr <- gsub('*', '.*?', right_expr, fixed = TRUE) right_expr <- gsub(full_tag, '.*?', right_expr, fixed = TRUE) right_expr <- gsub('$', '^', right_expr, fixed = TRUE) + + # To test if the pattern matches only one... dont use lazy evaluation + test_right_expr <- gsub('*', '.*', test_right_expr, fixed = TRUE) + test_right_expr <- gsub(full_tag, '.*', test_right_expr, fixed = TRUE) + test_right_expr <- gsub('$', '^', test_right_expr, fixed = TRUE) + rev_str <- function(s) { paste(rev(strsplit(s, NULL)[[1]]), collapse = '') } + right_expr <- rev_str(right_expr) + test_right_expr <- rev_str(test_right_expr) + right_expr <- gsub('?*.', '.*?', right_expr, fixed = TRUE) right_match <- regexec(right_expr, rev_str(actual_path))[[1]] + + test_right_expr <- gsub('*.', '.*', test_right_expr, fixed = TRUE) + test_right_match <- regexec(test_right_expr, rev_str(actual_path_sub))[[1]] + if (right_match < 0) { stop("Unexpected error in .FindTagValue.") } + + if (attr(test_right_match, "match.length") != attr(right_match, "match.length")) { + addition_warning = TRUE + warning(paste0("Detect more than one possibility derived from the global ", + "expression of path.")) + } + + #-------------get tag value right_match[] <- nchar(actual_path) - (right_match[] + attr(right_match, 'match.length') - 1) + 1 + if (addition_warning) { + warning(paste0("The extracted parameter ", full_tag, " is ", + substr(actual_path, left_match + attr(left_match, 'match.length'), + right_match - 1), + ". Check if all the desired files were read in. ", + "If not, specify parameter '", tag, + "' by values instead of indices, or set parameter ", + "'path_glob_permissive' as TRUE")) + } + if ((left_match + attr(left_match, 'match.length')) > (right_match - 1)) { NULL