diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c7deb1af5aac18cff5d07efec0fc1b0949cabf83..200b32db2ff68c46e0aeb1c22f07d820d0caf4bf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,14 @@ stages: - build + +#workflow: +# rules: +# - if: $CI_COMMIT_TITLE =~ /-draft$/ +# when: never +# - when: always +# - if: $CI_PIPELINE_SOURCE == "merge_request_event" +# - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + build: stage: build script: diff --git a/DESCRIPTION b/DESCRIPTION index 60fa08cdacb88cba6abc5814af42af6020084162..8fd5ee18350d5b284ae22abc5ca64e50af7b7184 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,16 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.3.0 +Version: 2.3.1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), - person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4182-5258")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut"), comment = c(ORCID = "0000-0001-8568-3071")), + person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Victoria", "Agudetse", , "victoria.agudetse@bsc.es", role = "ctb"), + person("Bruno", "de Paula Kinoshita", , "bruno.depaulakinoshita@bsc.es", role = "ctb"), person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , "pierre-antoine.bretonniere@bsc.es", role = c("ctb")), - person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")), - person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Roberto", "Serrano", , "rsnotivoli@gmail.com", role = c("ctb")), person("BSC-CNS", role = c("aut", "cph"))) Description: Tool to automatically fetch, transform and arrange subsets of multi- dimensional data sets (collections of files) stored in local and/or @@ -42,5 +44,5 @@ URL: 
https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index 9219f962797d99c8bdc6c950bc13dc272583f2d3..c19d7a3389ad581d2f70855758fa7417908d21bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# startR v2.3.1 (Release date: 2023-12-22) +- Use Autosubmit as workflow manager on hub +- New feature: Collect result by Collect() on HPCs +- Bugfix: Correct Collect_autosubmit() .Rds files update +- Bugfix: Collect() correctly recognize the finished chunk (.Rds file) in local ecFlow folder. Prevent neverending Collect() when using `wait = F` in Compute() and Collect() the result later on + # startR v2.3.0 (Release date: 2023-08-31) - Load variable metadata when retreive = F - Change Compute() "threads_load" to 1 to be consistent with documentation diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 65ab36eeeef2b475a2449f0ab7af28a19e7e67e7..ec3363293b13cf351876365ad8da795bd8a9a220 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -611,14 +611,15 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here system(sys_commands) - } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + } else { +# } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { # ssh from WS to AS VM to run exp as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') system(sys_commands) - } else { - stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") +# } else { +# stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } # Check the size of tmp/ASLOGS/jobs_failed_status.log. 
If it is not 0, the jobs failed. diff --git a/R/Collect.R b/R/Collect.R index 6d752f5fc907d6f6fbe84d3aa015ccbf8f4afc07..5ae8b150626c815f713af279f4dbf244502d787a 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -22,6 +22,9 @@ #' folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the #' data and Collect() them as many times as desired, set remove to FALSE. The #' default value is TRUE. +#' @param on_remote A logical value deciding to the function is run locally and +#' sync the outputs back from HPC (FALSE, default), or it is run on HPC +#' (TRUE). #'@return A list of merged data array. #' #'@examples @@ -72,8 +75,7 @@ #' } #' #'@export -Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { - +Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -88,20 +90,22 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { if (!is.logical(remove)) { stop("Parameter 'remove' must be logical.") } + if (!is.logical(on_remote)) { + stop("Parameter 'on_remote' must be logical.") + } if (tolower(startr_exec$workflow_manager) == 'ecflow') { - res <- Collect_ecflow(startr_exec, wait = wait, remove = remove) + res <- Collect_ecflow(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { - res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove) + res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } return(res) } +Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { -Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { - - if (Sys.which('ecflow_client') == '') { + if (!on_remote && Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") } @@ 
-114,7 +118,8 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings <- startr_exec[['timings']] ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - if (!is.null(cluster[['temp_dir']])) { + if (!is.null(cluster[['temp_dir']])) { #NOTE: Which case doesn't have temp_dir? + remote_ecflow_suite_dir <- cluster[['temp_dir']] remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], '/STARTR_CHUNKING_', suite_id, '/') @@ -141,8 +146,11 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { } done <- FALSE attempt <- 1 - sum_received_chunks <- sum(grepl('output.*\\.Rds', - list.files(ecflow_suite_dir_suite))) + if (!on_remote) { + sum_received_chunks <- sum(grepl('.*\\.Rds$', + list.files(ecflow_suite_dir_suite))) + } + if (cluster[['bidirectional']]) { t_transfer_back <- NA } else { @@ -156,92 +164,19 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { writeLines(rsync_petition_file_lines, rsync_petition_file) Sys.sleep(2) while (!done) { - failed <- FALSE - if (cluster[['bidirectional']]) { - status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", - suite_id, " --host=", - ecflow_server[['host']], " --port=", ecflow_server[['port']]), - intern = TRUE) - if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") - } - if (!first_chunk_received) { - if (any(grepl('state:complete', status))) { - if (!is.null(time_before_first_chunk)) { - time_after_first_chunk <- Sys.time() - estimate <- (time_after_first_chunk - - time_before_first_chunk) * - ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / - cluster[['max_jobs']]) - units(estimate) <- 'mins' - .message( - paste0('Remaining time estimate (neglecting queue and ', - 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - - 
time_before_first_chunk), ' per chunk)') - ) - } - first_chunk_received <- TRUE - } - } - Sys.sleep(min(sqrt(attempt), 5)) - } else { - #if (sum_received_chunks == 0) { - # # Accounting for the fist chunk received in ByChunks and - # # setting it to complete - # # ByChunks needs the first chunk to calculate remaining time - # received_files <- list.files(ecflow_suite_dir_suite) - # received_chunks <- received_files[grepl('Rds$', - # received_files)] - #} - t_begin_transfer_back <- Sys.time() - rsync_output <- tryCatch({ - system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", - cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", - ecflow_suite_dir_suite, "/"), intern = TRUE) - }, error = function(e) { - message("Warning: rsync from remote server to collect results failed. ", - "Retrying soon.") - failed <- TRUE - }) - t_end_transfer_back <- Sys.time() - t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, - t_begin_transfer_back, units = 'secs')) - if (!failed) { - #if (sum_received_chunks == 0) { - # rsync_output <- c(rsync_output, received_chunks) - #} - received_running <- grepl('running$', rsync_output) - for (received_chunk_index in which(received_running)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=active recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - } - received_crashed <- grepl('crashed$', rsync_output) - for (received_chunk_index in which(received_crashed)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=aborted recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) + if (!on_remote) { + if (cluster[['bidirectional']]) { + status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", + suite_id, " --host=", + 
ecflow_server[['host']], " --port=", ecflow_server[['port']]), + intern = TRUE) + if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") } - received_chunks <- grepl('Rds$', rsync_output) - for (received_chunk_index in which(received_chunks)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=complete recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - sum_received_chunks <- sum_received_chunks + 1 - if (!first_chunk_received) { + if (!first_chunk_received) { + if (any(grepl('state:complete', status))) { if (!is.null(time_before_first_chunk)) { time_after_first_chunk <- Sys.time() estimate <- (time_after_first_chunk - @@ -252,27 +187,117 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { .message( paste0('Remaining time estimate (neglecting queue and ', 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - + '): ', format(estimate), ' (', + format(time_after_first_chunk - time_before_first_chunk), ' per chunk)') ) } first_chunk_received <- TRUE } } - if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") + Sys.sleep(min(sqrt(attempt), 5)) + } else { + #if (sum_received_chunks == 0) { + # # Accounting for the fist chunk received in ByChunks and + # # setting it to complete + # # ByChunks needs the first chunk to calculate remaining time + # received_files <- list.files(ecflow_suite_dir_suite) + # received_chunks <- received_files[grepl('Rds$', + # received_files)] + #} + failed <- FALSE + t_begin_transfer_back <- Sys.time() + rsync_output <- tryCatch({ + system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", + cluster[['queue_host']], ":", 
remote_ecflow_suite_dir_suite, "' ", + ecflow_suite_dir_suite, "/"), intern = TRUE) + }, error = function(e) { + message("Warning: rsync from remote server to collect results failed. ", + "Retrying soon.") + failed <- TRUE + }) + t_end_transfer_back <- Sys.time() + t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, + t_begin_transfer_back, units = 'secs')) + if (!failed) { + #if (sum_received_chunks == 0) { + # rsync_output <- c(rsync_output, received_chunks) + #} + received_running <- grepl('running$', rsync_output) + for (received_chunk_index in which(received_running)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=active recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_crashed <- grepl('crashed$', rsync_output) + for (received_chunk_index in which(received_crashed)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=aborted recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_chunks <- grepl('Rds$', rsync_output) + for (received_chunk_index in which(received_chunks)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=complete recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + sum_received_chunks <- sum_received_chunks + 1 + if (!first_chunk_received) { + if (!is.null(time_before_first_chunk)) { + time_after_first_chunk <- Sys.time() + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / + cluster[['max_jobs']]) + units(estimate) <- 'mins' + .message( + paste0('Remaining time estimate (neglecting queue and ', + 'merge time) (at ', 
format(time_after_first_chunk), + '): ', format(estimate), ' (', + format(time_after_first_chunk - + time_before_first_chunk), ' per chunk)') + ) + } + first_chunk_received <- TRUE + } + } + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } } + Sys.sleep(cluster[['polling_period']]) } - Sys.sleep(cluster[['polling_period']]) + + } else { # on_remote + + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_ecflow_suite_dir_suite ))) + + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } else { + message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done.") + message("Will try again after polling_period...") + Sys.sleep(cluster[['polling_period']]) + } + } attempt <- attempt + 1 } file.remove(rsync_petition_file) timings[['transfer_back']] <- t_transfer_back - if (!is.null(cluster[['temp_dir']])) { + if (!on_remote && !is.null(cluster[['temp_dir']])) { system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', remote_ecflow_suite_dir_suite, '"')) } @@ -280,11 +305,18 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { .warning("ATTENTION: The source chunks will be removed from the ", "system. 
Store the result after Collect() ends if needed.") } + if (!on_remote) { + target_folder <- ecflow_suite_dir + target_folder_suite <- ecflow_suite_dir_suite + } else { + target_folder <- remote_ecflow_suite_dir + target_folder_suite <- remote_ecflow_suite_dir_suite + } t_begin_merge <- Sys.time() - result <- .MergeChunks(ecflow_suite_dir, suite_id, remove) + result <- .MergeChunks(target_folder, suite_id, remove) t_end_merge <- Sys.time() timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) - received_files <- list.files(ecflow_suite_dir_suite, full.names = TRUE) + received_files <- list.files(target_folder_suite, full.names = TRUE) received_timings_files <- received_files[grepl('timings$', received_files)] for (timings_file in received_timings_files) { times <- readRDS(timings_file) @@ -294,11 +326,12 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings[['compute']] <- c(timings[['compute']], times['compute']) } if (remove) { - system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", - suite_id, " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - unlink(paste0(ecflow_suite_dir_suite), - recursive = TRUE) + if (!on_remote) { + system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", + suite_id, " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + unlink(target_folder_suite, recursive = TRUE) } if (attempt > 2) { t_end_total <- Sys.time() @@ -374,7 +407,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { -Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { +Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { suite_id <- startr_exec[['suite_id']] chunks <- startr_exec[['chunks']] @@ -386,19 +419,18 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { run_dir <- startr_exec$cluster[['run_dir']] done <- FALSE - 
sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) while (!done) { # If wait, try until it is done + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { done <- TRUE } else if (!wait) { stop("Computation in progress...") } else { - Sys.sleep(startr_exec$cluster[['polling_period']]) message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n", "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) -# Sys.sleep(min(sqrt(attempt), 5)) + Sys.sleep(startr_exec$cluster[['polling_period']]) } } # while !done diff --git a/R/Start.R b/R/Start.R index b0ad40d2c81114ce21ea635471c3d02612ca7c2a..5bfb3bfc67487ff764e7debe1fb7881c325ba610 100644 --- a/R/Start.R +++ b/R/Start.R @@ -674,7 +674,7 @@ #' to recognize files such as \cr #' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr #'Note that each glob expression can only represent one possibility (Start() -#'chooses the first). Because /code{*} is not the tag, which means it cannot +#'chooses the first). Because \code{*} is not the tag, which means it cannot #'be a dimension of the output array. Therefore, only one possibility can be #'adopted. For example, if \cr #'\code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr @@ -1777,6 +1777,18 @@ Start <- function(..., # dim = indices/selectors, } } } + + # Return info about return_vars when dat > 1 + if (length(dat) > 1 & length(common_return_vars) > 0) { + .message("\n", "[ATTENTION]", + paste0("According to parameter 'return_vars', the inner dimensions: ", + paste(names(common_return_vars), collapse = ', '), + ", are common among all the datasets. 
Please be sure that ", + "this is expected to avoid potential wrong results, and ", + "verify the outputs carefully."), + "\n", indent = 1) + } + #//////////////////////////////////////////// # This part was above where return_vars is seperated into return_vars and common_return_vars diff --git a/R/Step.R b/R/Step.R index e5d73042ef6ddd40afc266c17b17772de81c14a0..3c997f00780093644b29a8e15e73c3d311b1a8f4 100644 --- a/R/Step.R +++ b/R/Step.R @@ -19,7 +19,9 @@ #' for multiple returned arrays indicating the dimension names of the function #' output. #'@param use_libraries A vector of character string indicating the R library -#' names to be used in 'fun'. The default value is NULL. +#' names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +#' jobs are run locally, load the necessary libraries by \code{library()} +#' directly. The default value is NULL. #'@param use_attributes One or more lists of vectors of character string #' indicating the data attributes to be used in 'fun'. The list name should be #' consistent with the list name of 'data' in AddStep(). 
The default value is diff --git a/R/zzz.R b/R/zzz.R index 1e56e291fa6286d0f15df806c634dc962eb29ac2..f098a3b11651e260dc72a23f5ec490e5e76a7320 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -980,7 +980,12 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, if (x %in% names(depending_file_dims)) { vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] } - selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + if (x != found_pattern_dim) { + selectors[[x]][[vector_to_pick]][file_to_load_indices[x]] + } else { + # dat_dim only has one value in each work_piece + selectors[[x]][[vector_to_pick]] + } }) names(file_selectors) <- file_dims work_piece[['file_selectors']] <- file_selectors diff --git a/inst/doc/faq.md b/inst/doc/faq.md index ffe91a514552f734178f68a46e173becbd0604e4..7ff7604042f1cb121e6e6abbbb7b10dc190115e6 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -31,6 +31,8 @@ This document intends to be the first reference for any doubts that you may have 25. [What to do if your function has too many target dimensions](#25-what-to-do-if-your-function-has-too-many-target-dimensions) 26. [Use merge_across_dims_narm to remove NAs](#26-use-merge_across_dims_narm-to-remove-nas) 27. [Utilize chunk number in the function](#27-utilize-chunk-number-in-the-function) + 28. [Run startR in the background](#28-run-startr-in-the-background) + 29. [Collect result on HPCs](#29-collect-result-on-hpcs) 2. **Something goes wrong...** @@ -1008,6 +1010,38 @@ shows how to get start date for each chunk using chunk number; (2) [ex2_14](inst There are many other possible applications of this parameter. Please share with us other uses cases you may create. +### 28. Run startR in the background + +For heavy execution, we usually launch the jobs on HPCs with parallel computation. Sometimes, it takes a lot of time (days, weeks) to finish all the jobs. 
+It'd be very handy to let the jobs run in the background, so we don't need to keep the R session on the workstation open during the whole process. +To do this: + +(1) Use parameter `wait = FALSE` in Compute() call. The execution therefore won't block the R session. + +(2) Save the object as a .Rds file by saveRDS(). In this file, you have all the information needed for collecting the result later. You can close the R session and turn off the workstation now. + +(3) When you want to collect the result, use Collect() with the saved .Rds file. +You can choose to use parameter `wait = TRUE` and the command will keep running until all the jobs are finished and can be collected. +Or, by `wait = FALSE`, it will tell you the jobs are still running and you can try again later. + +Note that if you use ecFlow as job manager and with Compute(wait = FALSE), the ecFlow-UI won't be updated due to uni-directional connection. +Check [ecFlow UI remains blue and does not update status](#2-ecflow-ui-remains-blue-and-does-not-update-status) for details. + +### 29. Collect result on HPCs +After using Compute() to run execution on HPCs, you can choose to collect the result on local workstation or on HPCs. Here are the instructions for how to do it on HPCs. + +(1) Run the startR workflow as usual on workstation until Compute(). + +(2) In Compute(), use `wait = FALSE`. The execution therefore won't block the R session. + +(3) Save the object as a .Rds file somewhere that can be found on HPCs. E.g. `saveRDS(res, "/esarchive/scratch//res_startR_Collect.rds")` + +(4) ssh to HPCs (e.g., Nord3), open an R session. + +(5) Read the saved .Rds file. E.g. `obj_startR <- readRDS("/esarchive/scratch//res_startR_Collect.rds")` + +(6) Collect() the result with parameter `on_remote = TRUE`. E.g. `res <- Collect(obj_startR, on_remote = TRUE)` + # Something goes wrong... @@ -1042,9 +1076,15 @@ To solve this problem, use `Collect()` in the R terminal after running Compute() ### 3. 
Compute() successfully but then killed on R session -When Compute() on HPCs, the machines are able to process data which are much larger than the local workstation, so the computation works fine (i.e., on ec-Flow UI, the chunks show yellow in the end.) However, after the computation, the output will be sent back to local workstation. **If the returned data is larger than the available local memory space, your R session will be killed.** Therefore, always pre-check if the returned data will fit in your workstation free memory or not. If not, subset the input data or reduce the output size through more computation. +When we use Compute() and run jobs on HPCs, each job/chunk is finished and the result is saved as a .Rds file individually. +When all the jobs are finished, the next step is to merge all the chunks into one array and return to workstation. +**If the returned data is larger than the available local memory space on your workstation, +your R session will be killed.** Therefore, it is better to always pre-check if the returned data will fit in your workstation free memory or not. + +If the result can fit on HPCs, you can also choose to collect the data there. Check [How-to-29](#29-collect-result-on-hpcs) for details. -Further explanation: though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation, but the chunking results (.Rds file) are completed and saved in the directory '/STARTR_CHUNKING_'. If you still want to use the chunking results, you can find them there. +Note that even though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation and the R session is killed, +the chunking results (.Rds files) are completed and saved in the local directory '/STARTR_CHUNKING_', and you can still utilize the chunk files. ### 4. 
My jobs work well in workstation and fatnodes but not on Power9 (or vice versa) diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index b22c6292c2923f54f2119fdbb5af3898a6c9af5d..91c11ee627b0a679325dda8ea6e2c1ced06e2baa 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -746,6 +746,7 @@ To have the good practice, note down the expid if it is automatically created by - `hpc_user`: Your user ID on the HPC (i.e., "bsc32xxx"). It is required if "queue_host" is not 'local'. - `data_dir`: The path to the data repository if the data is not shared. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. +- `run_dir`: The directory to run the startR jobs. It is useful when the self-defined function has relative path. - `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). 
diff --git a/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6f18478e416d212a75c004ae02c27d795bc0495 --- /dev/null +++ b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.ym +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md new file mode 100644 index 0000000000000000000000000000000000000000..0741780b3b7e07a11b8720ab8b13eff2ce9ab7eb --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -0,0 +1,252 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data structure while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) +# Load package +library(startR) +``` + +**Data description**: +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. 
Load experimental data from data repository + +### 1.a Hindcast data + +Check one netCDF file to see the data structure. +``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + +Understand the following script, run it, and check the result. + +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- + + sdate_hcst <- paste0(1993:2016, '1101') + + hcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_hcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +(1) What are the dimensions of `hcst`? Use `dim()` to check. + +```r +dim(____) +``` + +(2) What is the structure of `hcst`? Use `str()` to check. +```r +str(hcst, max.level = _____) # try 1, 2, 3 +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- __________ +hcst_lon <- __________ +``` + +### 1.b Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and structure. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + +```r + sdate_fcst <- ____________ + + fcst <- Start(dat = path_exp, + var = _____, + syear = sdate_fcst, + ensemble = 'all', + time = _____, + latitude = values(list(____, ____)), + latitude_reorder = Sort(), + longitude = values(list(____, ____)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = _____, method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = _____, + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of `fcst`? Use `dim()` to check. + +```r +dim(____) +``` + +(2) What is the structure of `fcst`? Use `str()` to check. +```r +str(fcst, max.level = _____) # try 1, 2, 3 +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(_____, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +fcst_time <- __________ +fcst_lat <- __________ +fcst_lon <- __________ +``` + +### 1.c Observational data + +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. 
+To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. + +**Get the time values from hindcast data** + +Check the time attributes of `hcst`: Is it correct? + +```r +dim(attributes(hcst)$Variables$common$time) +str(attributes(hcst)$Variables$common$time) +``` + +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(p.s. `lubridate` is a useful R package for time value manipulation!) + +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) +``` + +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. + +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. +`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even though "time" is not explicitly specified in the Start() call.
+ +```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- + + obs <- Start(dat = path_obs, + var = _____, + syear = sdate_obs, + split_multiselected_dims = TRUE, + latitude = values(list(_____, _____)), + latitude_reorder = Sort(), + longitude = values(list(_____, _____)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = ______, method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = ______, + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the observational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. + +```r +dim(____) +``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = ____) # try 1, 2, 3 +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(____, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- __________ +obs_lat <- __________ +obs_lon <- __________ +``` + + +## 2. Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. +```r + +``` +(2) Check the summary of the data by `summary()`.
+```r +summary(hcst) +summary(fcst) +summary(obs) +``` + +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +# lat and lon +identical(____, ____) +all.equal(____, ____) + +# time: only compare year and month +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) +``` diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..41d4a8ba7c2fc264a0fe25b07f7d075b1c45ebcf --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -0,0 +1,277 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data structure while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) +# Load package +library(startR) +``` + +**Data description**: +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. Load experimental data from data repository + +### 1.a Hindcast data + +Check one netCDF file to see the data structure.
+``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + +Understand the following script, run it, and check the result. + +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- + + sdate_hcst <- paste0(1993:2016, '1101') + + hcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_hcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +(1) What are the dimensions of `hcst`? Use `dim()` to check. + +```r +dim(hcst) +# dat var syear ensemble time latitude longitude +# 1 1 24 25 2 61 61 +``` + +(2) What is the structure of `hcst`? Use `str()` to check. +```r +str(hcst, max.level = 1) +str(hcst, max.level = 2) +str(hcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- metadata_attr$common$latitude +hcst_lon <- metadata_attr$common$longitude +``` + +### 1.b Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and structure. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + +```r + sdate_fcst <- '20201101' + + fcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of `fcst`? Use `dim()` to check. + +```r +dim(fcst) +# dat var syear ensemble time latitude longitude +# 1 1 1 51 2 61 61 +``` + +(2) What is the structure of `fcst`? Use `str()` to check. +```r +str(fcst, max.level = 1) +str(fcst, max.level = 2) +str(fcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values.
+```r +metadata_attr <- attr(fcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +fcst_time <- metadata_attr$common$time +fcst_lat <- metadata_attr$common$latitude +fcst_lon <- metadata_attr$common$longitude +``` + +### 1.c Observational data + +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. +To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. + +**Get the time values from hindcast data** + +Check the time attributes of `hcst`: Is it correct? + +```r +dim(attributes(hcst)$Variables$common$time) +#syear time +# 24 2 + +str(attributes(hcst)$Variables$common$time) +# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +``` + +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(p.s. `lubridate` is a useful R package for time value manipulation!) + +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) +``` + +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. + +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. 
+`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even though "time" is not explicitly specified in the Start() call. + +```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- + + obs <- Start(dat = path_obs, + var = 'tas', + syear = sdate_obs, + split_multiselected_dims = TRUE, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the observational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. + +```r +dim(obs) +# dat var syear time latitude longitude +# 1 1 24 2 61 61 +``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = 1) +str(obs, max.level = 2) +str(obs, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(obs, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- metadata_attr$common$time +obs_lat <- metadata_attr$common$latitude +obs_lon <- metadata_attr$common$longitude +``` + + +## 2.
Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. +```r +dim(hcst) +dim(fcst) +dim(obs) +``` +(2) Check the summary of the data by `summary()`. +```r +summary(hcst) +summary(fcst) +summary(obs) +``` + +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +identical(obs_lat, hcst_lat) +[1] TRUE +all.equal(obs_lat, hcst_lat) +[1] TRUE +identical(obs_lon, hcst_lon) +[1] TRUE +identical(fcst_lat, hcst_lat) +[1] TRUE +identical(fcst_lon, hcst_lon) +[1] TRUE + +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) +[1] TRUE +``` diff --git a/inst/doc/usecase/ex2_1_timedim.R b/inst/doc/usecase/ex2_1_timedim.R index 15ef37da6dd8c955dcd433a73c5aa470e3b799d4..0c0f36c0a981e0abfd45e23fcd86d8f22406919f 100644 --- a/inst/doc/usecase/ex2_1_timedim.R +++ b/inst/doc/usecase/ex2_1_timedim.R @@ -45,9 +45,9 @@ library(startR) ## on Power9 #-----------modify according to your personal info--------- - queue_host = 'cte-power' #your own host name for power9 - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'cte-power' #your own host name for power9 + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -66,11 +66,12 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) -## on Nord3 + +## on Nord3 with ecFlow #-----------modify according to your personal info--------- - queue_host = 'nord4' - temp_dir = 
'/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'nord4' + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -88,3 +89,32 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) + + +## on Nord3 with Autosubmit +#-----------modify according to your personal info--------- + hpc_user <- "bsc32xxx" + expid <- "xxxx" # autosubmit exp id; can be NULL + autosubmit_suite_dir <- "/home/Earth//startR_local_autosubmit/" +#------------------------------------------------------------ + res <- Compute(wf1, + chunks = list(ensemble = 20, sdate = 2), + threads_load = 2, + threads_compute = 4, + cluster = list( + queue_host = 'nord3', + r_module = "R/4.1.2-foss-2019b", + autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = 2, + job_wallclock = '01:00:00', + max_jobs = 40, + polling_period = 10, + extra_queue_params = list('#SBATCH --constraint=medmem', '#SBATCH --exclusive'), + expid = NULL, + hpc_user = hpc_user + ), + workflow_manager = 'autosubmit', + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = NULL, #'bscesautosubmit01', + wait = TRUE + ) diff --git a/man/Collect.Rd b/man/Collect.Rd index d90cacaf8367095c5f4505fa8371151a1fdf4060..e701a00d7a6602dacb5bcb84ab0c723c9ad10561 100644 --- a/man/Collect.Rd +++ b/man/Collect.Rd @@ -4,7 +4,7 @@ \alias{Collect} \title{Collect and merge the computation results} \usage{ -Collect(startr_exec, wait = TRUE, remove = TRUE) +Collect(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) } \arguments{ \item{startr_exec}{An R object returned by Compute() when the parameter 'wait' @@ -25,6 +25,10 @@ received from the HPC after data being collected, as well as the local job folder under 
'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and Collect() them as many times as desired, set remove to FALSE. The default value is TRUE.} + +\item{on_remote}{A logical value deciding to the function is run locally and +sync the outputs back from HPC (FALSE, default), or it is run on HPC +(TRUE).} } \value{ A list of merged data array. diff --git a/man/Start.Rd b/man/Start.Rd index 25eb8d744084a7d7da286206eb01d3d59acfa907..640c5a9e8b6a2b27b827231fac0ede40c4258b94 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -651,7 +651,7 @@ For example, a path pattern could be as follows: \cr to recognize files such as \cr \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr Note that each glob expression can only represent one possibility (Start() -chooses the first). Because /code{*} is not the tag, which means it cannot +chooses the first). Because \code{*} is not the tag, which means it cannot be a dimension of the output array. Therefore, only one possibility can be adopted. For example, if \cr \code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr diff --git a/man/Step.Rd b/man/Step.Rd index c473ccbd3229176cf3cf9c321028ae6e2f61aa54..283c555d0c9fdc84d4694bdfbe1c7b651820c455 100644 --- a/man/Step.Rd +++ b/man/Step.Rd @@ -28,7 +28,9 @@ for multiple returned arrays indicating the dimension names of the function output.} \item{use_libraries}{A vector of character string indicating the R library -names to be used in 'fun'. The default value is NULL.} +names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +jobs are run locally, load the necessary libraries by \code{library()} +directly. The default value is NULL.} \item{use_attributes}{One or more lists of vectors of character string indicating the data attributes to be used in 'fun'. 
The list name should be diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 5e1fe9cb345c92b3329c4edacdd6c35244e88c46..e20ecfa6d1a00ef3579762faa747d9cb61089e76 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -28,3 +29,4 @@ suppressWarnings( "The target dimensions required by 'step_fun' for the input 1 are not present in the corresponding provided object in 'inputs'.") }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index fb31d00bc062368d0aa4ac642a2af2cf276c3168..580bf6e99d436d29ed6ecf7565a7c72a76e99cc2 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index ffce880299e12b7043f5628b4114bdd2206327fe..d47b0f776958d4b3996eaef7dba3877123022a29 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -51,3 +52,4 @@ ignore_attr = TRUE ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index 101bfb592ed5e21a975db603e167123b1b22dd90..80f407b887236ec1f8a1a4171ff2ab0e22b121a1 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests 
the chunking over depended and depending dimension. # ex1_14 # 1. depending dim is values() @@ -221,3 +222,4 @@ Start(dat = path, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 0c1da4ac5dda36b01669bd566efa6d0563fb4e34..5e43067cb538733f502663f62af2aca7eaf415ab 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test is to check chunking over the split dim. It involves # how to arrange the chunks in a correct order even when chunking is happening. @@ -224,3 +225,4 @@ c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index f055e96b48853b8f9f79437fcf2004dd7f50f253..49c36da1ad8e5b3ea01371360ebb9ec1e0e9add7 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_6", { @@ -125,3 +126,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 58f96a93af392b6ec7a4d3f3021370f4ede5256a..5a816efe66d575dead9a2e896f0cdbf20278b39c 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if # they have consistent dimensions in favor of Apply() computation. However, # only margin dimensions need to be identical. 
Target dimensions can have @@ -138,3 +139,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 7de1471811e0ce838b28368267eb418a5a230a20..ba07d71906b366b47aa26b9c982ebab2e3594731 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -1,3 +1,4 @@ +suppressMessages({ library(s2dv) test_that("1. ex2_13", { @@ -73,3 +74,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index fbc5af0464989ebb977d6ac7c69adde6aa868b4a..922edfbf0970b29dc5b55952a497fa424189a971 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index 05d5de6639254346bea59cbfb16466e6f516699a..785070c77517434ab14a450a4a218c55e9b39eb0 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() @@ -119,3 +120,4 @@ res4 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index c2d3e35a65a99110996e19ddc6404c2b0f381747..9c8cc39fb0f3b8bff985d2e66d48c101fb9c39a8 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using indinces() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. 
@@ -376,3 +377,4 @@ as.vector(drop(res4$output1)) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 25a803f2b158048242a382dd880106181a34cfc8..32a544e353744e6ce91ae5bd7e9751e4f15c0ec3 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using values() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. @@ -603,3 +604,4 @@ res3_180 #================================================================ }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index dfa579abd9f4f201e848c446cfe5798151662e48..33d66317792336eec8e3e880f0becb74b000caf9 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_7", { # exp data @@ -81,3 +82,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 6f218e664a37e1481f08efb18e9f4b50789125a8..07ecd13772210b11e3b61bc8e1a434fc43fc732d 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -53,3 +54,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index bfe44b13727dd6cfa264efc4b5fdf144b3004ee0..0f49a383f944d95eef3e6db41edd0d7946dfd7dd 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("Chunks of DCPP files- Local execution", { path <- 
'/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) @@ -52,3 +53,4 @@ expect_equal(dat[1,1,2,1:12,,], dat_2018_chunk1[1,1,,,]) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 7dfbc2cfc6e3bb9724fa4445b36964aa4359bae4..43651ab4fcad07f3ff65aed2bb4954f706a7e951 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', @@ -317,3 +318,4 @@ test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 9cccc2d24694d28fb76c8d853dcb8de96f8ea3e8..ec77c7fad2e8dbd670b906a1ad86091b1d05e4fb 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the case that using values() to define the depended # and depending dimensions. The depending dimension should be a list with # names that are the values of depended dimensions. @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index fecbd7c05cbe19ca40dd9d4a058495893a49fd44..070ae5312811e90f907ca1ab639b031b3305f319 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -1,3 +1,4 @@ +suppressMessages({ # When some of the files are missing, Start() still can retrieve the data and # put NA in those missing positions. 
However, when the first file is missing, # Start() returned error before because of failing to find metadata. The bug is @@ -182,3 +183,4 @@ data <- Start(dat = file, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 921c3313d5c0d22b0d98344e06463faac6a5ee28..429db5c19bdf04c15c3c8623479613b2b04d31ce 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -54,3 +55,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index d493a87ba41589d96988146d27d24afac3dc9406..995310aa9a664de1697976e5d110a794a52fa9a1 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -1,3 +1,4 @@ +suppressMessages({ # Similar as usecase ex1_13. # Use a value array as the inner dimension selector to express dependency on a # file dimension. By this means, we don't need to specify the *_across parameter @@ -156,3 +157,4 @@ c(memb = 2, sdate = 3, region = 1) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 7e0264c5a3905d6125c8b326fed548403d56dd03..9c469757c54a208b19a3ff2131eca9279051379a 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # The unit test is for the implicit inner dimension. If the inner dimension length is 1, # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. 
@@ -44,3 +45,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 2effede805887d40bef16cb1da1986e9c5eddd79..76ec511da5af2dba25a96e81c67334685c5062b1 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of indices and vector of indices. # 1. transform # 2. no transform @@ -241,3 +242,4 @@ as.vector(exp2) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index 211c1321ac8865914ea4ecdd51fdce53044629cd..3585576fa3d45093b25cadef33974ef20d66adfc 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -1,3 +1,4 @@ +suppressMessages({ # When certain inner dim of files is not consistent, the parameter 'largest_dims_length' can # be used to ensure the returned array has the largest length of inner dimensions. 
@@ -299,3 +300,4 @@ as.vector(data5)[-c(5:24)] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 11be10996738dc56baf8fff4c6d248540c3e29e8..d7f5095f92731d44bfd49e1ae057aaad818ffdbc 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') variable <- "tas" @@ -144,3 +145,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 2a2e7358824ca68b3240765e9b106ea0ff41c93c..3a68a53d99e0ab1fad0917f8e23c5bf148404246 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) @@ -983,3 +984,4 @@ dataF <- Start(dataset = path_list, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index 227383bd17d735bd7ded03c465d12b0a67be4160..aea6f1f40e1ea28f395d8e3625c4e508d0766f72 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -1,3 +1,4 @@ +suppressMessages({ # When inner dimension selector is an array with filedim dimension name (e.g., time = [sdate = 2, time = 4], # or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must # specify this relationship, i.e., return_vars = list(time = 'sdate'). 
@@ -197,3 +198,4 @@ expect_equal( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index b14326894d512bb2b003d55cf422c6b413d54552..1346dc625173a6e0137b35abf9c748225ac8bcc2 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -1,3 +1,4 @@ +suppressMessages({ # When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. test_that("1. time across fyear, fyear depends on sdate", { @@ -805,3 +806,4 @@ dates }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index e16f2bf4eb647ba103a7b2de612ab3828ab118e0..49cc003b15a88ce561007903d994a9bb606f4a86 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') # When certain values in one observation file are required more than once, @@ -163,3 +164,4 @@ obs <- Start(dat = obs_path, 0 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index 75f28d43c7e751dc5da5d6bfefce88477adeed85..2ff62788df2cca0780ea12f072e0d91c266f8ea3 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 
expid/member/version", { years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') @@ -159,3 +160,4 @@ list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_ }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index c87792e972259100dae6ba604aa1851b95a2d3fc..34874842bb80eb4b28c4afea4b550c4c7ef302dd 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1053,3 +1054,4 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) }) ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 34a766f78653209266c493823d460b2cbe89d842..1cb3b8dca83b54b2190b018a505dcf5daaa3e002 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -887,3 +888,4 @@ res <- Start(dat = list(list(path=path_exp)), # #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index 0f71f0a0feb28e2cfa6ae7f7a4fcd1b1849c6428..5dfced4d8646209daa0b12154d2970639ee81ca5 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -817,3 +818,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) 
#suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 5e7701ab41f74ccee5350f04ee88ccb140ca3116..c36b29e41d0aa2e18bbecbf17330f335d0a60b4d 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -957,3 +958,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 86ad5e737b30c81668e5b57dbce8f5953a930b66..a47b7072dd4e6f217c98fb33d7b40e6d20e50e20 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1040,3 +1041,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index c18d34a241b47843d04ed45a881fa832fb56b786..4185cca67128c8f4a16a3a3e33cf8705ef8e4266 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1045,3 +1046,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git 
a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 1e946d994a38fdfe97c061e02e53d4b2804359fa..8c717b3e5fa102b67b41e93c7e79ff21e42eee10 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index 71361d95896f50b465789e7186d43a43bc773be9..3de49a6415ce4d1a76178083d64c44152226976b 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-metadata.R b/tests/testthat/test-Start-reorder-metadata.R index ea727e5c76b9dba815f6c408cfd91f993d147dc4..501873d78bd03238ea7267a7331b9ae649f22890 100644 --- a/tests/testthat/test-Start-reorder-metadata.R +++ b/tests/testthat/test-Start-reorder-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. 
Sort() and CircularSort(0, 360)", { @@ -277,3 +278,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 25efcfc91b22f6126afbac26864461107f3f2f5e..3b8016f60c43109aeedf5f25a5f12880905a3b44 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') ############################################## @@ -155,3 +156,4 @@ res2 <- Start(dat = path_exp, }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index 87a4416e5edef0b51e2b2f06304e4a2ad4e1f2e4..fadee521bb709fbcf5876e5d97334df621b72d23 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector 'all'. #--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index 59d00d4accf229fb7b9086060147b071a029bd70..5f50c49b8b5a179afba4a38fc82d61e99dfa7a96 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector indices(). 
#--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 480a3bc64c24c2e74765c289d8179f739602fe40..af8e89fba554832b0071ee0a402344259d503adb 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -1,3 +1,4 @@ +suppressMessages({ # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' @@ -515,3 +516,4 @@ easy_array[31:61 ,1] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index e3ff876e4dc2dc3f8e5018652488590f4b601aa2..384bd63d7ab8cd51462c90dbea6328bc3b423420 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -1,3 +1,4 @@ +suppressMessages({ # The name of return_vars should be one of the inner dimension names. The synonims can # be used but will be changed back to the inner dim names. 
@@ -237,3 +238,4 @@ c(-19.5, -14.5) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 699c01cc145214b33d2aa8198f3f32956c42e03f..8e4d2e1b51af52a7caeaf62abea60cfbe75eaef2 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,3 +1,4 @@ +suppressMessages({ var_name <- 'tas' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' @@ -188,3 +189,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 0c499d31398189a840eedd0fb5a6f07cafcddba7..f15b3bd2973ab549e28f1a5caf9fcc3357a06184 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. The data has units like time", { @@ -88,3 +89,4 @@ as.POSIXct(c("2018-04-08", "2018-04-15", "2018-04-22 UTC", "2018-04-29 UTC"), tz #test_that("3. Time dimension is implicit", { # See test-Start-implicit_inner_dim.R #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index a8290a6860ba659d9576b6d94c4ce61de65db6a6..e21f6c1ac290261327fc57ebc28a60443d220431 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests the output grid. # The results should be identical and consistent with cdo result (with precision difference). # The test contains three calls with different target grids: @@ -141,3 +142,4 @@ test_that("2. 
test path 2", { +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index 9b3cc6a6828a40f0f99577486a383a44a730e054..34a33a2cca0fbe856102aca1661fd94dff16d893 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,3 +1,4 @@ +suppressMessages({ ############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, @@ -712,3 +713,4 @@ expect_equal( # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index d7d895e3a3a66e9aff7f3f771c2c299ba1295d94..2aa8e397e10b1eb9857440a0a12383433d71625f 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # "lon_reorder = CircularSort(0, 360)" are used in all the tests. @@ -124,3 +125,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 16daa79309116ef9502a9fdf9988ea43cbff2995..8d7312b0f45620056d18da9671b41a1844ad3273 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses indices() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lat/lon range is all the grids here. 
@@ -230,3 +231,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index b70b637cc140c51c37b5e7ab5514df330d5ec85d..f69d551dfcc75073434dfc7cd1bdd931a223ed1c 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses values() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lon range is all the grids here. @@ -430,3 +431,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index d07388ea13697deebfacacc3d00c9cb72822a1dd..a8df42ba25b3d6d1237d35dc8938b44c53e44dff 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index 227f09dea14bca0dfdeb478fd06b541ba2dd416f..6010b6f8d49da9e43ab56acaad67779d67f56cb6 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. 
Sort() and CircularSort(0, 360)", { @@ -275,3 +276,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 95e7c2b88ecdb53f7ddf8d6c0b6f2ce36f57a7d3..3fe48242e9a37185f991e4636471a0178d20b827 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses three different selector forms: indices(), values(), and 'all', to do # the transformation. "lat_reorder" is also tested. # Their results should be all identical and consistent with cdo result (with precision difference). @@ -194,3 +195,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 46b57d84d29f6fe357f9246a241a940257b89076..640b4785ce4ca4c5c784248829ec169b8ff2ddc8 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,3 +1,4 @@ +suppressMessages({ # ex1_8 test_that("1. ex1_8, case 1", { @@ -99,3 +100,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 1a6288be8d6f98df37326d33a4908253df1cabac..2b5cda037970753f33706714ef296dc7b8e1583c 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of values and vector of values. # 1. transform # 2. no transform @@ -246,3 +247,4 @@ as.vector(exp2) ) }) +}) #suppressMessages