diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R
index ec3363293b13cf351876365ad8da795bd8a9a220..16d273d04c4955478f179e62a2f9283c961313e4 100644
--- a/R/ByChunks_autosubmit.R
+++ b/R/ByChunks_autosubmit.R
@@ -20,7 +20,7 @@
 #'@param threads_compute An integer indicating the number of execution threads
 #'  to use for the computation. The default value is 1.
 #'@param cluster A list of components that define the configuration of the
-#'  machine to be run on. The comoponents vary from different machines. Check
+#'  machine to be run on. The components vary for different machines. Check
 #'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
 #'  for more details and examples.
 #'@param autosubmit_suite_dir A character string indicating the path to a folder
@@ -29,8 +29,9 @@
 #'  as autosubmit machine. The default value is NULL, and a temporary folder
 #'  under the current working folder will be created.
 #'@param autosubmit_server A character vector indicating the login node of the
-#'  autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02".
-#'  The default value is NULL, and the node will be randomly chosen.
+#'  autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02".
+#'  If NULL, Autosubmit will be run locally on the current machine.
+#'  The default value is NULL.
 #'@param silent A logical value deciding whether to print the computation
 #'  progress (FALSE) on the R session or not (TRUE). It only works when the
 #'  execution runs locally or the parameter 'wait' is TRUE. The default value
@@ -167,8 +168,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto',
     if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) {
       stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.")
     }
-  } else {
-    autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1))
   }
 
   ## silent
@@ -238,7 +237,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto',
   ### queue_host
   support_hpcs <- c('local', 'nord3') # names in platforms.yml
   if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) {
-    stop("Cluster component 'queue_host' must be one of the follows: ",
+    stop("Cluster component 'queue_host' must be one of the following: ",
         paste(support_hpcs, collapse = ','), '.')
   }
 
@@ -288,6 +287,17 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto',
   if (!is.character(cluster[['autosubmit_module']])) {
     stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.")
   }
+  ### autosubmit_version
+  if (!is.null(cluster[['autosubmit_version']])) {
+    if (!is.character(cluster[['autosubmit_version']])) {
+      stop("The component 'autosubmit_version' of the parameter 'cluster' must be a character string.")
+    }
+  } else {
+    cluster[['autosubmit_version']] <- stringr::str_extract(cluster[['autosubmit_module']],
+                                                            "(?<=/)(.+)(?=\\-foss)")
+    warning("The component 'autosubmit_version' has not been provided. It will ",
It will ", + "be parsed from 'autosubmit_module'.") + } ### cores_per_job if (is.null(cluster[['cores_per_job']])) { cluster[['cores_per_job']] <- threads_compute @@ -329,10 +339,14 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', ### expid as_module <- cluster[['autosubmit_module']] if (is.null(cluster[['expid']])) { - text <- system( - paste0("module load ", as_module, "; ", - "autosubmit expid -H local -d 'startR computation'"), - intern = T) + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit expid -H local -d 'startR computation'") + if (!is.null(autosubmit_server)) { + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') + } + + text <- system(sys_commands, intern = TRUE) cluster[['expid']] <- strsplit( text[grep("The new experiment", text)], "\"")[[1]][2] @@ -600,26 +614,25 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } time_begin_first_chunk <- Sys.time() sys_commands <- paste0("module load ", as_module, "; ", - "autosubmit create ", suite_id, " -np; ", + "autosubmit create ", suite_id, " -v -np; ", "autosubmit refresh ", suite_id, "; ") if (wait) { sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) } else { sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? } - if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + # Execute system commands locally or remotely + if ((is.null(autosubmit_server)) || + (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit')) { + # If autosubmit_server is NULL or we are already on bscesautosubmit0x #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here system(sys_commands) } else { -# } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { - # ssh from WS to AS VM to run exp + # ssh from local machine to AS VM to run exp as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') system(sys_commands) - -# } else { -# stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. diff --git a/R/Utils.R b/R/Utils.R index e440ddeedd76a6fe1c9fa3b7f81746f611c0692b..242ec70e1cc27b069ba3e68251dbce5dc3535293 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -943,6 +943,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { ############################################################ if (conf_type == "autosubmit") { + conf$config$AUTOSUBMIT_VERSION <- cluster['autosubmit_version'] #Q: Should it be the total amount of chunk? conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total amount of chunk #NOTE: Nord3 max. amount of queued jobs is 366 diff --git a/inst/chunking/Autosubmit/autosubmit.yml b/inst/chunking/Autosubmit/autosubmit.yml index 8b129a0dd71a2b6795f5548d7ba5937ce33b7970..b08fca4cbbc27918655a55b2f907596b08e725fa 100644 --- a/inst/chunking/Autosubmit/autosubmit.yml +++ b/inst/chunking/Autosubmit/autosubmit.yml @@ -1,5 +1,5 @@ config: - AUTOSUBMIT_VERSION: 4.0.0b0 + AUTOSUBMIT_VERSION: MAXWAITINGJOBS: # Should it be the total amount of chunk? 
   TOTALJOBS: 
   SAFETYSLEEPTIME: 10
diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md
index 91c11ee627b0a679325dda8ea6e2c1ced06e2baa..637474ef1cbfd78e3a0469fde8fa06587df8daac 100644
--- a/inst/doc/practical_guide.md
+++ b/inst/doc/practical_guide.md
@@ -581,7 +581,7 @@ res <- Compute(wf,
 
 #### 4-3-2. Compute() on HPCs with ecFlow
 
 We can use workflow manager (ecFlow or Autosubmit) to dispatch computation jobs on a HPC.
-To use Autosubmit, check the next session.
+To use Autosubmit, check the next section.
 You will need to make sure that the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions.
 Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported).
@@ -712,12 +712,13 @@ To use Autosubmit as workflow manager, add the following parameters to your Comp
 
 `autosubmit_suite_dir` is the path where to store temporary files generated for Autosubmit to establish the workflow. It should be found in both workstation and the Autosubmit machine.
 
-`autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01'or 'bscesautosubmit02'.
+`autosubmit_server` can be 'local' if Autosubmit is installed on the machine where R is running (e.g. the BSC-ES Hub), or it can be the login node of the Autosubmit machine, i.e., 'bscesautosubmit01' or 'bscesautosubmit02'. The default is 'local'.
 
-The parameter `cluster` expects a list of components that provide the configuration of Autosubmit machine. For now, the supported platforms are 'local' (run on Autosubmit machine) and 'nord3' (Autosubmit submits jobs to Nord3).
+The parameter `cluster` expects a list of components that provide the configuration of the Autosubmit machine. For now, the supported platforms are 'local' (run on the current machine) and 'nord3' (Autosubmit submits jobs to Nord3).
 You can see one example of cluster configuration below.
 
 ```r
+# Launch Autosubmit on the Autosubmit machine
 res <- Compute(wf, chunks = list(sdate = 2),
                threads_compute = 4, threads_load = 2,
                cluster = list(
@@ -727,6 +728,7 @@ You can see one example of cluster configuration below.
                  r_module = "R/4.1.2-foss-2019b",
                  CDO_module = "CDO/1.9.8-foss-2019b",
                  autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
+                 autosubmit_version = '4.0.0b',
                  cores_per_job = 4,
                  job_wallclock = '01:00:00',
                  max_jobs = 4
@@ -736,6 +738,28 @@
                autosubmit_server = 'bscesautosubmit01',
                wait = TRUE
               )
+
+# Launch Autosubmit locally from the BSC-ES Hub
+res <- Compute(wf, chunks = list(sdate = 2),
+               threads_compute = 4, threads_load = 2,
+               cluster = list(
+                 queue_host = 'nord3',
+                 expid = ,
+                 hpc_user = "bsc32xxx",
+                 r_module = "R/4.1.2-foss-2019b",
+                 CDO_module = "CDO/1.9.8-foss-2019b",
+                 autosubmit_module = 'autosubmit/4.0.98-foss-2021b-Python-3.9.6',
+                 autosubmit_version = '4.0.98',
+                 cores_per_job = 4,
+                 job_wallclock = '01:00:00',
+                 max_jobs = 4
+               ),
+               workflow_manager = 'autosubmit',
+               autosubmit_suite_dir = "/home/Earth//startR_local_autosubmit/",
+               autosubmit_server = 'local',
+               wait = TRUE
+              )
+
 ```
 
 The cluster components and options are explained next:
@@ -751,6 +775,7 @@ To have the good practice, note down the expid if it is automatically created by
 - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used.
 - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start().
 - `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used.
+- `autosubmit_version`: The Autosubmit version (e.g. '4.0.0b'). If not specified, it will be parsed from `autosubmit_module`. If it cannot be parsed from the module name, it will be set to '4.0.0b' by default.
 - `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue.
 It is corresponded to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_.
 - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`.
@@ -1105,6 +1130,7 @@ r <- Compute(wf,
 ### Nord3-v2
 
 ```r
+# Using ecFlow
 cluster = list(queue_host = 'nord4.bsc.es',
                queue_type = 'slurm',
                temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/',
@@ -1113,7 +1139,8 @@ cluster = list(queue_host = 'nord4.bsc.es',
                max_jobs = 4,
                bidirectional = FALSE,
                polling_period = 10
-               )
+               )
+
 ```
 
 ### Nord3 (deprecated)
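
Note on the version parsing added in ByChunks_autosubmit.R: when `autosubmit_version` is not provided, the patch extracts it from the module name with a lookbehind/lookahead regex. Below is a minimal standalone sketch (not part of the patch) showing what `stringr::str_extract()` returns for the two module names used in the guide's examples.

```r
# Minimal sketch (not part of the patch): how the Autosubmit version is
# recovered from 'autosubmit_module' when 'autosubmit_version' is missing.
library(stringr)

modules <- c("autosubmit/4.0.0b-foss-2015a-Python-3.7.3",
             "autosubmit/4.0.98-foss-2021b-Python-3.9.6")

# Keep what sits between the "/" (lookbehind) and "-foss" (lookahead).
str_extract(modules, "(?<=/)(.+)(?=\\-foss)")
#> [1] "4.0.0b" "4.0.98"
```

A module name without a "-foss" toolchain suffix would yield NA here, which is why setting `autosubmit_version` explicitly in the `cluster` list is the safer option.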
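To illustrate the new local-versus-remote execution logic, here is a small standalone sketch with assumed values (not part of the patch) of how the `autosubmit expid` command is built and, when `autosubmit_server` is not NULL, wrapped in an ssh call to the Autosubmit VM.

```r
# Standalone sketch with assumed values; mirrors the logic added in
# ByChunks_autosubmit.R for creating the experiment id.
as_module <- "autosubmit/4.0.0b-foss-2015a-Python-3.7.3"  # assumed module name
autosubmit_server <- "bscesautosubmit01"                   # NULL would mean "run locally"

sys_commands <- paste0("module load ", as_module, "; ",
                       "autosubmit expid -H local -d 'startR computation'")
if (!is.null(autosubmit_server)) {
  # ssh from the local machine to the Autosubmit VM and run the command there
  as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es')
  sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"')
}
cat(sys_commands, "\n")
# e.g. ssh <user>@bscesautosubmit01.bsc.es "module load autosubmit/4.0.0b-foss-2015a-Python-3.7.3; autosubmit expid -H local -d 'startR computation'"
```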