From bb8abe51f20251e32ea99596772f055482e0d02d Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 28 Feb 2025 12:44:05 +0100 Subject: [PATCH 1/4] Add Nord4 as HPC Machine --- MODULES | 5 +++++ autosubmit/conf_esarchive/platforms.yml | 10 ++++++++++ autosubmit/conf_gpfs/expdef.yml | 2 +- autosubmit/conf_gpfs/platforms.yml | 10 ++++++++++ conf/autosubmit.yml | 2 +- 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/MODULES b/MODULES index e88620be..0e35a6f9 100644 --- a/MODULES +++ b/MODULES @@ -22,6 +22,11 @@ elif [[ $BSC_MACHINE == "mn5" ]]; then source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 +elif [[ $BSC_MACHINE == "nord4" ]]; then + + source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh + conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + elif [[ $BSC_MACHINE == "amd" ]]; then module purge diff --git a/autosubmit/conf_esarchive/platforms.yml b/autosubmit/conf_esarchive/platforms.yml index 78056d62..e97609ff 100644 --- a/autosubmit/conf_esarchive/platforms.yml +++ b/autosubmit/conf_esarchive/platforms.yml @@ -9,3 +9,13 @@ Platforms: PROCESSORS_PER_NODE: 16 SERIAL_QUEUE: debug QUEUE: bsc_es + nord4: + TYPE: slurm + HOST: n4login1.bsc.es + PROJECT: bsc32 + ADD_PROJECT_TO_HOST: "false" + USER: + SCRATCH_DIR: /gpfs/scratch/ + QUEUE: bsc_es + PROCESSORS_PER_NODE: 48 + diff --git a/autosubmit/conf_gpfs/expdef.yml b/autosubmit/conf_gpfs/expdef.yml index 8dc29b27..bd579342 100644 --- a/autosubmit/conf_gpfs/expdef.yml +++ b/autosubmit/conf_gpfs/expdef.yml @@ -1,6 +1,6 @@ DEFAULT: EXPID: - HPCARCH: nord3v2 + HPCARCH: nord4 experiment: DATELIST: MEMBERS: fc0 diff --git a/autosubmit/conf_gpfs/platforms.yml b/autosubmit/conf_gpfs/platforms.yml index 03f4b940..c1e8a48f 100644 --- a/autosubmit/conf_gpfs/platforms.yml +++ b/autosubmit/conf_gpfs/platforms.yml @@ 
-9,6 +9,16 @@ Platforms: PROCESSORS_PER_NODE: 16 SERIAL_QUEUE: debug QUEUE: bsc_es + nord4: + TYPE: slurm + HOST: n4login1.bsc.es + PROJECT: bsc32 + # ADD_PROJECT_TO_HOST: "false" + USER: + SCRATCH_DIR: /gpfs/scratch/ + QUEUE: bsc_es + PROCESSORS_PER_NODE: 48 + TEMP_DIR: '' mn5: TYPE: slurm HOST: glogin1.bsc.es diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 99b29f3c..f20995f7 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -1,5 +1,5 @@ esarchive: - platform: nord3v2 + platform: nord4 module_version: autosubmit/4.0.98-foss-2015a-Python-3.7.3 auto_version: 4.0.98 conf_format: yaml -- GitLab From e38064e973281738e889108395e63f2c42fa9c30 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 3 Mar 2025 11:52:10 +0100 Subject: [PATCH 2/4] Changes to run on Nord4 with modules --- MODULES | 18 ++++++++++++++++-- autosubmit/auto-scorecards.sh | 2 +- autosubmit/auto-verification.sh | 2 +- conf/slurm_templates/run_parallel_workflow.sh | 3 ++- conf/slurm_templates/run_scorecards.sh | 2 +- launch_SUNSET.sh | 10 ++++++---- 6 files changed, 27 insertions(+), 10 deletions(-) diff --git a/MODULES b/MODULES index 0e35a6f9..ed0c30ca 100644 --- a/MODULES +++ b/MODULES @@ -24,8 +24,22 @@ elif [[ $BSC_MACHINE == "mn5" ]]; then elif [[ $BSC_MACHINE == "nord4" ]]; then - source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh - conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + module load nord3-singu + module load bsc/current + module use /gpfs/projects/bsc32/software/suselinux/11/modules/all + unset PYTHONSTARTUP + + module load CDO/1.9.8-foss-2019b + module load R/4.1.2-foss-2019b + module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 + module load GEOS/3.7.2-foss-2019b-Python-3.7.4 + module load GDAL/3.5.0-foss-2019b-Python-3.7.4 + module load PROJ/9.0.0-GCCcore-8.3.0 + module load Phantomjs/2.1.1 + + singu_prefix='nord3_singu_es' + # source 
/gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh + # conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 elif [[ $BSC_MACHINE == "amd" ]]; then diff --git a/autosubmit/auto-scorecards.sh b/autosubmit/auto-scorecards.sh index 7722e3b5..5f108930 100644 --- a/autosubmit/auto-scorecards.sh +++ b/autosubmit/auto-scorecards.sh @@ -21,4 +21,4 @@ set +eu source MODULES set -eu -Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${srcdir} +${singu_prefix} Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${srcdir} diff --git a/autosubmit/auto-verification.sh b/autosubmit/auto-verification.sh index d323c451..25184537 100644 --- a/autosubmit/auto-verification.sh +++ b/autosubmit/auto-verification.sh @@ -24,4 +24,4 @@ set +eu source MODULES set -eu -Rscript ${script} ${atomic_recipe} +${singu_prefix} Rscript ${script} ${atomic_recipe} diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh index e9ef6964..f18d9144 100644 --- a/conf/slurm_templates/run_parallel_workflow.sh +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -13,4 +13,5 @@ source MODULES # conda activate condaCerise # export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib -Rscript ${script} ${atomic_recipe} +${singu_prefix} Rscript ${script} ${atomic_recipe} + diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh index 9abcac17..21bacea1 100644 --- a/conf/slurm_templates/run_scorecards.sh +++ b/conf/slurm_templates/run_scorecards.sh @@ -17,5 +17,5 @@ outdir=$2 source MODULES # Execute scorecards -Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} +${singu_prefix} Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index b95898b5..193f2ebf 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -108,7 +108,7 @@ source MODULES # conda activate 
condaCerise # export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib -Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile +${singu_prefix} Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? run_method=$( head -1 $tmpfile | tail -1 ) @@ -119,9 +119,9 @@ if [[ $run_method == "sbatch" ]]; then # Retrieve output directory outdir=$( head -3 $tmpfile | tail -1 ) # Multimodel TRUE/FALSE - multimodel=$( head -4 $tmpfile | tail -1) + multimodel=$( head -4 $tmpfile | tail -1 ) # Scorecards TRUE/FALSE - scorecards=$( head -5 $tmpfile | tail -1) + scorecards=$( head -5 $tmpfile | tail -1 ) # Create directory for slurm output logdir=${outdir}/logs/slurm/ @@ -131,9 +131,11 @@ if [[ $run_method == "sbatch" ]]; then # Get corrected recipe recipe=${outdir}/logs/recipes/$(basename $recipe) - # Is machine MN5? + # Does machine require special queue/partition/group params? if [[ $BSC_MACHINE == "mn5" ]]; then platform_params="-A bsc32 -q gp_bsces" + elif [[ $BSC_MACHINE == "nord4" ]]; then + platform_params="-A bsc32 -q bsc_es" else platform_params="" fi -- GitLab From af2c0d49cf37f0d5576d1c69211b0df1969078cb Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 3 Mar 2025 12:09:23 +0100 Subject: [PATCH 3/4] Predefine singu_prefix variable --- MODULES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MODULES b/MODULES index ed0c30ca..8939971c 100644 --- a/MODULES +++ b/MODULES @@ -3,6 +3,8 @@ # WARNING: CDO HAS TO BE ON VERSION 1.9.4 # (If not, conflicts with weekly means computation could appear) +singu_prefix="" + if [[ $BSC_MACHINE == "nord3v2" ]]; then module use /gpfs/projects/bsc32/software/suselinux/11/modules/all -- GitLab From 30521afede649d2e4acc4fec673706804fe00e19 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 7 Mar 2025 11:34:00 +0100 Subject: [PATCH 4/4] Update use cases to use Nord4 --- .../ex1_2_autosubmit_scorecards/ex1_2-handson.md | 12 ++++++------ 
.../ex1_2_autosubmit_scorecards/ex1_2-handson.md | 12 ++++++------ .../ex1_2_autosubmit_scorecards/ex1_2-recipe.yml | 2 +- .../ex1_3_nino_indices_comparison/ex1_3-handson.md | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md index 8ad96b15..9f3b05f9 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md @@ -31,16 +31,16 @@ You should see a git folder "sunset" under the current directory. Now you have a Since we're going to use Autosubmit to dispatch jobs, we need to have an Autosubmit experiment. Note that SUNSET uses Autosubmit >= 4.0.0. -On the workstation or the Autosubmit machine, you can create an experiment by the following commands. +On the workstation or the Autosubmit machine, you can create an experiment by the following commands. **AUTOSUBMIT EXPERIMENTS ARE REUSABLE!** You do not need to create a new one every time. ```shell -module load autosubmit/4.0.0b-foss-2015a-Python-3.7.3 -autosubmit expid -H nord3v2 -d "SUNSET use case 1_2" +module load autosubmit/4.0.98-foss-2015a-Python-3.7.3 +autosubmit expid -H nord4 -d "SUNSET use case 1_2" ``` You will see the messages like below: ```shell -Autosubmit is running with 4.0.0b +Autosubmit is running with 4.0.98 The new experiment "a6pc" has been registered. Generating folder structure... Experiment folder: /esarchive/autosubmit/a6pc @@ -58,7 +58,7 @@ You should at least edit some items in the "Run" section: - `code_dir`: The directory where your SUNSET code is stored (i.e., the git folder) - `auto_conf$script`: The path to the script ex1_2-recipe.yml - `auto_conf$expid`: The experiment "xxxx" you just created -- `auto_conf$hpc_user`: You user ID on Nord3, which should be bsc032xxx +- `auto_conf$hpc_user`: Your user ID on Nord4, which should be bsc032xxx - `auto_conf$email_address`: Your email. You can also adjust other email notification parts up to your preference. 
In the recipe, we ask for anomaly calculation after loading the data, calculate the skill scores and save the result for scorecards. In the Scorecard section, three regions are requested. @@ -123,7 +123,7 @@ We will start the jobs with the launcher. The SUNSET Launcher is a bash script n The bash script needs two inputs: (1) [recipe](#2-modifying-the-recipe) (2) [R script](#3-the-user-defined-script). - On your workstation or Nord3 under the SUNSET code directory, run: + On your workstation or Nord4 under the SUNSET code directory, run: ```shell bash launch_SUNSET.sh use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml index 3c6ed1d7..a4e4b367 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml @@ -90,7 +90,7 @@ Run: hpc_user: bsc032762 # replace with your hpc username wallclock: 03:00 # hh:mm processors_per_job: 16 - platform: nord3v2 + platform: nord4 custom_directives: ['#SBATCH --exclusive'] email_notifications: yes # enable/disable email notifications. Change it if you want to. email_address: victoria.agudetse@bsc.es # replace with your email address diff --git a/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md b/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md index edf6f8f0..318a25b9 100644 --- a/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md +++ b/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md @@ -83,13 +83,13 @@ A complete, ready-to-use sample of this example script can be found in `use_case ## 3. Launching the jobs with the SUNSET Launcher -The first step is to connect to the HPC machine through `ssh`. When working without Autosubmit, the SUNSET Launcher should be run directly from the HPC machine where the jobs will run (for example, Nord3v2). 
There is no need to request an interactive session; the launcher script can be called directly from the login node. You can obtain detailed usage information by running: +The first step is to connect to the HPC machine through `ssh`. When working without Autosubmit, the SUNSET Launcher should be run directly from the HPC machine where the jobs will run (for example, Nord4). There is no need to request an interactive session; the launcher script can be called directly from the login node. You can obtain detailed usage information by running: ```shell bash launch_SUNSET.sh --help ``` -The mandatory arguments are the paths to the recipe and the script. We can also include other optional arguments to be used by SLURM, such as the number of CPUs to request (--cpus), the wallclock time for each job (--wallclock) and other extra directives (--custom_directives). You can refer to the [Nord3v2 user guide](https://www.bsc.es/user-support/nord3v2.php#jobdirectives) and the [SLURM sbatch documentation](https://slurm.schedmd.com/sbatch.html) for more information on the available options for the parameters. +The mandatory arguments are the paths to the recipe and the script. We can also include other optional arguments to be used by SLURM, such as the number of CPUs to request (--cpus), the wallclock time for each job (--wallclock) and other extra directives (--custom_directives). You can refer to the [Nord4 user guide](https://www.bsc.es/supportkc/docs/Nord4/slurm#job-directives) and the [SLURM sbatch documentation](https://slurm.schedmd.com/sbatch.html) for more information on the available options for the parameters. In this case, we are giving each job a wallclock time of 1 hour and requesting exclusive usage of all the cores in one node. The shell command to run SUNSET will look like this: -- GitLab