diff --git a/README.md b/README.md
index 58941d3eb2a9360b1accf6a1218045dde7289c1e..ade9efd8110ef83f0c120ee3d458fd9ebbe5db2e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Nemo modelfactors
-The project was developed tested using NEMO 4.2 version.
+The project was developed and tested using NEMO version 4.2.
 
 This project is intended to compute important performance metrics for a NEMO run.
 
@@ -27,15 +27,16 @@ Here the list of the modules that need to be loaded before the script execution.
 # Usage
 * Clone this repository wherever you please.
-* Edit the file perf_metrics.config and replace the parameters values with the suited information.
-* MINIMUM CHANGES perf_metrics.config :
+* Don't move the contents of the repository elsewhere; the sub-modules won't load and the script will fail. If you want, you can init the sub-modules manually with `git submodule update --init` and then move the contents.
+* ***Edit the file perf_metrics.config and replace the parameter values with the appropriate information.***
+* ***MINIMUM CHANGES in perf_metrics.config:***
   * Nemo_path, change the value to the path where NEMO is installed on your machine.
   * Nemo_input_data, change the value to the path where the input data for the configuration is downloaded.
   * Compilation_arch, replace the value with the name of the arch file that you use to compile NEMO.
   * Modules, change the value to suit the name of the modules you need to load.
   * Jobs_scheduler, replace the value with the name of the scheduler installed on your machine (currently supports slurm, lsf and torque)
-* Execute perf_metrics.bash
+* Execute perf_metrics.bash
 ```
 ./perf_metrics.bash
 ```
-* If the script executes without problems the data will be ready in the Metrics folder.
+* If the script executes without problems, the data will be ready, by default, inside the ../Output/Metrics folder. The Output dir path can be changed in perf_metrics.config.
diff --git a/perf_metrics.bash b/perf_metrics.bash
index 6f28d987ef822ca6eb2dcc48361db8a420a99dde..9470d4aa1eb019a172abd76f4ff476c7fcb61483 100755
--- a/perf_metrics.bash
+++ b/perf_metrics.bash
@@ -10,6 +10,9 @@ if [ $# -gt 0 ]; then
 fi
 
+
+#Load submodules
+git submodule update --init
 
 #Get script directory
 dir=$(pwd)
@@ -33,15 +36,21 @@ Init
 
 #Test if parameters are valid
 Test_arguments
 
+cd "${Gprof_path}" || { echo "Error: ${Gprof_path} folder doesn't exist"; exit 1; }
+
 #Create the list of important functions from NEMO
-Gprof_functions
+Gprof_functions &
 
+cd "${Run_path}" || { echo "Error: ${Run_path} folder doesn't exist"; exit 1; }
 #Get the traces of the executions and cut 1 timestep
 Get_trace
 
+cd "${Metrics_path}" || { echo "Error: ${Metrics_path} folder doesn't exist"; exit 1; }
 #Generate the performance metrics
 Create_metrics
 }
 main "$@"; exit
+
+
diff --git a/perf_metrics.config b/perf_metrics.config
index 575f1788bdb4948ea8bba285725668e16caffae6..59ee9cc73f1840e88c294ee1848a6d561a17e729 100644
--- a/perf_metrics.config
+++ b/perf_metrics.config
@@ -3,39 +3,50 @@
 #################################################################################
 
-# Nemo_path: Path to nemo installation folder containing the cfgs and arch dirs.
-# Nemo_input_data: Path to the input data needed to run the nemo cfg.
-# Nemo_cores: List of nºcores used for executing Nemo, ( 4 48 ) makes the script execute and
+# Output (string): Path where the Output dir, containing all the output files, will be created.
+
+Output=".."
+
+# Nemo_path (string): Path to nemo installation folder containing the cfgs and arch dirs.
+# Nemo_input_data (string): Path to the input data needed to run the nemo cfg.
+# Nemo_run (string): Path where the folder Run_NEMO will be created.
+# Nemo_cores (array): List of nºcores used for executing Nemo; ( 4 48 ) makes the script execute and
 # get Nemo traces with 4 and 48 cores. 2 different nºcores are needed to obtain scalability data.
 
-Nemo_path="../NEMO/"
-Nemo_input_data="../DATA/ORCA2_ICE_v4.2.0/"
+Nemo_path="NEMO_INSTALLATION_PATH"
+Nemo_input_data="NEMO_INPUT_DATA_PATH"
 Nemo_cores=( 4 24 48 96 192)
 
-# Jobs_n_cores: nºcores used for other jobs like compiling nemo.
-# Jobs_scheduler: Available (slurm/lsf/torque).
-# Jobs_time: Max duration of the job in min.
-# Jobs_queue: Queue used.
+# Jobs_n_cores (integer): nºcores used for other jobs like compiling nemo.
+# Jobs_cores_per_node (integer): defines the number of cores per node.
+# Jobs_scheduler (string): Available (slurm/lsf/torque).
+# Jobs_time (integer): Max duration of the job in min.
+# Jobs_queue (string): Queue used.
 
-Jobs_n_cores=96
+Jobs_n_cores=4
+Jobs_cores_per_node=
 Jobs_scheduler="slurm"
 Jobs_time=60
-Jobs_queue=debug
+Jobs_queue=
 
-# Compilation_compile: When false only compiles NEMO if arch file lacks the needed flags, when true always compiles NEMO.
-# Compilation_ref: Reference configuration.
-# Compilation_arch: Architecture used (without the -arch sufix and the .fcm).
-# Compilation_name: Name of the new configutation (Important to not be an existing one).
-# Compilation_sub: Add or remove subcomponents.
+# Compilation_compile (boolean): When false, NEMO is only compiled if the arch file lacks the needed flags; when true, NEMO is always compiled.
+# Compilation_ref (string): Reference configuration.
+# Compilation_arch (string): Architecture used (without the -arch suffix and the .fcm).
+# Compilation_name (string): Name of the new configuration (important: must not be an existing one).
+# Compilation_sub (string): Add or remove sub-components.
 
-Compilation_compile="false"
+Compilation_compile=false
 Compilation_ref="ORCA2_ICE_PISCES"
-Compilation_arch="Your-arch-file"
+Compilation_arch="YOUR_ARCH_FILE"
 Compilation_name="ORCA2_EXTRAE"
 Compilation_sub="OCE del_key 'key_si3 key_top'"
-
-# List of modules loaded.
+
+# Clean (boolean): If true, at the end of the script all residual files from NEMO executions (data, outputs, executable, folders) are deleted.
+
+Clean=true
+
+# Modules (string): List of modules loaded.
 # Required:
 # - Perl interpreter
 # - Fortran compiler (ifort, gfortran, pgfortran, ftn, …)
diff --git a/src/Job_Creator.py b/src/Job_Creator.py
index dc70f90d6479acaa4bc95966fcaedebb69bb63a9..94180fd2b4632166050c7cfcf5d957158de80939 100644
--- a/src/Job_Creator.py
+++ b/src/Job_Creator.py
@@ -22,7 +22,7 @@ def get_command_line_arguments():
                         help="Set slurm time, in minutes")
     parser.add_argument("--set-core", default=1, type=int,
                         help="Set number of cores to be used for the job")
-    parser.add_argument("--set-core-per-node", default=48, type=int,
+    parser.add_argument("--set-core-per-node", default=0, type=int,
                         help="Set number of cores per node to be used for the job")
     parser.add_argument("-j", "--job-name", default=None,
                         help="Name of the job you want to create or modify")
@@ -65,9 +65,9 @@ def create_job_slurm(args):
 
     if cores is not None:
         file.append("#SBATCH --ntasks " + str(cores) + "\n")
-    if cores_per_node is not None:
+    if cores_per_node is not None and cores_per_node != 0:
         file.append("#SBATCH --ntasks-per-node " + str(cores_per_node) + "\n")
-    if time is not None and not 0:
+    if time is not None and time != 0:
         file.append("#SBATCH --time " + str(time) + "\n")
     if name is not None:
         file.append("#SBATCH -J " + name + "\n")
@@ -100,7 +100,7 @@ def create_job_lsf(args):
 
     if cores is not None:
         file.append("#BSUB -n " + str(cores) + "\n")
-    if time is not None and not 0:
+    if time is not None and time != 0:
         file.append("#BSUB -W " + str(time) + "\n")
     if name is not None:
         file.append("#BSUB -J " + name + "\n")
@@ -125,9 +125,13 @@ def create_job_torque(args):
     cores_per_node = args.set_core_per_node
     queue = args.set_queue
     workload = args.set_workload
-    nodes = (cores//cores_per_node)+1
-    hours = time // 60
-    minutes = time % 60
+    if cores_per_node is not None and cores_per_node != 0:
+        nodes = (cores//cores_per_node)+1
+
+    if time is not None:
+        hours = time // 60
+        minutes = time % 60
+
     file = ["#!/bin/bash \n",
             "############################################################################### \n",
             "#PBS -o "+str(name)+".out \n#PBS -e "+str(name)+".err \n"
             ]
 
     if cores is not None:
         file.append("#PBS -l nodes=" + str(nodes) + ":ppn=" + str(cores) + "\n")
-    if time is not None and not 0:
+    if time is not None and time != 0:
         file.append("#PBS -l cput=" + str(hours) + ":"+str(minutes)+ ":00\n")
     if name is not None:
         file.append("#PBS -N " + name + "\n")
diff --git a/src/functions.bash b/src/functions.bash
index 7d7cb1d6128eb2442b065f66cd7efa91af70a0aa..fd3de97879ae390995b6746eafce93f2e44de893 100644
--- a/src/functions.bash
+++ b/src/functions.bash
@@ -1,3 +1,5 @@
+
+
 # Functions
 
 #Checks if the job submission ended correctly.
@@ -6,7 +8,10 @@ Job_completed()
     if [ "$Jobs_scheduler" == "slurm" ]; then
         local id1
         id1=${1##* }
-        sleep 5
+        until ! scontrol show job "$id1" | grep -q 'JobState=COMPLETING'
+        do
+            sleep 1
+        done
         if ! scontrol show job "$id1" | grep -q 'JobState=COMPLETED'; then
             Completed=false
         else
@@ -16,7 +21,6 @@ Job_completed()
     elif [ "$Jobs_scheduler" == "lsf" ]; then
         local id2
         id2=$(head -n1 "$1" | cut -d'<' -f2 | cut -d'>' -f1)
-        sleep 5
         if ! bjobs -l "$id2" | grep -q 'Status '; then
             Completed=false
         else
@@ -25,7 +29,6 @@ Job_completed()
     elif [ "$Jobs_scheduler" == "torque" ]; then
         local id3
         id3=$(head -n1 "$1" | awk '{ print $3 }')
-        sleep 5
         if ! qstat -f "$id3" | grep -q 'exit_status = 0'; then
             Completed=false
         else
@@ -40,7 +43,7 @@ Job_completed()
 
 Compile_extrae()
 {
-    cd "$dir" || (echo "Error original dir doesn't exist"; exit 1)
+    trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT
 
     #Get flag lines
 
@@ -112,42 +115,44 @@ Compile_extrae()
     if [ "$compile" == true ] || [ "$compile_ext" == true ]; then
 
         echo "Compiling Nemo for EXTRAE"
-        echo "Output of the compilation in compile.err and compile.out"
+
-        printf -v workload1 "cd ${Nemo_path}\n./makenemo -r ${cfg} -n ${name_cfg} -m ${arch} -j$Jobs_n_cores $comp_cfg"
-        python3 ./src/Job_Creator.py -f "compile" -j "compile" --set-core "${Jobs_n_cores}" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "${workload1}"
+        printf -v workload1 "cd ${Nemo_path}; ./makenemo -r ${cfg} -n ${name_cfg} -m ${arch} -j$Jobs_n_cores $comp_cfg; cd ${Run_path}"
+        python3 "$dir"/src/Job_Creator.py -f "compile_extrae" -j "compile_extrae" --set-core "${Jobs_n_cores}" --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "${workload1}"
 
-        state1=$("$job" --wait compile."$Jobs_scheduler")
+        if ! state1=$("$job" "$wait" compile_extrae."$Jobs_scheduler"); then
+            exit 1
+        fi
+
         echo
         Job_completed "$state1"
+        mv compile_extrae.* "${logs_path}"
 
         if [ $Completed == false ]; then
-            echo "Nemo compilation failed, remember to load all the needed modules. Check the details in compile.err"
+            echo "Nemo compilation failed, remember to load all the needed modules. Check the details in ${logs_path}/compile_extrae.err"
             echo
             exit 1
         else
-            echo "Nemo compilation successful"
+            echo "Nemo Extrae compilation successful"
             echo
+        fi
+
     else
         echo "Compilation not needed"
         echo
     fi
-    #Copy all the EXP00 data but don't overwrite namelist just the executable
-    cd "$dir" ||( echo "Error original dir doesn't exist"; exit 1 )
-    echow ""
-    cp -n "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/* Run_NEMO
-    cp "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/nemo Run_NEMO
-    cd Run_NEMO||( echo "Error Run_NEMO folder doesn't exists"; exit 1 )
+
+
 }
 
 #Check if Nemo is compiled for using Gprof
 Compile_gprof()
 {
-
-    cd "$dir" || (echo "Error original dir doesn't exist"; exit 1)
+    trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT
 
     if [ "$compile" == true ]; then
         echo 'compile parameter is initialized true'
@@ -167,7 +172,7 @@ Compile_gprof()
         if ! echo "${line}"|grep -q "\-g\b"; then
             echo "-g flag not found in arch-${arch}_GPROF.fcm: editing arch-${arch}_GPROF.fcm "
             sed -i '/^%FCFLAGS/ s/$/ -g /' "${Nemo_path}"/arch/arch-"${arch}"_GPROF.fcm
-            compile_gprof=true
+            comp_gprof=true
         fi
 
         # If -pg is not there, recompilation is required and -pg added
@@ -175,96 +180,90 @@ Compile_gprof()
         if ! echo "${line}"|grep -q "\-pg\b"; then
             echo "-pg flag not found in FCFLAGS arch-${arch}_GPROF.fcm: editing arch-${arch}_GPROF.fcm "
             sed -i '/^%FCFLAGS/ s/$/ -pg/' "${Nemo_path}"/arch/arch-"${arch}"_GPROF.fcm
-            compile_gprof=true
+            comp_gprof=true
         fi
         if ! echo "${line2}"|grep -q "\-pg\b"; then
             echo "-pg flag not found in FPPFLAGS arch-${arch}_GPROF.fcm : editing arch-${arch}_GPROF.fcm "
             sed -i '/^%FPPFLAGS/ s/$/ -pg/' "${Nemo_path}"/arch/arch-"${arch}"_GPROF.fcm
-            compile_gprof=true
+            comp_gprof=true
         fi
         if ! echo "${line3}"|grep -q "\-pg\b"; then
             echo "-pg flag not found in LDFLAGS arch-${arch}_GPROF.fcm: editing arch-${arch}_GPROF.fcm "
             sed -i '/^%LDFLAGS/ s/$/ -pg/' "${Nemo_path}"/arch/arch-"${arch}"_GPROF.fcm
-            compile_gprof=true
+            comp_gprof=true
        fi
 
         # If nemo executable is not on the run file compile
         if ! test -f "${Nemo_path}/cfgs/${name_cfg}_GPROF/EXP00/nemo"; then
             echo "nemo executable not found in ${name_cfg}_GPROF"
-            compile_gprof=true
+            comp_gprof=true
         fi
 
-    if [ "$compile" == true ] || [ "$compile_gprof" == true ]; then
+    if [ "$compile" == true ] || [ "$comp_gprof" == true ]; then
 
         echo "Compiling Nemo for GPROF"
-        echo "Output of the compilation in compile.err and compile.out"
-
-        printf -v workload1 "cd ${Nemo_path}\n./makenemo -r ${cfg} -n ${name_cfg}_GPROF -m ${arch}_GPROF -j$Jobs_n_cores $comp_cfg"
-        python3 ./src/Job_Creator.py -f "compile" -j "compile" --set-core "${Jobs_n_cores}" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "${workload1}"
+        printf -v workload1 "cd ${Nemo_path}; ./makenemo -r ${cfg} -n ${name_cfg}_GPROF -m ${arch}_GPROF -j$Jobs_n_cores $comp_cfg; cd ${Gprof_path};"
+        python3 "$dir"/src/Job_Creator.py -f "compile_gprof" -j "compile_gprof" --set-core "${Jobs_n_cores}" --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "${workload1}"
 
-        state1=$("$job" --wait compile."$Jobs_scheduler")
+        if ! state1=$("$job" "$wait" compile_gprof."$Jobs_scheduler"); then
+            exit 1
+        fi
+
         echo
         Job_completed "$state1"
+        mv compile_gprof.* "${logs_path}"
 
         if [ "$Completed" == false ]; then
-            echo "Nemo compilation failed, remember to load all the needed modules. Check the details in compile.err"
+            echo "Nemo compilation failed, remember to load all the needed modules. Check the details in ${logs_path}/compile_gprof.err"
             echo
             exit 1
         else
-            echo "Nemo compilation successful"
+            echo "Nemo Gprof compilation successful"
             echo
+        fi
+
     else
         echo "Compilation not needed"
         echo
     fi
-
-    #Copy all the EXP00 data in RUN_NEMO folder but don't overwrite the namelist, just the executable.
+    #Copy all the EXP00 data in Gprof folder but don't overwrite the namelist, just the executable.
 
-    cp -n "${Nemo_path}"/cfgs/"${name_cfg}"_GPROF/EXP00/* Run_NEMO
-    cp "${Nemo_path}"/cfgs/"${name_cfg}"_GPROF/EXP00/nemo Run_NEMO
+    cp -n "${Nemo_path}"/cfgs/"${name_cfg}"_GPROF/EXP00/* "${Gprof_path}"
+    cp "${Nemo_path}"/cfgs/"${name_cfg}"_GPROF/EXP00/nemo "${Gprof_path}"
 
     if [[ $comp_cfg == "-d OCE del_key 'key_si3 key_top'" ]]; then
-        sed -i '/_def_nemo-ice.xml\|def_nemo-pisces.xml/d' Run_NEMO/context_nemo.xml #DELETE ICE AND PISCES CONTEXT (NOT USED)
-    fi
-
-    #Solving NEMO input file common errors
-
-    if test -f "Run_NEMO/weights_core_orca2_bicubic_noc.nc"; then
-        mv Run_NEMO/weights_core_orca2_bicubic_noc.nc Run_NEMO/weights_core2_orca2_bicub.nc #RENAME WRONG NAMED FILES
+        sed -i '/_def_nemo-ice.xml\|def_nemo-pisces.xml/d' "${Gprof_path}"/context_nemo.xml #DELETE ICE AND PISCES CONTEXT (NOT USED)
     fi
 
-    if test -f "weights_core_orca2_bilinear_noc.nc"; then
-        mv Run_NEMO/weights_core_orca2_bilinear_noc.nc Run_NEMO/weights_core2_orca2_bilin.nc #RENAME WRONG NAMED FILES
-    fi
-
-    cd Run_NEMO||(echo "Error Run_NEMO folder doesn't exists"; exit 1)
+    sed -i '/ /dev/null
 
     echo "Running Nemo with 2 cores to obtain function data..."
     echo
-    python3 ./../src/Job_Creator.py -f "run" -j "run" --set-core 4 -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "mpirun -np 2 ./nemo"
+    python3 "$dir"/src/Job_Creator.py -f "run" -j "run" --set-core 4 --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "mpirun -np 2 ./nemo"
 
-    state2=$("$job" --wait run."$Jobs_scheduler")
+    if ! state2=$("$job" "$wait" run."$Jobs_scheduler"); then
+        exit 1
+    fi
 
     Job_completed "$state2"
+    mv run.* "${logs_path}"
 
     if [ $Completed == false ]; then
-        echo "Nemo execution failed look at Run_NEMO/run.err and Run_NEMO/ocean.output for more info"
-        echo "Remember that the namelist files copied are the default ones, change theme in order to fit with the input files in the dir "
+        echo "Nemo execution failed, look at ${logs_path}/run.err and ${Gprof_path}/ocean.output for more info"
+        echo "Remember that the namelist files copied are the default ones, change them in order to fit with the input files in the dir "
         echo
         exit 1
     else
         echo "Gprof files generated "
         echo
     fi
+
+
     echo "Gthrottling functions ..."
     echo
-    python3 ./../src/Job_Creator.py -f "gthrottling" -j "gthrottling" --set-core 4 -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "./../src/gthrottling.sh nemo"
-    state3=$("$job" --wait gthrottling."$Jobs_scheduler")
+    python3 "$dir"/src/Job_Creator.py -f "gthrottling" -j "gthrottling" --set-core 4 --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "$dir/src/gthrottling.sh nemo"
+    if ! state3=$("$job" "$wait" gthrottling."$Jobs_scheduler"); then
+        exit 1
+    fi
 
     Job_completed "$state3"
+    mv gthrottling.* "${logs_path}"
 
     if [ $Completed == false ]; then
-        echo "Error listing functions, look at Run_NEMO/gthrottling.err for more info"
+        echo "Error listing functions, look at ${logs_path}/gthrottling.err for more info"
         echo
         exit 1
     else
+        mv extrae_functions.txt extrae_functions_"${name_cfg}".txt
         echo "Functions listed correctly"
         echo
+    fi
-
-
-
-
+
     else
-        echo "Functions already listed, file Run_NEMO/extrae_functions.txt does exist"
+        echo "Functions already listed, file ${Gprof_path}/extrae_functions_${name_cfg}.txt does exist."
         echo
     fi
-
 }
 
@@ -434,76 +488,110 @@ Gprof_functions()
 
 Get_trace()
 {
+    trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT
+
     Compile_extrae
+
+    wait
+    "$dir"/src/extraf.sh "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/nemo "${Gprof_path}"/extrae_functions_"${name_cfg}".txt > /dev/null
+    sed -i "s|list=.*|list=\"${Run_path}/extrae_functions_for_xml.txt\" exclude-automatic-functions=\"yes\">|g" "$dir"/src/extrae.xml
+
+    # Change iterations
+    sed -i "s|nn_itend * =.*|nn_itend = $Nemo_iterations ! last time step (std 5475)|g" "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/namelist_cfg
+    if [[ $comp_cfg == "-d OCE del_key 'key_si3 key_top'" ]]; then
+        sed -i '/_def_nemo-ice.xml\|def_nemo-pisces.xml/d' "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/context_nemo.xml #DELETE ICE AND PISCES CONTEXT (NOT USED)
+    fi
+    sed -i '/|g" ../src/extrae.xml
 
     for core in "${Nemo_cores[@]}"
     do
+        (
+
+        # Create folder for n cores
+        mkdir -p "$core"_cores
+        cd "$core"_cores
+
+        # Copy all the EXP00 data but don't overwrite the namelist, just the executable
+
+        cp -n "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/* .
+        cp "${Nemo_path}"/cfgs/"${name_cfg}"/EXP00/nemo .
+        cp -s -f "${Nemo_input_data}"/* .
+
+        # Obtain trace
         echo "Creating trace with $core cores..."
         echo
-        python3 ./../src/Job_Creator.py -f "run_extrae" -j "run_extrae" --set-core "$core" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "mpirun -np $core ./../src/trace.sh ./nemo"
+        python3 "$dir"/src/Job_Creator.py -f "run_extrae_$core" -j "run_extrae_$core" --set-core "$core" --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "mpirun -np $core $dir/src/trace.sh ./nemo"
 
-        state4=$("$job" --wait run_extrae."$Jobs_scheduler")
+        if ! state4=$("$job" "$wait" run_extrae_"$core"."$Jobs_scheduler"); then
+            kill 0
+            exit 1
+        fi
 
         Job_completed "$state4"
+        mv run_extrae_"$core".* "${logs_path}"
+
         if [ $Completed == false ]; then
-            echo "Nemo execution failed, no traces files generated more info inside Run_NEMO/run_extrae.err"
-            echo
+            echo "Nemo execution failed, no trace files generated; more info inside ${logs_path}/run_extrae_$core.err"
+            kill 0
             exit 1
         fi
 
         mv nemo.prv nemo_"$core".prv
         mv nemo.pcf nemo_"$core".pcf
         mv nemo.row nemo_"$core".row
 
-        echo "Cutting best iteration"
-        echo
-        ./../src/magiccut/magicCut nemo_"${core}".prv "$Nemo_iterations" > cut_"$core".out 2>&1
+        echo "Cutting out the best iteration"
+        "$dir"/src/magiccut/magicCut nemo_"${core}".prv "$Nemo_iterations" > "${logs_path}"/cut_"$core".out 2>&1
+
         if ! ls nemo_"$core".best_cut.prv; then
-            echo "Cut failed, look at Run_NEMO/cut_$core.out for more info."
-            echo
+            echo "Cut failed, look at ${logs_path}/cut_$core.out for more info."
+            kill 0
             exit 1
         fi
         echo
 
-        # Creating folder
-
-        if ! test -d "../Metrics"; then
-            mkdir ../Metrics
-        fi
-        cp nemo_"$core".best_cut.* ../Metrics
-
-
-
+        cp nemo_"$core".best_cut.* "${Metrics_path}"
+        cd "$Run_path"
+        )&
+
     done
+    wait
 }
 
 Create_metrics()
 {
-
-    cd "$dir" || (echo "Error original dir doesn't exist"; exit 1)
 
     # Create performance metrics
-    echo "Creating metrics and storing theme in Metrics folder"
+    echo "Creating metrics and storing them in ${Metrics_path} folder"
     echo
-    python3 ./src/Job_Creator.py -f "analysis" -j "analysis" --set-core "${Jobs_n_cores}" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "./../src/modelfactors.py -ms 100000 *"
-    mv analysis."$Jobs_scheduler" Metrics
-    cd Metrics||(echo "Error Metrics folder doesn't exists"; exit 1)
-    state5=$("$job" --wait analysis."$Jobs_scheduler")
+    python3 "$dir"/src/Job_Creator.py -f "analysis" -j "analysis" --set-core "${Jobs_n_cores}" --set-core-per-node "$Jobs_cores_per_node" -s "$Jobs_scheduler" --set-time "$time" --set-queue "$queue" -w "$dir/src/modelfactors.py -ms 100000 *"
+
+    if ! state5=$("$job" "$wait" analysis."$Jobs_scheduler"); then
+        exit 1
+    fi
+
     Job_completed "$state5"
+    cp analysis.out overview.txt
+    mv analysis.* "${logs_path}"
 
     if [ $Completed == false ]; then
-        echo "Error, metrics have not generated check Metrics/analysis.err to get more details"
+        echo "Error, metrics have not been generated; check ${logs_path}/analysis.err to get more details"
         echo
         exit 1
     fi
-
+
+    # Removing run folders
+    if [ $Clean == true ]; then
+        rm -r -f "$Run_path"
+        mv "${Gprof_path}"/extrae_functions* "$dir"
+        rm -r -f "$Gprof_path"/*
+        mv "${dir}"/extrae_functions* "${Gprof_path}"
+    fi
+
     echo "------------------------------------------------------------------------------"
     echo "------------------------- Script Completed -----------------------------------"
-    echo "----------------------- Data in Metrics folder -------------------------------"
+    echo "--- Data in ${Metrics_path} folder ---"
     echo "------------------------------------------------------------------------------"
     echo "------------------------------------------------------------------------------"
     echo
-}
\ No newline at end of file
+}
diff --git a/src/gthrottling.sh b/src/gthrottling.sh
index 27c4f503350f4b036d7b7095bfd46d4f047b5065..e91d785dd1a72a0a75921b131d2af3f2c734957a 100755
--- a/src/gthrottling.sh
+++ b/src/gthrottling.sh
@@ -3,27 +3,26 @@
 # Usage: ./extract_gprof.sh path/to/executable/executable
 # Output file: extrae_functions.txt
 
-rm gprof_functions
-rm suspected_functions_names_only
-rm suspected_functions
-rm extrae_functions.txt
 
 # nm tool lists the symbols from objects and we select the ones with type T|t, meaning the symbol is in the text section
 nm $1 | grep -i " T " | awk '{print $3}' > function_names.txt
 echo "See the function names from the binary in the file function_names.txt"
 
 dir=$(dirname $1)
-for i in `ls $dir/gmon*`;
-do
+analyze_gmon()
+{
+    local n=$(echo "$2" | sed 's/[^0-9]*//g')
+    local i=$2
     echo -e "Analyzing "$i"\n"
-    gprof $1 $i >gprof_temp
+    gprof $1 $i >gprof_temp_"$n"
 
 #We extract from each gprof file only the part about the functions, number of calls and durations; the call-paths are not needed
-    cat gprof_temp | grep -v ":" | awk 'BEGIN{k=0}{if($1=="%") {k=k+1};if(k>0 && k<2 && $1==$1+0) print $0}' > temp
+    cat gprof_temp_"$n" | grep -v ":" | awk 'BEGIN{k=0}{if($1=="%") {k=k+1};if(k>0 && k<2 && $1==$1+0) print $0}' > temp_"$n"
 
 #We save the name of the functions
-    cat temp | awk '{if($7~/^$/) print $4;else print $7}' > gprof_functions
+    cat temp_"$n" | awk '{if($7~/^$/) print $4;else print $7}' > gprof_functions_"$n"
 
 #From the initial list we save only the ones that gprof files include
-    cat function_names.txt | grep -w -f gprof_functions > extrae_new_list
+    cat function_names.txt | grep -w -f gprof_functions_"$n" > extrae_new_list_"$n"
 
 #We apply the throttling rule:
 # 1) If there is no information about each call, a function is suspected if the total duration is less than 0.1% of the total execution time
 # 2) If there is information about each call of the function:
 # 2.1) If its duration is less or equal to 5% ($1<=5, you can change them according to your application) of the total execution time
 # 2.2) If the duration of each call is less than 0.001s, then exclude it
 # 3) If the total execution time of this function is 0.0%, then remove it
-    cat temp | awk '{if($7~/^$/ && $1<0.1) print $4" "$1; else if(NF==7 && $4>10000 && (($1<=5 || $5<=0.001)) || $1==0.0) print $7" "$4}' >> suspected_functions
-    awk '{print $1}' suspected_functions >> suspected_functions_names_only
+    cat temp_"$n" | awk '{if($7~/^$/ && $1<0.1) print $4" "$1; else if(NF==7 && $4>10000 && (($1<=5 || $5<=0.001)) || $1==0.0) print $7" "$4}' >> suspected_functions_"$n"
+    awk '{print $1}' suspected_functions_"$n" >> suspected_functions_names_only_"$n"
 
 # Sort and remove any double functions from the list with suspected functions
-    cat suspected_functions_names_only | sort | uniq > temp_file
-    mv temp_file suspected_functions_names_only
+    cat suspected_functions_names_only_"$n" | sort | uniq > temp_file_"$n"
+    mv temp_file_"$n" suspected_functions_names_only_"$n"
 
 # Create a new function list with the non suspected functions
-    cat extrae_new_list | grep -w -v -f suspected_functions_names_only >> extrae_functions.txt
-done
+    cat extrae_new_list_"$n" | grep -w -v -f suspected_functions_names_only_"$n" >> extrae_functions.txt
+
+    rm extrae_new_list_"$n"
+    rm suspected_functions_names_only_"$n"
+    rm suspected_functions_"$n"
+    rm gprof_temp_"$n"
+    rm temp_"$n"
+    rm gprof_functions_"$n"
+
+}
+
+for i in `ls $dir/gmon*`; do analyze_gmon "$1" "$i" & done
+
+wait
 
 #Sort and uniq the useful functions because they are called from many processors and can be included twice
 cat extrae_functions.txt | sort | uniq > temp2
 mv temp2 extrae_functions.txt
-rm temp
 
 echo -e "Input function list: "function_names.txt" "`wc -l function_names.txt | awk '{print $1}'`" functions"
 echo -e "New function list: extrae_functions.txt "`wc -l extrae_functions.txt | awk '{print $1}'`" functions"
-
+rm function_names.txt
 exit
diff --git a/src/trace.sh b/src/trace.sh
index 12a53dc2ccbf9a14656ce41728d3016c932cc6f6..f81f6d535f2ae30316b1989898c063a45fc5a7b8 100755
--- a/src/trace.sh
+++ b/src/trace.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 # Configure Extrae
-export EXTRAE_CONFIG_FILE=./src/extrae.xml
+export EXTRAE_CONFIG_FILE=./../src/extrae.xml
 
 # Load the tracing library (choose C/Fortran)
 #export LD_PRELOAD=${EXTRAE_HOME}/lib/libmpitrace.so
 export EXTRAE_SKIP_AUTO_LIBRARY_INITIALIZE=1