From 2c0d78afea46af5a880b7c1d7f07b25b3b0c5e37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Pe=C3=B1a=20de=20Pedro?= Date: Thu, 25 Aug 2022 09:57:15 +0200 Subject: [PATCH 1/3] Added magiccut --- Job_Creator.py | 3 + README.md | 23 ++------ config.yaml | 38 ++++++------- cutter_example.xml | 72 ------------------------ extrae.xml | 4 +- magiccut | 1 + script.sh | 135 ++++++++++++++++++++++++++++++++------------- trace.sh | 2 + 8 files changed, 128 insertions(+), 150 deletions(-) delete mode 100644 cutter_example.xml create mode 160000 magiccut mode change 100644 => 100755 script.sh mode change 100644 => 100755 trace.sh diff --git a/Job_Creator.py b/Job_Creator.py index f0c4790..a2ed6dc 100644 --- a/Job_Creator.py +++ b/Job_Creator.py @@ -1,3 +1,5 @@ + + import os import subprocess @@ -150,3 +152,4 @@ if __name__ == "__main__": create_job_slurm(args) elif args.scheduler == "lsf": create_job_lsf(args) + diff --git a/README.md b/README.md index 67c7c81..bd34d29 100644 --- a/README.md +++ b/README.md @@ -4,26 +4,14 @@ Script used to get important metrics from NEMO # Installation and requeriments -This script requires Python 3 and relies on -*Extrae*,*Paraver* and *Dimemas* being installed and available -through the PATH environment variable. - -* *Extrae* available at https://ftp.tools.bsc.es/extrae/ !!! Install 3.7.1 version !!! +This script requires the following BSCTOOLS to be installed, loaded and available through the PATH environment variable. 
+ +* *Extrae (4.0.0 or above)* available at https://ftp.tools.bsc.es/extrae * *Paraver* available at https://tools.bsc.es/paraver * *Dimemas* available at https://tools.bsc.es/dimemas +* *Basicanalysis* available at https://ftp.tools.bsc.es/basicanalysis/ -If not already done, install both tools and add them to the PATH environment -variable with: - -``` -export PATH=/bin:$PATH -export EXTRAE_HOME= -export PATH=/bin:$PATH -export PARAVER_HOME= -export PATH=/bin:$PATH -export DIMEMAS_HOME= -``` Also the different modules needed to compile and execute NEMO should be loaded before the script execution. * Perl interpreter @@ -31,9 +19,6 @@ Also the different modules needed to compile and execute NEMO should be loaded b * Message Passing Interface (MPI) implementation (e.g. OpenMPI or MPICH). * Network Common Data Form (NetCDF) library with its underlying Hierarchical Data Form (HDF) -Additionally, plotting relies on the according SciPy(>= 0.17.0), -NumPy, pandas, searborn and matplotlib (>= 3.x) modules for Python 3. -Furthermore, the gnuplot output requires gnuplot version 5.0 or higher. # Usage diff --git a/config.yaml b/config.yaml index 97b3643..04834b6 100644 --- a/config.yaml +++ b/config.yaml @@ -1,24 +1,24 @@ --- -# Nemo_path: Relative path to nemo installation folder containing the dir cfgs and arch +# Nemo_path: Relative path to nemo installation folder containing the dir cfgs and arch Nemo: - path: "." + path: ".." 
-# Jobs_n_cores: Number of cores used (Default:48) -# Jobs_scheduler: Available (slurm/lsf) (Default: slurm) -# Jobs_time: Max duration of the job in min (Default: no time limit defined) -# Jobs_queue: Queue used (Default: no queue defined) +# Jobs_n_cores: Number of cores used +# Jobs_scheduler: Available (slurm/lsf) +# Jobs_time: Max duration of the job in min +# Jobs_queue: Queue used -Jobs: - n_cores: "48" +Jobs: + n_cores: "16" scheduler: "slurm" time: "60" queue: "debug" -# Compilation_compile: When false only compiles NEMO if arch file lacks the -g flag, when true always compiles NEMO. (Default: false) -# Compilation_ref: Reference configuration (Default: ORCA2_ICE_PISCES) -# Compilation_arch: Architecture used (without the -arch sufix and the .fcm) (Default: X64_MN4) -# Compilation_name: Name of the new configutation (Default: ORCA2_EXTRAE) -# Compilation_sub: Add or remove subcomponents (Default: empty) +# Compilation_compile: When false only compiles NEMO if arch file lacks the -g flag, when true always compiles NEMO. +# Compilation_ref: Reference configuration +# Compilation_arch: Architecture used (without the -arch sufix and the .fcm) +# Compilation_name: Name of the new configutation +# Compilation_sub: Add or remove subcomponents Compilation: compile: "false" @@ -27,20 +27,18 @@ Compilation: name: "ORCA2_EXTRAE" sub: "OCE del_key 'key_si3 key_top'" -# List of modules loaded (Default: empty) +# List of modules loaded # Required: # - Perl interpreter # - Fortran compiler (ifort, gfortran, pgfortran, ftn, …) # - Message Passing Interface (MPI) implementation (e.g. OpenMPI or MPICH). 
# - Network Common Data Form (NetCDF) library with its underlying Hierarchical Data Form (HDF) -# - Python -# - modelfactors -# - Extrae +# - Extrae 3.7.1 # - Paraver # - Dimemas +# - Python3 +# - gnuplot -Modules: "perl intel gcc impi netcdf hdf5 EXTRAE/3.7.1 basicanalysis" - +Modules: "gcc/7.2.0 intel/2018.3 impi/2018.4 netcdf/4.4.1.1 hdf5/1.8.19 perl BASICANALYSIS EXTRAE" --- - diff --git a/cutter_example.xml b/cutter_example.xml deleted file mode 100644 index 224c7dd..0000000 --- a/cutter_example.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 500 - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - 0 - 0 - - - - diff --git a/extrae.xml b/extrae.xml index d79dbe9..2436e94 100644 --- a/extrae.xml +++ b/extrae.xml @@ -1,7 +1,7 @@ @@ -29,7 +29,7 @@ 1-3 - + diff --git a/magiccut b/magiccut new file mode 160000 index 0000000..13b714f --- /dev/null +++ b/magiccut @@ -0,0 +1 @@ +Subproject commit 13b714f08a265150dc3b5cc05c5303fa0a650448 diff --git a/script.sh b/script.sh old mode 100644 new mode 100755 index aaf7bdd..f5542c5 --- a/script.sh +++ b/script.sh @@ -65,7 +65,8 @@ Init() name_cfg="${Compilation_name:-"ORCA2_EXTRAE"}" comp_cfg="${Compilation_sub:-""}" Modules="${Modules:-""}" - + + module load $Modules } @@ -102,16 +103,14 @@ Test_arguments() exit 1 fi - # $EXTRAE_HOME correct ? - # if ! test -d "${EXTRAE_HOME}"; then - # echo "Extrae relative path: ${EXTRAE_HOME} is not found" - # echo - # exit 1 - - # else - # sed -i 's|home=.*|home="'"$EXTRAE_HOME"'"|g' extrae.xml - - #fi + #$EXTRAE_HOME correct ? + if ! test -d "${EXTRAE_HOME}"; then + echo "Extrae relative path: ${EXTRAE_HOME} is not found" + echo + # exit 1 + else + sed -i 's|home=.*|home="'"$EXTRAE_HOME"'"|g' extrae.xml + fi # Adding -d to variable if not empty if [ ! 
-z "$comp_cfg" ]; then @@ -125,7 +124,7 @@ Test_arguments() job="bsub" fi - module load $Modules + } @@ -152,36 +151,80 @@ Test_Comp() compile=true fi -# If finstrument-functions is not there recompilation is requiered and finstrument-functions added + # If finstrument-functions is not there recompilation is requiered and -finstrument-functions added if ! echo "${line}"|grep -q "\-finstrument-functions\b"; then echo "-finstrument-functions flag not found in arch-${arch}.fcm: editing arch-${arch}.fcm " sed -i '/^%FCFLAGS/ s/$/ -finstrument-functions/' ${Nemo_path}/arch/arch-${arch}.fcm compile=true fi + # If -pg is not there recompilation is requiered and -pg added + + line2=$(sed -n '/^%FPPFLAGS /p' $Nemo_path/arch/arch-${arch}.fcm) + line3=$(sed -n '/^%LDFLAGS /p' $Nemo_path/arch/arch-${arch}.fcm) + + + if ! echo "${line}"|grep -q "\-pg\b"; then + echo "-pg flag not found in FCFLAGS arch-${arch}.fcm: editing arch-${arch}.fcm " + sed -i '/^%FCFLAGS/ s/$/ -pg/' ${Nemo_path}/arch/arch-${arch}.fcm + compile=true + fi + + if ! echo "${line2}"|grep -q "\-pg\b"; then + echo "-pg flag not found in FPPFLAGS arch-${arch}.fcm : editing arch-${arch}.fcm " + sed -i '/^%FPPFLAGS/ s/$/ -pg/' ${Nemo_path}/arch/arch-${arch}.fcm + compile=true + fi + + if ! echo "${line3}"|grep -q "\-pg\b"; then + echo "-pg flag not found in LDFLAGS arch-${arch}.fcm: editing arch-${arch}.fcm " + sed -i '/^%LDFLAGS/ s/$/ -pg/' ${Nemo_path}/arch/arch-${arch}.fcm + compile=true + fi + + + # If -rdynamic is not there recompilation is requiered and -rdynamic added + + if ! echo "${line}"|grep -q "\-rdynamic\b"; then + echo "-rdynamic flag not found in FCFLAGS arch-${arch}.fcm: editing arch-${arch}.fcm " + sed -i '/^%FCFLAGS/ s/$/ -rdynamic/' ${Nemo_path}/arch/arch-${arch}.fcm + compile=true + fi + + if ! 
echo "${line3}"|grep -q "\-export-dynamic\b"; then + echo "-export-dynamic flag not found in LDFLAGS arch-${arch}.fcm: editing arch-${arch}.fcm " + sed -i '/^%LDFLAGS/ s/$/ -export-dynamic/' ${Nemo_path}/arch/arch-${arch}.fcm + compile=true + fi + + + #Compile the program if needed if [ $compile == true ]; then + echo "Compiling Nemo, expected duration 35m" python3 Job_Creator.py -f "compile" -j "compile" --set-core ${Jobs_n_cores} -s $Jobs_scheduler --set-time $time --set-queue $queue -w "cd ${Nemo_path} ./makenemo -r ${cfg} -n ${name_cfg} -m ${arch} -j$Jobs_n_cores $comp_cfg" echo $job --wait compile.$Jobs_scheduler - #Here maybe we can rename the namelist_cfg if exists in order to don't overwrite it - if ! test -f "namelist_cfg"; then + + #Rename the namelist_cfg if exists in order to not overwrite it + if test -f "namelist_cfg"; then mv namelist_cfg namelist_cfg_old + cd $dir + cp ${Nemo_path}/cfgs/${name_cfg}/EXP00/* . + rm namelist_cfg + mv namelist_cfg_old namelist_cfg + else + cd $dir + cp ${Nemo_path}/cfgs/${name_cfg}/EXP00/* . fi - cd $dir - cp ${Nemo_path}/cfgs/${name_cfg}/EXP00/* . - rm namelist_cfg - mv namelist_cfg_old namelist_cfg - if [[ $comp_cfg == "-d OCE del_key 'key_si3 key_top'" ]]; then sed -i '/_def_nemo-ice.xml\|def_nemo-pisces.xml/d' context_nemo.xml #DELETE ICE AND PISCES CONTEXT (NOT USED) fi #Solving NEMO 4.2 Errors - #Remove this line if the Nemo version is not 4.2. sed -i 's|ln_zdfiwm * = .true.|ln_zdfiwm = .false.|g' namelist_cfg #CHANGE DUE TO NON EXISTING FILES else @@ -197,34 +240,52 @@ Create_metrics() #Changing iterations, big traces generate problems. - sed -i 's|nn_itend * =.*|nn_itend = 12 ! last time step (std 5475)|g' namelist_cfg + sed -i 's|nn_itend * =.*|nn_itend = 12 ! last time step (std 5475)|g' namelist_cfg + + #Generating function list in case of missing + + if ! test -f "extrae_functions_for_xml.txt"; then + + echo "Runing Nemo with 4 cores to obtain function data..." 
+ echo + python3 Job_Creator.py -f "run" -j "run" --set-core 4 -s $Jobs_scheduler --set-time $time --set-queue $queue -w "mpirun -np 4 ./nemo" + $job --wait run.$Jobs_scheduler + echo " Gthrottling functions ..." + ./gthrottling.sh nemo + ./extraf.sh nemo extrae_functions.txt + + else + echo "Functions already listed file nemo, extrae_functions.txt does exist" + echo + fi + sed -i "s|list=.*|list=\"${dir}/extrae_functions_for_xml.txt\" exclude-automatic-functions=\"yes\">|g" extrae.xml + # Run nemo with extrae echo "Creating trace ..." + echo python3 Job_Creator.py -f "run" -j "run" --set-core ${Jobs_n_cores} -s $Jobs_scheduler --set-time $time --set-queue $queue -w "mpirun -np ${Jobs_n_cores} ./trace.sh ./nemo" $job --wait run.$Jobs_scheduler + echo "Cutting best iteration" + echo + magiccut/./magicCut nemo.prv 12 - # Cut trace to less than 1000 MiB - paramedir -c nemo.prv cutter_example.xml -o trace.prv - echo "Trace information stored in trace.prv , trace.pcf and trace.row " - echo "Creating metrics and storing theme in Metrics folder" + + echo "Creating metrics and storing theme in Metrics_cores folder" + echo # Creating folder - if ! test -d "Metrics"; then - mkdir Metrics + if ! 
test -d "Metrics${Jobs_n_cores}cores"; then + mkdir Metrics_${Jobs_n_cores}_cores fi - cp trace.prv Metrics/ - cp trace.pcf Metrics/ - cp trace.row Metrics/ - python3 Job_Creator.py -f "modelfactors" -j "modelfactors" --set-core ${Jobs_n_cores} -s $Jobs_scheduler --set-time $time --set-queue $queue -w "modelfactors.py trace.prv" - cp modelfactors.$Jobs_scheduler Metrics/ - cd Metrics + cp nemo.best_cut.* Metrics_${Jobs_n_cores}_cores + + python3 Job_Creator.py -f "modelfactors" -j "modelfactors" --set-core ${Jobs_n_cores} -s $Jobs_scheduler --set-time $time --set-queue $queue -w "modelfactors.py nemo.best_cut.prv" + mv modelfactors.$Jobs_scheduler Metrics_${Jobs_n_cores}_cores + cd Metrics_${Jobs_n_cores}_cores $job --wait modelfactors.$Jobs_scheduler } main "$@"; exit - - - diff --git a/trace.sh b/trace.sh old mode 100644 new mode 100755 index 620065f..6ffdf90 --- a/trace.sh +++ b/trace.sh @@ -1,5 +1,7 @@ #!/bin/sh # Configure Extrae + +export EXTRAE_HOME=/apps/BSCTOOLS/extrae/4.0.1/impi_2017_4/ export EXTRAE_CONFIG_FILE=./extrae.xml # Load the tracing library (choose C/Fortran) #export LD_PRELOAD=${EXTRAE_HOME}/lib/libmpitrace.so -- GitLab From 61bb5e90fcf2b95c1d1a05f25ad825dc04e8fd37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Pe=C3=B1a=20de=20Pedro?= Date: Thu, 25 Aug 2022 10:00:50 +0200 Subject: [PATCH 2/3] Fixed typo --- script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script.sh b/script.sh index f5542c5..e25434d 100755 --- a/script.sh +++ b/script.sh @@ -273,7 +273,7 @@ Create_metrics() - echo "Creating metrics and storing theme in Metrics_cores folder" + echo "Creating metrics and storing theme in Metrics_${Jobs_n_cores}_cores folder" echo # Creating folder if ! 
 test -d "Metrics${Jobs_n_cores}cores"; then -- GitLab From f99b3fd5416b7f86b6e5228fd66853aa1b0a3e80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Pe=C3=B1a=20de=20Pedro?= Date: Thu, 25 Aug 2022 10:40:02 +0200 Subject: [PATCH 3/3] Magiccut copied correctly --- magiccut | 1 - magiccut/README.md | 4 + magiccut/bin/TraceCutter.py | 266 +++++++++++++++++++++++++ magiccut/magicCut | 64 ++++++ magiccut/templates/cutter_template.xml | 23 +++ 5 files changed, 357 insertions(+), 1 deletion(-) delete mode 160000 magiccut create mode 100644 magiccut/README.md create mode 100755 magiccut/bin/TraceCutter.py create mode 100755 magiccut/magicCut create mode 100755 magiccut/templates/cutter_template.xml diff --git a/magiccut b/magiccut deleted file mode 160000 index 13b714f..0000000 --- a/magiccut +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 13b714f08a265150dc3b5cc05c5303fa0a650448 diff --git a/magiccut/README.md b/magiccut/README.md new file mode 100644 index 0000000..3015d29 --- /dev/null +++ b/magiccut/README.md @@ -0,0 +1,4 @@ +# Magic Cut + +A tool created to automatically cut a single iteration from an **Extrae trace** using function events (60000019) as a reference. +Created using NEMO traces, not tested with other codes. 
\ No newline at end of file diff --git a/magiccut/bin/TraceCutter.py b/magiccut/bin/TraceCutter.py new file mode 100755 index 0000000..4b26cd1 --- /dev/null +++ b/magiccut/bin/TraceCutter.py @@ -0,0 +1,266 @@ +import numpy as np +import xml.etree.ElementTree as ET +import time as clock_time +from os.path import exists, realpath + + +def get_command_line_arguments(): + """ + Returns a list of files that have been provided as a command line argument + :return: list of files + """ + + # Parse and assert command line options + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input-data", required=True, type=PathType(exists=True, type='file'), + help="File with filtered info on cores ID, function ID, and timings") + parser.add_argument("--ts-number", default=None, type=int, + help="Number of time-steps present in the trace") + parser.add_argument("-t", "--tool-path", default=None, type=PathType(exists=True, type='dir'), + help="Path to Magicut templates") + parser.add_argument("--function-file", required=True, type=PathType(exists=True, type='file'), + help="File with names corresponding to function IDs") + + args = parser.parse_args() + + if args.input_data is None: + parser.error("Input data are needed") + else: + if not exists(args.input_data): + parser.error("The specified input does not exists") + if args.function_file is not None and not exists(args.function_file): + parser.error("The specified file does not exists") + if args.ts_number is None: + print("Warning: magicCut will try to guess the number of time-steps") + if args.tool_path is None: + args.tool_path = "/".join(realpath(__file__).split("/")[:-2]) + "/templates/" + if not exists(args.tool_path): + parser.error("The specified path to tools does not exist: " + args.tool_path) + + return args + + +class PathType(object): + def __init__(self, exists=True, type='file', dash_ok=True): + '''exists: + True: a path that does exist + False: a path that does not exist, in a 
valid parent directory + None: don't care + type: file, dir, symlink, None, or a function returning True for valid paths + None: don't care + dash_ok: whether to allow "-" as stdin/stdout''' + + assert exists in (True, False, None) + assert type in ('file', 'dir', 'symlink', None) or hasattr(type, '__call__') + + self._exists = exists + self._type = type + self._dash_ok = dash_ok + + def __call__(self, string): + from argparse import ArgumentTypeError as err + import os + if string == '-': + # the special argument "-" means sys.std{in,out} + if self._type == 'dir': + raise err('standard input/output (-) not allowed as directory path') + elif self._type == 'symlink': + raise err('standard input/output (-) not allowed as symlink path') + elif not self._dash_ok: + raise err('standard input/output (-) not allowed') + else: + e = os.path.exists(string) + if self._exists == True: + if not e: + raise err("path does not exist: '%s'" % string) + + if self._type is None: + pass + elif self._type == 'file': + if not os.path.isfile(string): + raise err("path is not a file: '%s'" % string) + elif self._type == 'symlink': + if not os.path.symlink(string): + raise err("path is not a symlink: '%s'" % string) + elif self._type == 'dir': + if not os.path.isdir(string): + raise err("path is not a directory: '%s'" % string) + elif not self._type(string): + raise err("path not valid: '%s'" % string) + else: + if self._exists == False and e: + raise err("path exists: '%s'" % string) + + p = os.path.dirname(os.path.normpath(string)) or '.' 
+ if not os.path.isdir(p): + raise err("parent path is not a directory: '%s'" % p) + elif not os.path.exists(p): + raise err("parent directory does not exist: '%s'" % p) + + return string + + +def get_function_name(functions_name_file, function_ids): + # Opens file with format #ID FUNCTION_NAME + with open(functions_name_file) as f: + ids_name_list = f.read() + id = [l.split()[0] for l in ids_name_list.splitlines()] + names = [l.split()[1] for l in ids_name_list.splitlines()] + function_names = [] + for ID in function_ids: + index = id.index(str(ID)) + function_names.append(names[index]) + return function_names + + +def save_function_names(function_ids, functions_name_file): + # Opens file with format #ID FUNCTION_NAME + with open(functions_name_file) as f: + ids_name_list = f.read() + id = [l.split()[0] for l in ids_name_list.splitlines()] + names = [l.split()[1] for l in ids_name_list.splitlines()] + with open('functions.txt', 'w') as f: + for ID in function_ids: + index = id.index(str(ID)) + f.write(id[index] + " " + names[index] + "\n") + print("Functions that are called once per time-step saved to functions.txt") + + +def cut_time(tool_path, input_data, function_name_file, time_steps=None): + # Path to the cutter template + template_path = tool_path + "cutter_template.xml" + print("Starting the magic") + start = clock_time.time() + + # Read trace and load function calls + cpu_ids, time, function_ids = np.genfromtxt(input_data, dtype='int', unpack='True') + + print("for opening the input_data: ") + end = clock_time.time() + print(end - start) + + # Find set of different routines, and number of cores + unique_function_ids, call_counts = np.unique(function_ids, return_counts=True) + cpu_number = len(np.unique(cpu_ids)) + + function_names = get_function_name(function_name_file, unique_function_ids) + + if time_steps is None: + # Remember that everything is multiplied by N cores + # Creates an histogram with the number of how many functions are called n times + 
counts = np.bincount(call_counts) + # Suppose that the most common value is the number of ts + time_steps = np.argmax(counts) + # Find functions that are called once per time-step + functions_called_once_per_step = unique_function_ids[call_counts == time_steps] + else: + # We just search for that functions that appears once per core per time-step + functions_called_once_per_step = unique_function_ids[call_counts / cpu_number == time_steps] + + if not len(functions_called_once_per_step): + raise Exception("No function has been found which is called once per timestep") + + # Find which routine is called in first place + magic_sentinel = [n for n in function_names if n.count("magiccut")] + if magic_sentinel: + # An artificial first routine was introduces + index = function_names.index(magic_sentinel[0]) + first_routine = unique_function_ids[index] + else: + # see which functions is first + first_routine = functions_called_once_per_step[0] + + print("First called routine: ", function_names[unique_function_ids.tolist().index(first_routine)]) + # Find number of nemo_proc_number + nemo_proc = cpu_ids[function_ids == first_routine] + nemo_proc = np.unique(nemo_proc) + nemo_proc_number = nemo_proc.shape[0] + + nemo_proc_min = min(nemo_proc) + nemo_proc_max = max(nemo_proc) + + print("Actual n_proc", nemo_proc_number, "with ", time_steps, "time steps") + + # Find the index of the fastest step + time_step = np.ones([nemo_proc_number, time_steps]) + + ts_min_index = np.zeros(nemo_proc_number, dtype='int') + ts_max_index = np.zeros(nemo_proc_number, dtype='int') + + ts_time = np.zeros([nemo_proc_number, time_steps]) + start = clock_time.time() + # For each processor + for index, proc in enumerate(nemo_proc): + + # Get the starting time of all time-step + ts_time[index] = time[(cpu_ids == proc) & (function_ids == first_routine)] + + # Compute the duration of each ts + ts_duration = np.diff(ts_time[index]) + + # Index of the shortest ts + ts_min_index[index] = 
np.argmin(ts_duration) + + # Index of the largest ts + ts_max_index[index] = np.argmax(ts_duration[1:-1]) + 1 + + # Evaluate the most common index for best ts + counts = np.bincount(ts_min_index) + best_ts_index = np.argmax(counts) + + # Evaluate the most common index for worst ts + counts = np.bincount(ts_max_index) + worst_ts_index = np.argmax(counts) + + print("for finding the index of the slowest / fastest step: ", worst_ts_index, "/", best_ts_index) + end = clock_time.time() + print(end - start) + + # Find the start and the end of the best step + best_ts_start = min(ts_time[:, best_ts_index]) + best_ts_end = max(ts_time[:, best_ts_index + 1]) + + # Find the start and the end of the best step + worst_ts_start = min(ts_time[:, worst_ts_index]) + worst_ts_end = max(ts_time[:, worst_ts_index + 1]) + + print("Worst / best time step's duration: ", worst_ts_end - worst_ts_start, best_ts_end - best_ts_start) + + # Open the xml template + tree = ET.parse(template_path) + + cutter = tree.find('cutter') + for tasks in tree.iter('tasks'): + tasks.text = str(nemo_proc_min) + "-" + str(nemo_proc_max) + for minimum_time in tree.iter('minimum_time'): + minimum_time.text = str(best_ts_start - 1000) + for maximum_time in tree.iter('maximum_time'): + maximum_time.text = str(best_ts_end + 1000) + + # Create paramedir cutter file + tree.write('best_time_cutter.xml') + for minimum_time in tree.iter('minimum_time'): + minimum_time.text = str(worst_ts_start - 1000) + for maximum_time in tree.iter('maximum_time'): + maximum_time.text = str(worst_ts_end + 1000) + + # Create paramedir cutter file + tree.write('worst_time_cutter.xml') + + return unique_function_ids.tolist() + + +if __name__ == "__main__": + # Get files from command line + args = get_command_line_arguments() + + tool_path = args.tool_path + input_data = args.input_data + function_file = args.function_file + ts_number = args.ts_number + + unique_functions = cut_time(tool_path, input_data, function_file, ts_number) + # 
unique_functions = [6, 33] + save_function_names(unique_functions, function_file) diff --git a/magiccut/magicCut b/magiccut/magicCut new file mode 100755 index 0000000..9d0b6aa --- /dev/null +++ b/magiccut/magicCut @@ -0,0 +1,64 @@ +#!/bin/bash + +# Get inputs arg +if [[ $# -lt 2 ]] +then + echo $0 "expects trace and number of timesteps as input" + echo "abort" + exit 1 +fi + +# trace file to cut +trace_file=$1 +# Number of timesteps +time_steps=$2 + +if [[ ! -e ${trace_file} ]] +then + echo "The specified trace does not exist: "${trace_file} + echo "Abort" + exit 1 +fi +trace_file=`readlink -f ${trace_file}` +pcf_trace_file=${trace_file/.prv/.pcf} +trace_folder=${trace_file%/*} + +tool_path=`readlink -f $0` +tool_path=${tool_path//'/magicCut'} + + +trace_base_name=${trace_file//.exe.prv.gz} +trace_base_name=${trace_base_name//.exe.prv} +trace_base_name=${trace_base_name//.prv.gz} +trace_base_name=${trace_base_name//.prv} + +# grep through prv file if functions are there +func_check=`grep -m 1 60000019 ${pcf_trace_file} | wc -l` +# Stores the list of function and extrae ids +begin_line_number=`awk '/60000019 User function/ {print FNR}' ${pcf_trace_file}` +begin_line_number=$((begin_line_number+2)) +end_line_number=`tail -n +${begin_line_number} ${pcf_trace_file} | grep -nm 1 EVENT_TYPE | awk -F: '{print $1}'` +end_line_number=$((begin_line_number+end_line_number-2)) + +# removes also the blank lines at the end +sed -n "${begin_line_number},${end_line_number}p" ${pcf_trace_file}| awk '{print $1, $2}' | awk NF > FUNCTION_ID_NAMES.txt + +if [[ ${func_check} -gt 0 ]] +then + CPU_T_ID=${trace_folder}/CPU_T_ID.txt + cat /dev/null > ${CPU_T_ID} + + # Retrieve function's ID + echo "Retrieve function's ID" + grep ':60000019:' ${trace_file} |\ + grep -v ':0:' |\ + awk -F : '{print $2, $6, $8}' > ${CPU_T_ID} + # Finds best time step + python ${tool_path}/bin/TraceCutter.py --input-data ${CPU_T_ID} --ts-number ${time_steps} --function-file FUNCTION_ID_NAMES.txt + #rm 
${CPU_T_ID} + echo "start paramedir cutter" + time paramedir -c best_time_cutter.xml ${trace_file} -o ${trace_base_name}.best_cut.prv +else + echo -e "Functions must be present in the trace for the script to work.\nAborting ... " + exit 1 +fi diff --git a/magiccut/templates/cutter_template.xml b/magiccut/templates/cutter_template.xml new file mode 100755 index 0000000..3394112 --- /dev/null +++ b/magiccut/templates/cutter_template.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + 1-256 + 0 + 1 + %START% + %END% + 0 + 100 + 0 + 1 + 0 + 0 + + -- GitLab