diff --git a/Makefile b/Makefile index 54d5d64bdb265425005040f07aca58d1888d4275..1c36fcffa9e89c90f93dee604047366e7ebbe3a8 100644 --- a/Makefile +++ b/Makefile @@ -306,11 +306,22 @@ paramedir_experiment: $(paramedir_dependencies) paraver_dependencies=$(foreach var, $(number_of_processes), .paraver_$(var).done) paraver_experiment: $(paraver_dependencies) +# Collect information and produce a json file with it. .json.done: .scalability.done paramedir_experiment - python cfg/parser/launcher.py test_*/*.timing trace_*/*pm -f ${latex_folder}/${latex_function_csv} -n ${configuration_name} + python cfg/parser/collector.py test_*/*.timing trace_*/*pm -n ${configuration_name} touch $@ json: .json.done +# Make reports +.info_report.done: .json.done + python cfg/parser/reporter.py \ + ${configuration_name}.json \ + -f ${latex_folder}/${latex_function_csv} \ + --sypd sypd.txt \ + --sypd-plot ${configuration_name}.png \ + --functions functions_summary.txt + touch $@ + ############ # Create a latex report images_folder: @@ -319,7 +330,7 @@ function_csv_folder: cd ${latex_folder}; mkdir -p ${latex_function_csv} report: .latex_report.done -.latex_report.done: modelfactors paramedir_experiment scalability +.latex_report.done: modelfactors paramedir_experiment scalability .info_report.done # Stores the nemo commit tag to a variable $(eval $@_NEMO_VERSION := $(shell cd nemo_mirror; git log | head -10 | grep 'git-svn-id:' | awk '{print $$2}'| awk -F / '{print $$NF}' )) diff --git a/cfg/parser/launcher.py b/cfg/parser/collector.py similarity index 53% rename from cfg/parser/launcher.py rename to cfg/parser/collector.py index 6496af3ba46c73fc9fdc555efd77e4b153e9ed9f..37006959d909238af30c2b56632dd6f281d0621e 100644 --- a/cfg/parser/launcher.py +++ b/cfg/parser/collector.py @@ -1,5 +1,17 @@ from cfg.parser.utils.parsers import TimingInformation -from cfg.parser.utils.curation import experiment_comparison, fit_step_time_to_model, fit_functions_to_model + + +def expand_arguments(arguments): + 
import glob + # Using the glob library to make it possible to use wildcards + expanded_arguments = [] + for arg in arguments: + if arg.count("*"): + for new_argument in glob.glob(arg): + expanded_arguments.append(new_argument) + else: + expanded_arguments.append(arg) + return expanded_arguments def get_file_list_from_command_line_arguments(): @@ -8,26 +20,13 @@ def get_file_list_from_command_line_arguments(): :return: list of files """ from optparse import OptionParser - import glob # Parse and assert command line options parser = OptionParser() parser.add_option("-n", "--name", dest="experiment_name", help="Experiment name", default="Experiment") - parser.add_option("-f", "--csv_folder", dest="csv_function_folder", - help="folder to save csv files with functions' info ", default=None) - parser.add_option("-s", "--save", dest="save_experiment", - help="Save a json file with the experiment data.", default=True, action="store_false") (options, args) = parser.parse_args() - # Using the glob library to make it possible to use wildcards - expanded_arguments = [] - for arg in args: - if arg.count("*"): - for new_argument in glob.glob(arg): - expanded_arguments.append(new_argument) - else: - expanded_arguments.append(arg) - files = expanded_arguments + files = expand_arguments(args) # In case of not having any argument print help and exit if not files: @@ -40,18 +39,17 @@ if __name__ == "__main__": # Get files from command line files, options = get_file_list_from_command_line_arguments() experiment_name = options.experiment_name - save_experiment = options.save_experiment - csv_function_folder = options.csv_function_folder - # Process the info of the experiments provided + + # Split the files between timing and paramedir outputs. 
+ # timing_files = [f for f in files if f.count(".timing")] paramedir_files = [f for f in files if f.count(".pm")] - experiment_results = TimingInformation(experiment_name=experiment_name, - timing_files=timing_files, - paramedir_files=paramedir_files, - ) - if csv_function_folder is not None: - experiment_results.function_info_csv(csv_folder=csv_function_folder) - - if save_experiment: - experiment_results.save_json() + # Parse files and create a TimingInformation object + experiment_results = TimingInformation( + experiment_name=experiment_name, + timing_files=timing_files, + paramedir_files=paramedir_files, + ) + # Save json file + experiment_results.save_json() diff --git a/cfg/parser/comparator.py b/cfg/parser/comparator.py index bcff9730fdc2183dea62e8e4277c0901f35715c0..39a6e7122a6cc640ff34e6227e89b507fdca6dc2 100644 --- a/cfg/parser/comparator.py +++ b/cfg/parser/comparator.py @@ -1,5 +1,6 @@ from cfg.parser.utils.parsers import TimingInformation -from cfg.parser.utils.curation import experiment_comparison, fit_step_time_to_model, fit_functions_to_model +from cfg.parser.utils.curation import experiment_comparison +from cfg.parser.collector import expand_arguments def get_file_list_from_command_line_arguments(): @@ -8,21 +9,14 @@ def get_file_list_from_command_line_arguments(): :return: list of files """ from optparse import OptionParser - import glob + # Parse and assert command line options parser = OptionParser() (options, args) = parser.parse_args() # Using the glob library to make it possible to use wildcards - expanded_arguments = [] - for arg in args: - if arg.count("*"): - for new_argument in glob.glob(arg): - expanded_arguments.append(new_argument) - else: - expanded_arguments.append(arg) - files = expanded_arguments + files = expand_arguments(args) # In case of not having any argument print help and exit if not files: @@ -34,5 +28,6 @@ def get_file_list_from_command_line_arguments(): if __name__ == "__main__": # Get files from command line files, 
options = get_file_list_from_command_line_arguments() + # Load the two experiments provided. experiment_data = [TimingInformation(json_file=f) for f in files] experiment_comparison(experiment_data[0], experiment_data[1]).print() diff --git a/cfg/parser/reporter.py b/cfg/parser/reporter.py new file mode 100644 index 0000000000000000000000000000000000000000..7f51ab81fca646a4a23a4442598229c9ab405fcb --- /dev/null +++ b/cfg/parser/reporter.py @@ -0,0 +1,97 @@ +from cfg.parser.utils.parsers import TimingInformation + + +def get_command_line_instructions(): + """ + Returns a list of files that have been provided as a command line argument + :return: list of files + """ + # Using optparse to enable command line selection of different runtime options + from optparse import OptionParser + + # Initializing parser + parser = OptionParser() + + # Adding options + + # Expanded description to make it easier for someone to add a new option + parser.add_option( + # Short cml option + "-s", + # Long cml option + "--sypd", + # Name of the property in which the value will be stored. i.e. options.sypd + dest="sypd_file", + # Help message for this specific option + help="Produce a Simulation Years per Day report and save to the specified file ", + # Default value + default=None, + ) + parser.add_option( + # Short cml option + "-p", + # Long cml option + "--sypd-plot", + # Name of the property in which the value will be stored. i.e. 
options.sypd_plot_file + dest="sypd_plot_file", + # Help message for this specific option + help="Produce a Simulation Years per Day plot and save to the specified file ", + # Default value + default=None, + ) + + parser.add_option( + "-f", + "--csv_folder", + dest="csv_function_folder", + help="folder to save csv files with functions' info ", + default=None, + ) + + parser.add_option( + "--functions", + dest="functions_file", + help="Produce functions report", + default=None, + ) + + # OptParser will parse the text provided as command line arguments and will distinguish between the options + # and other arguments + (options, args) = parser.parse_args() + + # Check that one argument is provided. + if len(args) != 1: + print(args) + parser.print_help() + print("A single json file is required as an argument.") + exit(1) + json_file = args[0] + return json_file, options + + +if __name__ == "__main__": + # Get files from command line + json_file, options = get_command_line_instructions() + # Deal with the options provided + csv_function_folder = options.csv_function_folder + sypd_file = options.sypd_file + sypd_plot_file = options.sypd_plot_file + functions_file = options.functions_file + + # Load the json file provided + experiment_results = TimingInformation( + json_file=json_file, + ) + + # Depending on the options selected, do different kinds of diagnostics. 
+ if csv_function_folder is not None: + experiment_results.function_info_csv(csv_folder=csv_function_folder) + + if sypd_file is not None: + experiment_results.simulated_years_per_day_summary().save(sypd_file) + if sypd_plot_file: + experiment_results.simulated_years_per_day_plot(sypd_plot_file) + if functions_file is not None: + experiment_results.function_summary().save(functions_file) + + # To create a new diagnostic/plot you need to create the function in curation.py and add the call inside parsers.py diff --git a/cfg/parser/utils/curation.py b/cfg/parser/utils/curation.py index 459bab2697e6cbf3f9515ce6f2887cda72e803bf..999da8723a0672a9eb2ce70d7c9cbb8f6e239d1d 100644 --- a/cfg/parser/utils/curation.py +++ b/cfg/parser/utils/curation.py @@ -16,6 +16,13 @@ class Log: def print(self): print(self.text) + def save(self, filename=None): + if filename is None: + self.print() + else: + with open(filename, "w") as f: + f.write(self.text) + def function_curated_summary(experiment_data): """ @@ -55,6 +62,7 @@ def function_curated_summary(experiment_data): log.add("_" * 30) return log + def save_function_summary_csv(experiment_data, csv_folder): """ Function to save the results as a csv which will be used in the latex report @@ -104,6 +112,7 @@ def save_function_summary_csv(experiment_data, csv_folder): np.savetxt(csv_folder + '/' + k + '.csv', final_table, delimiter=',', fmt="%s") return log + def simulated_years_per_day_summary(experiment_data): from cfg.parser.utils.plotter import plotter """ @@ -114,6 +123,34 @@ def simulated_years_per_day_summary(experiment_data): """ # Log object to output the text log = Log() + + base_case = experiment_data.base_case + base_case_speed = experiment_data[base_case]["Communication"]["sypd"]["average"] + + log.add("Base case with %4i processes has an speed of %.1f SYPD" % + (base_case, base_case_speed)) + + for case in experiment_data.cases: + if case == experiment_data.base_case: + continue + speed = 
experiment_data[case]["Communication"]["sypd"]["average"] + speedup = speed / base_case_speed + resource_increase = case / base_case + efficiency = speedup / resource_increase + log.add(" %4i processes speed : %.1f SYPD SpeedUp: %.1fx Efficiency: %.2f" % + (case, speed, speedup, efficiency)) + + return log + + +def simulated_years_per_day_plot(experiment_data, filename): + from cfg.parser.utils.plotter import plotter + """ + Produces a simple log showing the simulated years per day of the different cases contained in the experiment, + with the speed up and the efficiency respect to the base case. + :param experiment_data: + :return: Log object with a summary of the simulated years per day and few more metrics. + """ # Data to plot x_label = "Number of processes" y_label = "Simulated Years Per Day " @@ -125,9 +162,6 @@ def simulated_years_per_day_summary(experiment_data): plot_data[x_label].append(base_case) plot_data[y_label].append(base_case_speed) - log.add("Base case with %4i processes has an speed of %.1f SYPD" % - (base_case, base_case_speed)) - for case in experiment_data.cases: if case == experiment_data.base_case: continue @@ -135,13 +169,12 @@ def simulated_years_per_day_summary(experiment_data): speedup = speed / base_case_speed resource_increase = case / base_case efficiency = speedup / resource_increase - log.add(" %4i processes speed : %.1f SYPD SpeedUp: %.1fx Efficiency: %.2f" % - (case, speed, speedup, efficiency)) plot_data[x_label].append(case) plot_data[y_label].append(speed) - plotter(experiment_name=experiment_data.experiment_name, data=plot_data) - return log + plotter(experiment_name=experiment_data.experiment_name, data=plot_data, filename=filename) + + def experiment_comparison(experiment1, experiment2): @@ -310,29 +343,31 @@ def get_functions_data(experiments, function_name=None): else: my_fun = function_name for case in experiment.cases: - to_append = (ceil(case / experiment.processes_per_node), case, 
experiment[case][fun_key][my_fun]['Elap. Time(s)']) + to_append = ( + ceil(case / experiment.processes_per_node), case, experiment[case][fun_key][my_fun]['Elap. Time(s)']) data.append(to_append) return (data) def oversimplified_memory_model(experiments): - model_parameters = fit_step_time_to_model(experiments) - error, parameters = model_parameters - import numpy as np + model_parameters = fit_step_time_to_model(experiments) + error, parameters = model_parameters + import numpy as np + error = np.sqrt(abs(error)) + mb, cb, nc, cc, r = parameters + cb = cb / 48. + tt = mb + cb + nc + cc + r + print("Overall model:") + print("MB: %.1f%%\tCB: %.1f%%\tError:%.1f%%" % (100. * mb / tt, 100. * cb / tt, 100. * error / tt)) + print("Top10 Functions:") + functions = list(experiments[0].data[experiments[0].base_case]["FunctionInfo"].labels()) + functions = functions[:10] + for function in functions: + error, parameters = fit_functions_to_model(experiments, function_name=function) error = np.sqrt(abs(error)) - mb, cb, nc, cc, r = parameters - cb = cb/48. 
- tt = mb+cb+nc+cc+r - print("Overall model:") - print("MB: %.1f%%\tCB: %.1f%%\tError:%.1f%%" % (100.*mb/tt, 100.*cb/tt, 100.*error/tt)) - print("Top10 Functions:") - functions = list(experiments[0].data[experiments[0].base_case]["FunctionInfo"].labels()) - functions = functions[:10] - for function in functions: - error, parameters = fit_functions_to_model(experiments, function_name=function) - error = np.sqrt(abs(error)) - mb_time, cb_time, internode, intranode, residual = parameters - cb_time = cb_time/48 - total_time = mb_time + cb_time + internode + intranode + residual - print("Function: %20s\tTime(s): %.1f\t MB: %.1f%%\tCB: %.1f%%\tError:%.1f%%" % - (function, total_time, 100.*mb_time/total_time, 100.*cb_time/total_time, 100.*abs(error/total_time))) + mb_time, cb_time, internode, intranode, residual = parameters + cb_time = cb_time / 48 + total_time = mb_time + cb_time + internode + intranode + residual + print("Function: %20s\tTime(s): %.1f\t MB: %.1f%%\tCB: %.1f%%\tError:%.1f%%" % + (function, total_time, 100. * mb_time / total_time, 100. * cb_time / total_time, + 100. 
* abs(error / total_time))) diff --git a/cfg/parser/utils/parsers.py b/cfg/parser/utils/parsers.py index 7bb123275b953a15ddfaaf3b784468fce38d0c9d..3a4a4873162d6102e104d1f94ed52a834dfd655f 100644 --- a/cfg/parser/utils/parsers.py +++ b/cfg/parser/utils/parsers.py @@ -2,7 +2,7 @@ import re import numpy as np from cfg.parser.utils.curation import function_curated_summary, simulated_years_per_day_summary,\ - save_function_summary_csv + save_function_summary_csv, simulated_years_per_day_plot from os.path import basename from cfg.parser.utils.histograms import process_category_histogram, split_3d_histogram @@ -376,6 +376,9 @@ class TimingInformation: def simulated_years_per_day_summary(self): return simulated_years_per_day_summary(self) + def simulated_years_per_day_plot(self, filename=None): + return simulated_years_per_day_plot(self, filename) + def save_json(self, filename=None): import json if filename is None: diff --git a/cfg/parser/utils/plotter.py b/cfg/parser/utils/plotter.py index 8e833e5dc6fbcd9856aa32b5e72c20692b9b3292..145539a9abace9b68eaa6039d28ea9118a866dec 100644 --- a/cfg/parser/utils/plotter.py +++ b/cfg/parser/utils/plotter.py @@ -1,5 +1,5 @@ -def plotter(experiment_name, data): +def plotter(data, experiment_name, filename=None): try: import matplotlib matplotlib.use('Agg') @@ -38,5 +38,6 @@ def plotter(experiment_name, data): ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') - filename = experiment_name + ".png" + if filename is None: + filename = experiment_name + ".png" plt.savefig(filename) \ No newline at end of file