From c57dbdc5e57a07cbd16151ef058b02b5fba70a69 Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Fri, 7 May 2021 19:57:42 +0200 Subject: [PATCH 1/4] Documenting stats function #644 --- autosubmit/autosubmit.py | 4 +- autosubmit/experiment/statistics.py | 58 +++++++++++--- autosubmit/job/job.py | 16 ++++ autosubmit/monitor/diagram.py | 67 ++++++++++++---- autosubmit/monitor/monitor.py | 117 ++++++++++++++++++---------- docs/source/usage/stats/stats.rst | 46 +++++++++++ 6 files changed, 236 insertions(+), 72 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 81c26e175..688d48003 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2213,7 +2213,7 @@ class Autosubmit: job_list = Autosubmit.load_job_list( expid, as_conf, notransitive=notransitive) Log.debug("Job list restored from {0} files", pkl_dir) - + # Filter by job section if filter_type: ft = filter_type Log.debug(ft) @@ -2225,7 +2225,7 @@ class Autosubmit: else: ft = 'Any' job_list = job_list.get_job_list() - + # Filter by time (hours before) period_fi = datetime.datetime.now().replace(second=0, microsecond=0) if filter_period: period_ini = period_fi - datetime.timedelta(hours=filter_period) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index de3f1ce24..5874e65b0 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -20,7 +20,7 @@ import datetime from autosubmit.job.job import Job from autosubmit.monitor.utils import FixedSizeList -from log.log import Log,AutosubmitError,AutosubmitCritical +from log.log import Log, AutosubmitError, AutosubmitCritical def timedelta2hours(deltatime): @@ -108,63 +108,97 @@ class ExperimentStats(object): return FixedSizeList(self._fail_run, 0.0) def _calculate_stats(self): + """ + Main calculation + """ queued_by_id = dict() + # Start enumeration of job objects for i, job in enumerate(self._jobs_list): last_retrials = job.get_last_retrials() 
processors = job.total_processors for retrial in last_retrials: if Job.is_a_completed_retrial(retrial): + # The retrial has all necessary values and is status COMPLETED + # This IF block appears to be an attempt to normalize the queuing times for wrapped job. + # However, considering the current implementation of wrappers, it does not work. if job.id not in queued_by_id: - self._queued[i] += retrial[1] - retrial[0] + self._queued[i] += retrial[1] - \ + retrial[0] # Queue time + # Job -> Queue time queued_by_id[job.id] = self._queued[i] else: self._queued[i] += queued_by_id[job.id] self._start_times[i] = retrial[1] self._end_times[i] = retrial[2] + # RUN time self._run[i] += retrial[2] - retrial[1] + # CPU consumption = run time (COMPLETED retrial) * number of processors requested (accumulated) self._cpu_consumption += self.run[i] * int(processors) + # REAL CONSUMPTION = run time (accumulated) self._real_consumption += self.run[i] + # Count as COMPLETED job self._total_jobs_completed += 1 else: + # Not COMPLETED status if len(retrial) > 2: + # Consider it as a FAILED run + # Accumulate RUN time self._fail_run[i] += retrial[2] - retrial[1] if len(retrial) > 1: + # It only QUEUED + # Accumulate QUEUE time self._fail_queued[i] += retrial[1] - retrial[0] + # CPU consumption = run time (FAILED retrial) * number of processors requested (accumulated) self._cpu_consumption += self.fail_run[i] * int(processors) + # REAL CONSUMPTION = run time (accumulated) self._real_consumption += self.fail_run[i] + # Count as FAILED job self._failed_jobs[i] += 1 self._total_jobs_submitted += len(last_retrials) self._total_jobs_run += len(last_retrials) self._total_jobs_failed += self.failed_jobs[i] self._threshold = max(self._threshold, job.total_wallclock) - self._expected_cpu_consumption += job.total_wallclock * int(processors) + self._expected_cpu_consumption += job.total_wallclock * \ + int(processors) self._expected_real_consumption += job.total_wallclock self._total_queueing_time += 
self._queued[i] def _calculate_maxs(self): max_run = max(max(self._run), max(self._fail_run)) max_queued = max(max(self._queued), max(self._fail_queued)) - self._max_timedelta = max(max_run, max_queued, datetime.timedelta(hours=self._threshold)) - self._max_time = max(self._max_time, self._max_timedelta.days * 24 + self._max_timedelta.seconds / 3600.0) + self._max_timedelta = max( + max_run, max_queued, datetime.timedelta(hours=self._threshold)) + self._max_time = max(self._max_time, self._max_timedelta.days * + 24 + self._max_timedelta.seconds / 3600.0) self._max_fail = max(self._max_fail, max(self._failed_jobs)) def _calculate_totals(self): - percentage_consumption = timedelta2hours(self._cpu_consumption) / self._expected_cpu_consumption * 100 + """ + Calculates totals and prints to console. + """ + percentage_consumption = timedelta2hours( + self._cpu_consumption) / self._expected_cpu_consumption * 100 self._totals = ['Period: ' + str(self._start) + " ~ " + str(self._end), 'Submitted (#): ' + str(self._total_jobs_submitted), 'Run (#): ' + str(self._total_jobs_run), 'Failed (#): ' + str(self._total_jobs_failed), 'Completed (#): ' + str(self._total_jobs_completed), - 'Queueing time (h): ' + str(round(timedelta2hours(self._total_queueing_time), 2)), - 'Expected consumption real (h): ' + str(round(self._expected_real_consumption, 2)), - 'Expected consumption CPU time (h): ' + str(round(self._expected_cpu_consumption, 2)), - 'Consumption real (h): ' + str(round(timedelta2hours(self._real_consumption), 2)), - 'Consumption CPU time (h): ' + str(round(timedelta2hours(self._cpu_consumption), 2)), + 'Queueing time (h): ' + + str(round(timedelta2hours(self._total_queueing_time), 2)), + 'Expected consumption real (h): ' + str( + round(self._expected_real_consumption, 2)), + 'Expected consumption CPU time (h): ' + str( + round(self._expected_cpu_consumption, 2)), + 'Consumption real (h): ' + + str(round(timedelta2hours(self._real_consumption), 2)), + 'Consumption CPU time 
(h): ' + str( + round(timedelta2hours(self._cpu_consumption), 2)), 'Consumption (%): ' + str(round(percentage_consumption, 2))] Log.result('\n'.join(self._totals)) def _format_stats(self): self._queued = map(lambda y: timedelta2hours(y), self._queued) self._run = map(lambda y: timedelta2hours(y), self._run) - self._fail_queued = map(lambda y: timedelta2hours(y), self._fail_queued) + self._fail_queued = map( + lambda y: timedelta2hours(y), self._fail_queued) self._fail_run = map(lambda y: timedelta2hours(y), self._fail_run) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 78346b27f..7296bc39a 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -287,6 +287,10 @@ class Job(object): @property def total_processors(self): + """ + Number of processors requested by job. + Reduces ':' separated format if necessary. + """ if ':' in self.processors: return reduce(lambda x, y: int(x) + int(y), self.processors.split(':')) return int(self.processors) @@ -501,18 +505,27 @@ class Job(object): return self._get_from_total_stats(1) def get_last_retrials(self): + """ + Returns the retrials of a job, including the last COMPLETED run. The selection stops, and does not include, when the previous COMPLETED job is located or the list of registers is exhausted. 
+ + :return: list of list of dates of retrial [submit, start, finish] in datetime format + :rtype: list of list + """ log_name = os.path.join(self._tmp_path, self.name + '_TOTAL_STATS') retrials_list = [] if os.path.exists(log_name): already_completed = False + # Read lines of the TOTAL_STATS file starting from last for retrial in reversed(open(log_name).readlines()): retrial_fields = retrial.split() if Job.is_a_completed_retrial(retrial_fields): + # It's a COMPLETED run if already_completed: break already_completed = True retrial_dates = map(lambda y: parse_date(y) if y != 'COMPLETED' and y != 'FAILED' else y, retrial_fields) + # Inserting list [submit, start, finish] of datetimes at the beginning of the list. Restores ordering. retrials_list.insert(0, retrial_dates) return retrials_list @@ -1021,6 +1034,9 @@ class Job(object): @staticmethod def is_a_completed_retrial(fields): + """ + Returns true only if there are exactly 4 fields: submit start finish status, and status equals COMPLETED. + """ if len(fields) == 4: if fields[3] == 'COMPLETED': return True diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 2b758859e..27253191c 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -22,7 +22,7 @@ import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import matplotlib.patches as mpatches from autosubmit.experiment.statistics import ExperimentStats -from log.log import Log,AutosubmitCritical,AutosubmitError +from log.log import Log, AutosubmitCritical, AutosubmitError Log.get_logger("Autosubmit") # Autosubmit stats constants @@ -32,6 +32,22 @@ MAX_NUM_PLOTS = 20 def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, period_ini=None, period_fi=None): + """ + Creates a bar diagram of the statistics. 
+ + :param experiment_id: experiment's identifier + :type experiment_id: str + :param jobs_list: list of jobs (filtered) + :type jobs_list: list of Job objects + :param general_stats: list of sections and options in the %expid%_GENERAL_STATS file + :type general_stats: list of tuples + :param output_file: path to the output file + :type output_file: str + :param period_ini: starting date and time + :type period_ini: datetime + :param period_fi: finish date and time + :type period_fi: datetime + """ # Error prevention plt.close('all') # Stats variables definition @@ -43,15 +59,17 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." 
- raise AutosubmitCritical("Stats query out of bounds",7061,message) + Log.info(message) + raise AutosubmitCritical("Stats query out of bounds", 7061, message) + exp_stats = ExperimentStats(jobs_list, period_ini, period_fi) fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * num_plots)) fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] rects = [None] * 6 - exp_stats = ExperimentStats(jobs_list, period_ini, period_fi) + grid_spec = gridspec.GridSpec(RATIO * num_plots + 2, 1) for plot in xrange(1, num_plots + 1): @@ -59,10 +77,12 @@ suppose it is noon, if you supply -fp 5 the query will consider changes starting l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) l2 = int(plot * MAX_JOBS_PER_PLOT) # Building plot axis - ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax.append(fig.add_subplot( + grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) ax[plot - 1].set_ylabel('hours') ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels([job.name for job in jobs_list[l1:l2]], rotation='vertical') + ax[plot - 1].set_xticklabels( + [job.name for job in jobs_list[l1:l2]], rotation='vertical') ax[plot - 1].set_title(experiment_id, fontsize=20) ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) # Axis 2 @@ -71,13 +91,18 @@ suppose it is noon, if you supply -fp 5 the query will consider changes starting ax2[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) ax2[plot - 1].set_ylim(0, exp_stats.max_fail + 1) # Building rects - rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='orchid') - rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='limegreen') - rects[2] = ax2[plot - 1].bar(ind + width * 2, exp_stats.failed_jobs[l1:l2], width, color='red') - rects[3] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='purple') - rects[4] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], 
width, color='tomato') + rects[0] = ax[plot - + 1].bar(ind, exp_stats.queued[l1:l2], width, color='orchid') + rects[1] = ax[plot - 1].bar(ind + width, + exp_stats.run[l1:l2], width, color='limegreen') + rects[2] = ax2[plot - 1].bar(ind + width * 2, + exp_stats.failed_jobs[l1:l2], width, color='red') + rects[3] = ax[plot - 1].bar(ind + width * 3, + exp_stats.fail_queued[l1:l2], width, color='purple') + rects[4] = ax[plot - 1].bar(ind + width * 4, + exp_stats.fail_run[l1:l2], width, color='tomato') rects[5] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], [exp_stats.threshold, exp_stats.threshold], - "k--", label='wallclock sim') + "k--", label='wallclock sim') # Building legends subplot legends_plot = fig.add_subplot(grid_spec[0, 0]) @@ -94,6 +119,7 @@ suppose it is noon, if you supply -fp 5 the query will consider changes starting create_csv_stats(exp_stats, jobs_list, output_file) + def create_csv_stats(exp_stats, jobs_list, output_file): job_names = [job.name for job in jobs_list] start_times = exp_stats.start_times @@ -103,15 +129,19 @@ def create_csv_stats(exp_stats, jobs_list, output_file): output_file = output_file.replace('pdf', 'csv') with open(output_file, 'wb') as file: - file.write("Job,Started,Ended,Queuing time (hours),Running time (hours)\n") + file.write( + "Job,Started,Ended,Queuing time (hours),Running time (hours)\n") for i in xrange(len(jobs_list)): - file.write("{0},{1},{2},{3},{4}\n".format(job_names[i], start_times[i], end_times[i], queuing_times[i], running_times[i])) + file.write("{0},{1},{2},{3},{4}\n".format( + job_names[i], start_times[i], end_times[i], queuing_times[i], running_times[i])) + def build_legends(plot, rects, experiment_stats, general_stats): # Main legend with colourful rectangles legend_rects = [[rect[0] for rect in rects]] legend_titles = [ - ['Queued (h)', 'Run (h)', 'Failed jobs (#)', 'Fail Queued (h)', 'Fail Run (h)', 'Max wallclock (h)'] + ['Queued (h)', 'Run (h)', 'Failed jobs (#)', + 'Fail Queued (h)', 'Fail 
Run (h)', 'Max wallclock (h)'] ] legend_locs = ["upper right"] legend_handlelengths = [None] @@ -119,7 +149,8 @@ def build_legends(plot, rects, experiment_stats, general_stats): # General stats legends, if exists if len(general_stats) > 0: legend_rects.append(get_whites_array(len(general_stats))) - legend_titles.append([str(key) + ': ' + str(value) for key, value in general_stats]) + legend_titles.append([str(key) + ': ' + str(value) + for key, value in general_stats]) legend_locs.append("upper center") legend_handlelengths.append(0) @@ -130,7 +161,8 @@ def build_legends(plot, rects, experiment_stats, general_stats): legend_handlelengths.append(0) # Creating the legends - legends = create_legends(plot, legend_rects, legend_titles, legend_locs, legend_handlelengths) + legends = create_legends( + plot, legend_rects, legend_titles, legend_locs, legend_handlelengths) for legend in legends: plt.gca().add_artist(legend) @@ -138,7 +170,8 @@ def build_legends(plot, rects, experiment_stats, general_stats): def create_legends(plot, rects, titles, locs, handlelengths): legends = [] for i in xrange(len(rects)): - legends.append(create_legend(plot, rects[i], titles[i], locs[i], handlelengths[i])) + legends.append(create_legend( + plot, rects[i], titles[i], locs[i], handlelengths[i])) return legends diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py index 99f307d36..14e72c4f9 100644 --- a/autosubmit/monitor/monitor.py +++ b/autosubmit/monitor/monitor.py @@ -33,7 +33,7 @@ import subprocess from autosubmit.job.job_common import Status from autosubmit.config.basicConfig import BasicConfig from autosubmit.config.config_common import AutosubmitConfig -from log.log import Log,AutosubmitError,AutosubmitCritical +from log.log import Log, AutosubmitError, AutosubmitCritical from bscearth.utils.config_parser import ConfigParserFactory from diagram import create_bar_diagram @@ -41,8 +41,9 @@ from diagram import create_bar_diagram class Monitor: """Class to handle 
monitoring of Jobs at HPC.""" - _table = dict([(Status.UNKNOWN, 'white'), (Status.WAITING, 'gray'), (Status.READY, 'lightblue'),(Status.PREPARED, 'skyblue'), - (Status.SUBMITTED, 'cyan'), (Status.HELD, 'salmon'), (Status.QUEUING, 'pink'), (Status.RUNNING, 'green'), + _table = dict([(Status.UNKNOWN, 'white'), (Status.WAITING, 'gray'), (Status.READY, 'lightblue'), (Status.PREPARED, 'skyblue'), + (Status.SUBMITTED, 'cyan'), (Status.HELD, + 'salmon'), (Status.QUEUING, 'pink'), (Status.RUNNING, 'green'), (Status.COMPLETED, 'yellow'), (Status.FAILED, 'red'), (Status.SUSPENDED, 'orange'), (Status.SKIPPED, 'lightyellow')]) @staticmethod @@ -95,7 +96,8 @@ class Monitor: graph = pydotplus.Dot(graph_type='digraph') Log.debug('Creating legend...') - legend = pydotplus.Subgraph(graph_name='Legend', label='Legend', rank="source") + legend = pydotplus.Subgraph( + graph_name='Legend', label='Legend', rank="source") legend.add_node(pydotplus.Node(name='UNKNOWN', shape='box', style="", fillcolor=self._table[Status.UNKNOWN])) legend.add_node(pydotplus.Node(name='WAITING', shape='box', style="filled", @@ -146,7 +148,8 @@ class Monitor: if groups and job.name in groups['jobs']: group = groups['jobs'][job.name][0] node_job.obj_dict['name'] = group - node_job.obj_dict['attributes']['fillcolor'] = self.color_status(groups['status'][group]) + node_job.obj_dict['attributes']['fillcolor'] = self.color_status( + groups['status'][group]) node_job.obj_dict['attributes']['shape'] = 'box3d' exp.add_node(node_job) @@ -156,9 +159,10 @@ class Monitor: if not hide_groups: for job, group in groups['jobs'].items(): if len(group) > 1: - group_name = 'cluster_'+'_'.join(group) + group_name = 'cluster_' + '_'.join(group) if group_name not in graph.obj_dict['subgraphs']: - subgraph = pydotplus.graphviz.Cluster(graph_name='_'.join(group)) + subgraph = pydotplus.graphviz.Cluster( + graph_name='_'.join(group)) subgraph.obj_dict['attributes']['color'] = 'invis' else: subgraph = 
graph.get_subgraph(group_name)[0] @@ -172,7 +176,8 @@ class Monitor: if len(subgraph.get_node(group[i])) == 0: subgraph.add_node(node) - edge = subgraph.get_edge(node.obj_dict['name'], previous_node.obj_dict['name']) + edge = subgraph.get_edge( + node.obj_dict['name'], previous_node.obj_dict['name']) if len(edge) == 0: edge = pydotplus.Edge(previous_node, node) edge.obj_dict['attributes']['dir'] = 'none' @@ -198,7 +203,8 @@ class Monitor: if name in jobs_packages_dict: package = jobs_packages_dict[name] if package not in packages_subgraphs_dict: - packages_subgraphs_dict[package] = pydotplus.graphviz.Cluster(graph_name=package) + packages_subgraphs_dict[package] = pydotplus.graphviz.Cluster( + graph_name=package) packages_subgraphs_dict[package].obj_dict['attributes']['color'] = 'black' packages_subgraphs_dict[package].obj_dict['attributes']['style'] = 'dashed' packages_subgraphs_dict[package].add_node(node) @@ -215,7 +221,8 @@ class Monitor: self.nodes_ploted.add(job) if job.has_children() != 0: for child in sorted(job.children, key=lambda k: k.name): - node_child, skip = self._check_node_exists(exp, child, groups, hide_groups) + node_child, skip = self._check_node_exists( + exp, child, groups, hide_groups) if len(node_child) == 0 and not skip: node_child = self._create_node(child, groups, hide_groups) if node_child: @@ -228,7 +235,8 @@ class Monitor: exp.add_edge(pydotplus.Edge(node_job, node_child)) skip = True if not skip: - self._add_children(child, exp, node_child, groups, hide_groups) + self._add_children( + child, exp, node_child, groups, hide_groups) def _check_node_exists(self, exp, job, groups, hide_groups): skip = False @@ -249,15 +257,15 @@ class Monitor: if not hide_groups: group = groups['jobs'][job.name][0] node = pydotplus.Node(group, shape='box3d', style="filled", - fillcolor=self.color_status(groups['status'][group])) + fillcolor=self.color_status(groups['status'][group])) node.set_name(group.replace('"', '')) elif not groups or job.name not in 
groups['jobs']: node = pydotplus.Node(job.name, shape='box', style="filled", - fillcolor=self.color_status(job.status)) + fillcolor=self.color_status(job.status)) return node - def generate_output(self, expid, joblist, path, output_format="pdf", packages=None, show=False, groups=dict(), hide_groups=False, job_list_object = None): + def generate_output(self, expid, joblist, path, output_format="pdf", packages=None, show=False, groups=dict(), hide_groups=False, job_list_object=None): """ Plots graph for joblist and stores it in a file @@ -278,27 +286,30 @@ class Monitor: output_file = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "plot", expid + "_" + output_date + "." + output_format) - graph = self.create_tree_list(expid, joblist, packages, groups, hide_groups) + graph = self.create_tree_list( + expid, joblist, packages, groups, hide_groups) Log.debug("Saving workflow plot at '{0}'", output_file) if output_format == "png": # noinspection PyUnresolvedReferences - graph.write_png(output_file) + graph.write_png(output_file) elif output_format == "pdf": # noinspection PyUnresolvedReferences - graph.write_pdf(output_file) + graph.write_pdf(output_file) elif output_format == "ps": # noinspection PyUnresolvedReferences - graph.write_ps(output_file) + graph.write_ps(output_file) elif output_format == "svg": # noinspection PyUnresolvedReferences - graph.write_svg(output_file) + graph.write_svg(output_file) elif output_format == "txt": # JobList object is needed, also it acts as a flag. 
if job_list_object is not None: - self.generate_output_txt(expid, joblist, path, job_list_object=job_list_object) + self.generate_output_txt( + expid, joblist, path, job_list_object=job_list_object) else: - raise AutosubmitCritical('Format {0} not supported'.format(output_format),7069) + raise AutosubmitCritical( + 'Format {0} not supported'.format(output_format), 7069) if output_format != "txt": Log.result('Plot created at {0}', output_file) # If txt, don't open @@ -306,12 +317,13 @@ class Monitor: try: subprocess.check_call(['xdg-open', output_file]) except subprocess.CalledProcessError: - raise AutosubmitCritical('File {0} could not be opened'.format(output_file), 7068) + raise AutosubmitCritical( + 'File {0} could not be opened'.format(output_file), 7068) # If the txt has been generated, don't make it again. if output_format != "txt": self.generate_output_txt(expid, joblist, path, "default") - def generate_output_txt(self, expid, joblist, path,classictxt=False, job_list_object = None): + def generate_output_txt(self, expid, joblist, path, classictxt=False, job_list_object=None): """ Function that generates a representation of the jobs in a txt file :param expid: experiment's identifier @@ -322,10 +334,11 @@ class Monitor: :type job_list_object: JobList object """ Log.info('Writing status txt...') - + now = time.localtime() output_date = time.strftime("%Y%m%d_%H%M", now) - file_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "status", expid + "_" + output_date + ".txt") + file_path = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, expid, "status", expid + "_" + output_date + ".txt") if not os.path.exists(os.path.dirname(file_path)): os.makedirs(os.path.dirname(file_path)) @@ -339,29 +352,37 @@ class Monitor: log_out = path + "/" + job.local_logs[0] log_err = path + "/" + job.local_logs[1] - output = job.name + " " + Status().VALUE_TO_KEY[job.status] + " " + log_out + " " + log_err + "\n" + output = job.name + " " + \ + Status().VALUE_TO_KEY[job.status] + \ + 
" " + log_out + " " + log_err + "\n" output_file.write(output) - else: + else: # Replaced call to function for a call to the function of the object that # was previously implemented, nocolor is set to True because we don't want # strange ANSI codes in our plain text file - if job_list_object is not None: - print("In the new thingy") - output_file.write(job_list_object.print_with_status(statusChange = None, nocolor=True, existingList=joblist)) + if job_list_object is not None: + print("In the new thingy") + output_file.write(job_list_object.print_with_status( + statusChange=None, nocolor=True, existingList=joblist)) else: - output_file.write("Writing jobs, they're grouped by [FC and DATE] \n") - self.write_output_txt_recursive(joblist[0],output_file,"",file_path) + output_file.write( + "Writing jobs, they're grouped by [FC and DATE] \n") + self.write_output_txt_recursive( + joblist[0], output_file, "", file_path) output_file.close() Log.result('Status txt created at {0}', output_file) - def write_output_txt_recursive(self,job,output_file,level,path): + def write_output_txt_recursive(self, job, output_file, level, path): log_out = "" log_err = "" - output = level+job.name + " " + Status().VALUE_TO_KEY[job.status] +"\n" #+ " " + log_out + " " + log_err + "\n" + # + " " + log_out + " " + log_err + "\n" + output = level + job.name + " " + \ + Status().VALUE_TO_KEY[job.status] + "\n" output_file.write(output) if job.has_children() > 0: for child in job.children: - self.write_output_txt_recursive(child,output_file,"_"+level,path) + self.write_output_txt_recursive( + child, output_file, "_" + level, path) def generate_output_stats(self, expid, joblist, output_format="pdf", period_ini=None, period_fi=None, show=False): """ @@ -391,13 +412,16 @@ class Monitor: output_file = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "stats", expid + "_statistics_" + output_date + "." 
+ output_format) - create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_file, period_ini, period_fi) + create_bar_diagram(expid, joblist, self.get_general_stats( + expid), output_file, period_ini, period_fi) Log.result('Stats created at {0}', output_file) if show: try: subprocess.check_call(['xdg-open', output_file]) except subprocess.CalledProcessError: - raise AutosubmitCritical('File {0} could not be opened'.format(output_file),7068) + raise AutosubmitCritical( + 'File {0} could not be opened'.format(output_file), 7068) + @staticmethod def clean_plot(expid): """ @@ -410,7 +434,8 @@ class Monitor: search_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "plot") chdir(search_dir) files = filter(path.isfile, listdir(search_dir)) - files = [path.join(search_dir, f) for f in files if 'statistics' not in f] + files = [path.join(search_dir, f) + for f in files if 'statistics' not in f] files.sort(key=lambda x: path.getmtime(x)) remain = files[-2:] filelist = [f for f in files if f not in remain] @@ -440,10 +465,20 @@ class Monitor: @staticmethod def get_general_stats(expid): + """ + Returns all the options in the sections of the %expid%_GENERAL_STATS + + :param expid: experiment's identifier + :type expid: str + :return: list of tuples (section, ''), (option, value), (option, value), (section, ''), (option, value), ... 
+ :rtype: list + """ general_stats = [] - general_stats_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "tmp", expid + "_GENERAL_STATS") - parser = AutosubmitConfig.get_parser(ConfigParserFactory(), general_stats_path) + general_stats_path = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, expid, "tmp", expid + "_GENERAL_STATS") + parser = AutosubmitConfig.get_parser( + ConfigParserFactory(), general_stats_path) for section in parser.sections(): general_stats.append((section, '')) - general_stats += parser.items(section) + general_stats += parser.items(section) return general_stats diff --git a/docs/source/usage/stats/stats.rst b/docs/source/usage/stats/stats.rst index 9e39ea942..1e7523cac 100644 --- a/docs/source/usage/stats/stats.rst +++ b/docs/source/usage/stats/stats.rst @@ -58,3 +58,49 @@ The location where user can put this stats is in the file: /cxxx/tmp/cxxx_GENERAL_STATS .. hint:: If it is not yet created, you can manually create the file: ```expid_GENERAL_STATS``` inside the ```tmp``` folder. + +Console output description +========================== + +Example: +:: + + Period: 2021-04-25 06:43:00 ~ 2021-05-07 18:43:00 + Submitted (#): 37 + Run (#): 37 + Failed (#): 3 + Completed (#): 34 + Queueing time (h): 1.61 + Expected consumption real (h): 2.75 + Expected consumption CPU time (h): 3.33 + Consumption real (h): 0.05 + Consumption CPU time (h): 0.06 + Consumption (%): 1.75 + +Where: + +- Period: Requested time frame +- Submitted: Total number of attempts that reached the SUBMITTED status. +- Run: Total number of attempts that reached the RUNNING status. +- Failed: Total number of FAILED attempts of running a job. +- Completed: Total number of attempts that reached the COMPLETED status. +- Queueing time (h): Sum of the time spent queuing by attempts that reached the COMPLETED status, in hours. +- Expected consumption real (h): Sum of wallclock values for all jobs, in hours. 
+- Expected consumption CPU time (h): Sum of the products of wallclock value and number of requested of processors for each job, in hours. +- Consumption real (h): Sum of the time spent running by all attempts of jobs, in hours. +- Consumption CPU time (h): Sum of the products of the time spent running and number of requested of processors for each job, in hours. +- Consumption (%): Percentage of `Consumption CPU time` relative to `Expected consumption CPU time`. + +Diagram output description +========================== + +The main `stats` output is a bar diagram. On this diagram, each job presents these values: + +- Queued (h): Sum of time spent queuing for COMPLETED attempts, in hours. +- Run (h): Sum of time spent running for COMPLETED attempts, in hours. +- Failed jobs (#): Total number of FAILED attempts. +- Fail Queued (h): Sum of time spent queuing for FAILED attempts, in hours. +- Fail Run (h): Sum of time spent running for FAILED attempts, in hours. +- Max wallclock (h): Maximum wallclock value for all jobs in the plot. + +Notice that the left scale of the diagram measures the time in hours, and the right scale measures the number of attempts. \ No newline at end of file -- GitLab From 2ccaf1676a8af3f6671d4d6e08a51401b6f46cc9 Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Fri, 7 May 2021 20:01:17 +0200 Subject: [PATCH 2/4] Improving docs #644 --- docs/source/usage/stats/stats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/usage/stats/stats.rst b/docs/source/usage/stats/stats.rst index 1e7523cac..4d1b34654 100644 --- a/docs/source/usage/stats/stats.rst +++ b/docs/source/usage/stats/stats.rst @@ -86,7 +86,7 @@ Where: - Completed: Total number of attempts that reached the COMPLETED status. - Queueing time (h): Sum of the time spent queuing by attempts that reached the COMPLETED status, in hours. - Expected consumption real (h): Sum of wallclock values for all jobs, in hours. 
-- Expected consumption CPU time (h): Sum of the products of wallclock value and number of requested of processors for each job, in hours. +- Expected consumption CPU time (h): Sum of the products of wallclock value and number of requested processors for each job, in hours. - Consumption real (h): Sum of the time spent running by all attempts of jobs, in hours. - Consumption CPU time (h): Sum of the products of the time spent running and number of requested of processors for each job, in hours. - Consumption (%): Percentage of `Consumption CPU time` relative to `Expected consumption CPU time`. -- GitLab From d6efdc9ecfa289a6499ec72552ba4feb494187e7 Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Fri, 7 May 2021 20:02:35 +0200 Subject: [PATCH 3/4] Improving docs #644 grammar --- docs/source/usage/stats/stats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/usage/stats/stats.rst b/docs/source/usage/stats/stats.rst index 4d1b34654..f5abac54b 100644 --- a/docs/source/usage/stats/stats.rst +++ b/docs/source/usage/stats/stats.rst @@ -88,7 +88,7 @@ Where: - Expected consumption real (h): Sum of wallclock values for all jobs, in hours. - Expected consumption CPU time (h): Sum of the products of wallclock value and number of requested processors for each job, in hours. - Consumption real (h): Sum of the time spent running by all attempts of jobs, in hours. -- Consumption CPU time (h): Sum of the products of the time spent running and number of requested of processors for each job, in hours. +- Consumption CPU time (h): Sum of the products of the time spent running and number of requested processors for each job, in hours. - Consumption (%): Percentage of `Consumption CPU time` relative to `Expected consumption CPU time`. 
Diagram output description -- GitLab From 0eef13aaee7fe9da92997800b09b7a0f21d4c1f6 Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Fri, 7 May 2021 20:06:57 +0200 Subject: [PATCH 4/4] Improving docs #644 more grammar --- docs/source/usage/stats/stats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/usage/stats/stats.rst b/docs/source/usage/stats/stats.rst index f5abac54b..cb183d386 100644 --- a/docs/source/usage/stats/stats.rst +++ b/docs/source/usage/stats/stats.rst @@ -79,7 +79,7 @@ Example: Where: -- Period: Requested time frame +- Period: Requested time frame. - Submitted: Total number of attempts that reached the SUBMITTED status. - Run: Total number of attempts that reached the RUNNING status. - Failed: Total number of FAILED attempts of running a job. -- GitLab