From 50526c92db2c48693c7649f93aebe2aace10c6fc Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 28 Jul 2023 09:14:00 +0200 Subject: [PATCH 01/10] some fixes --- autosubmit/monitor/diagram.py | 11 +- autosubmit/statistics/statistics.py | 209 +++++++++++++++------------- 2 files changed, 123 insertions(+), 97 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 786c66e49..e069c533f 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -17,9 +17,9 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import traceback -import numpy as np import matplotlib as mtp +import numpy as np +import traceback mtp.use('Agg') import matplotlib.pyplot as plt @@ -30,7 +30,7 @@ from autosubmit.statistics.statistics import Statistics from autosubmit.job.job import Job from log.log import Log, AutosubmitCritical from datetime import datetime -from typing import Dict, List +from typing import List Log.get_logger("Autosubmit") @@ -98,7 +98,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] - rects = [None] * 5 + rects = [] * 5 # print("Normal plots: {}".format(normal_plots_count)) # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) # print("Total plots: {}".format(total_plots_count)) @@ -129,6 +129,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') + # Building legend i_plot = plot except Exception as exp: print((traceback.format_exc())) @@ -197,7 +198,9 @@ def create_csv_stats(exp_stats, jobs_list, output_file): def build_legends(plot, rects, experiment_stats, general_stats): # type: (plt.figure, List[plt.bar], Statistics, List[str]) -> None # Main legend with colourful rectangles + legend_rects = [[rect[0] for rect in rects]] + legend_titles = [ ['Queued (h)', 'Run (h)', 'Fail Queued (h)', 'Fail Run (h)', 'Max wallclock (h)'] ] diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 504cd9b50..33618efe2 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -1,98 +1,106 @@ #!/bin/env/python from datetime import datetime, timedelta +from typing import List, Union, Dict + from autosubmit.job.job import Job from .jobs_stat import JobStat from .stats_summary import StatsSummary from .utils import timedelta2hours, parse_number_processors -from typing import List, Union, Dict + # from collections import namedtuple _COMPLETED_RETRIAL = 1 _FAILED_RETRIAL = 0 + class Statistics(object): def __init__(self, jobs, start, end, queue_time_fix): # type: (List[Job], datetime, datetime, Dict[str, int]) -> None """ """ - self._jobs = jobs + self._jobs = jobs self._start = start self._end = end self._queue_time_fixes = queue_time_fix - self._name_to_jobstat_dict = dict() # type: Dict[str, JobStat] - self.jobs_stat = [] # type: List[JobStat] + self._name_to_jobstat_dict = dict() # type: Dict[str, JobStat] + self.jobs_stat = [] # type: List[JobStat] # Old format - self.max_time = 0.0 # type: float - self.max_fail = 0 # type: int - self.start_times = [] # type: List[Union[datetime, None]] - self.end_times = [] # type: List[Union[datetime, None]] - self.queued = [] # type: List[timedelta] - self.run = [] # type: List[timedelta] - self.failed_jobs = [] # type: List[int] - self.fail_queued = [] # type: List[timedelta] - self.fail_run = [] # type: List[timedelta] - self.wallclocks = [] # type: List[float] - self.threshold = 0.0 # type: float - self.failed_jobs_dict = {} # type: Dict[str, int] + self.max_time = 0.0 # type: float + self.max_fail = 0 # type: int + self.start_times = [] # type: List[Union[datetime, None]] + self.end_times = [] # type: List[Union[datetime, None]] + self.queued = [] # type: List[timedelta] + self.run = [] # type: List[timedelta] + self.failed_jobs = [] # type: List[int] + self.fail_queued = [] # type: List[timedelta] + self.fail_run = [] # type: List[timedelta] + self.wallclocks = [] # type: List[float] + self.threshold = 0.0 # type: float + self.failed_jobs_dict = {} # type: Dict[str, int] self.summary = StatsSummary() self.totals = [" Description text \n", "Line 1"] - + def calculate_statistics(self): - # type: () -> List[JobStat] - for index, job in enumerate(self._jobs): - retrials = job.get_last_retrials() - for retrial in retrials: - # print(retrial) - job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors(job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) - job_stat.inc_retrial_count() - if Job.is_a_completed_retrial(retrial): - job_stat.inc_completed_retrial_count() - job_stat.submit_time = retrial[0] - job_stat.start_time = retrial[1] - job_stat.finish_time = retrial[2] - adjusted_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta(seconds=self._queue_time_fixes.get(job.name, 0)) - job_stat.completed_queue_time += max(adjusted_queue, timedelta()) - job_stat.completed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) - else: - job_stat.inc_failed_retrial_count() - job_stat.submit_time = retrial[0] if len(retrial) >= 1 and type(retrial[0]) == datetime else None - job_stat.start_time = retrial[1] if len(retrial) >= 2 and type(retrial[1]) == datetime else None - job_stat.finish_time = retrial[2] if len(retrial) >= 3 and type(retrial[2]) == datetime else None - if job_stat.finish_time and job_stat.start_time: - job_stat.failed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) - if job_stat.start_time and job_stat.submit_time: - adjusted_failed_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta(seconds=self._queue_time_fixes.get(job.name, 0)) - job_stat.failed_queue_time += max(adjusted_failed_queue, timedelta()) - self.jobs_stat =sorted(list(self._name_to_jobstat_dict.values()), key=lambda x: (x.date if x.date else datetime.now(), x.member if x.member else "", x.section if x.section else "", x.chunk)) - return self.jobs_stat - + # type: () -> List[JobStat] + for index, job in enumerate(self._jobs): + retrials = job.get_last_retrials() + for retrial in retrials: + # print(retrial) + job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( + job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) + job_stat.inc_retrial_count() + if Job.is_a_completed_retrial(retrial): + job_stat.inc_completed_retrial_count() + job_stat.submit_time = retrial[0] + job_stat.start_time = retrial[1] + job_stat.finish_time = retrial[2] + adjusted_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta( + seconds=self._queue_time_fixes.get(job.name, 0)) + job_stat.completed_queue_time += max(adjusted_queue, timedelta()) + job_stat.completed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) + else: + job_stat.inc_failed_retrial_count() + job_stat.submit_time = retrial[0] if len(retrial) >= 1 and type(retrial[0]) == datetime else None + job_stat.start_time = retrial[1] if len(retrial) >= 2 and type(retrial[1]) == datetime else None + job_stat.finish_time = retrial[2] if len(retrial) >= 3 and type(retrial[2]) == datetime else None + if job_stat.finish_time and job_stat.start_time: + job_stat.failed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) + if job_stat.start_time and job_stat.submit_time: + adjusted_failed_queue = max(job_stat.start_time - job_stat.submit_time, + timedelta()) - timedelta( + seconds=self._queue_time_fixes.get(job.name, 0)) + job_stat.failed_queue_time += max(adjusted_failed_queue, timedelta()) + self.jobs_stat = sorted(list(self._name_to_jobstat_dict.values()), key=lambda x: ( + x.date if x.date else datetime.now(), x.member if x.member else "", x.section if x.section else "", x.chunk)) + return self.jobs_stat + def calculate_summary(self): - # type: () -> StatsSummary - stat_summary = StatsSummary() - for job in self.jobs_stat: - # print("{} -> {}".format(job._name, job.expected_real_consumption)) - job_stat_dict = job.get_as_dict() - # Counter - stat_summary.submitted_count += job_stat_dict["submittedCount"] - stat_summary.run_count += job_stat_dict["retrialCount"] - stat_summary.completed_count += job_stat_dict["completedCount"] - stat_summary.failed_count += job_stat_dict["failedCount"] - # Consumption - stat_summary.expected_consumption += job_stat_dict["expectedConsumption"] - stat_summary.real_consumption += job_stat_dict["realConsumption"] - stat_summary.failed_real_consumption += job_stat_dict["failedRealConsumption"] - # CPU Consumption - stat_summary.expected_cpu_consumption += job_stat_dict["expectedCpuConsumption"] - stat_summary.cpu_consumption += job_stat_dict["cpuConsumption"] - stat_summary.failed_cpu_consumption += job_stat_dict["failedCpuConsumption"] - stat_summary.total_queue_time += job_stat_dict["completedQueueTime"] + job_stat_dict["failedQueueTime"] - stat_summary.calculate_consumption_percentage() - self.summary = stat_summary + # type: () -> StatsSummary + stat_summary = StatsSummary() + for job in self.jobs_stat: + # print("{} -> {}".format(job._name, job.expected_real_consumption)) + job_stat_dict = job.get_as_dict() + # Counter + stat_summary.submitted_count += job_stat_dict["submittedCount"] + stat_summary.run_count += job_stat_dict["retrialCount"] + stat_summary.completed_count += job_stat_dict["completedCount"] + stat_summary.failed_count += job_stat_dict["failedCount"] + # Consumption + stat_summary.expected_consumption += job_stat_dict["expectedConsumption"] + stat_summary.real_consumption += job_stat_dict["realConsumption"] + stat_summary.failed_real_consumption += job_stat_dict["failedRealConsumption"] + # CPU Consumption + stat_summary.expected_cpu_consumption += job_stat_dict["expectedCpuConsumption"] + stat_summary.cpu_consumption += job_stat_dict["cpuConsumption"] + stat_summary.failed_cpu_consumption += job_stat_dict["failedCpuConsumption"] + stat_summary.total_queue_time += job_stat_dict["completedQueueTime"] + job_stat_dict["failedQueueTime"] + stat_summary.calculate_consumption_percentage() + self.summary = stat_summary def get_summary_as_list(self): - return self.summary.get_as_list() + return self.summary.get_as_list() def get_statistics(self): job_stat_list = self.calculate_statistics() @@ -100,32 +108,47 @@ class Statistics(object): "Period": {"From": str(self._start), "To": str(self._end)}, "JobStatistics": [job.get_as_dict() for job in job_stat_list] } - + def make_old_format(self): - # type: () -> None - """ Makes old format """ - self.start_times = [job.start_time for job in self.jobs_stat] - self.end_times = [job.finish_time for job in self.jobs_stat] - self.queued = [timedelta2hours(job.completed_queue_time) for job in self.jobs_stat] - self.run = [timedelta2hours(job.completed_run_time) for job in self.jobs_stat] - self.failed_jobs = [job.failed_retrial_count for job in self.jobs_stat] - self.max_fail = max(self.failed_jobs) - self.fail_run = [timedelta2hours(job.failed_run_time) for job in self.jobs_stat] - self.fail_queued = [timedelta2hours(job.failed_queue_time) for job in self.jobs_stat] - self.wallclocks = [job.expected_real_consumption for job in self.jobs_stat] - self.threshold = max(self.wallclocks) - max_queue = max(self.queued) - max_run = max(self.run) - max_fail_queue = max(self.fail_queued) - max_fail_run = max(self.fail_run) - self.max_time = max(max_queue, max_run, max_fail_queue, max_fail_run, self.threshold) + # type: () -> None + """ Makes old format """ + self.start_times = [job.start_time for job in self.jobs_stat] + self.end_times = [job.finish_time for job in self.jobs_stat] + self.queued = [timedelta2hours(job.completed_queue_time) for job in self.jobs_stat] + self.run = [timedelta2hours(job.completed_run_time) for job in self.jobs_stat] + self.failed_jobs = [job.failed_retrial_count for job in self.jobs_stat] + if len(self.failed_jobs) == 0: + self.max_fail = 0 + else: + self.max_fail = max(self.failed_jobs) + self.fail_run = [timedelta2hours(job.failed_run_time) for job in self.jobs_stat] + self.fail_queued = [timedelta2hours(job.failed_queue_time) for job in self.jobs_stat] + self.wallclocks = [job.expected_real_consumption for job in self.jobs_stat] + if len(self.wallclocks) == 0: + self.threshold = 0.0 + else: + self.threshold = max(self.wallclocks) + if len(self.queued) == 0: + max_queue = 0.0 + else: + max_queue = max(self.queued) + if len(self.run) == 0: + max_run = 0.0 + else: + max_run = max(self.run) + if len(self.fail_queued) == 0: + max_fail_queue = 0.0 + else: + max_fail_queue = max(self.fail_queued) + if len(self.fail_run) == 0: + max_fail_run = 0.0 + else: + max_fail_run = max(self.fail_run) + self.max_time = max(max_queue, max_run, max_fail_queue, max_fail_run, self.threshold) def build_failed_jobs_only_list(self): - # type: () -> Dict[str, int] - for i, job in enumerate(self.jobs_stat): - if self.failed_jobs[i] > 0: - self.failed_jobs_dict[job._name] = self.failed_jobs[i] - return self.failed_jobs_dict - - - + # type: () -> Dict[str, int] + for i, job in enumerate(self.jobs_stat): + if self.failed_jobs[i] > 0: + self.failed_jobs_dict[job._name] = self.failed_jobs[i] + return self.failed_jobs_dict -- GitLab From 14cdb1d7efe09f5eab612d7de8f5b974295a0de1 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 13:44:32 +0200 Subject: [PATCH 02/10] Some fixes --- autosubmit/monitor/diagram.py | 6 ++++-- autosubmit/statistics/statistics.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index e069c533f..ab984e649 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -98,7 +98,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] - rects = [] * 5 + rects = [None] * 5 # print("Normal plots: {}".format(normal_plots_count)) # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) # print("Total plots: {}".format(total_plots_count)) @@ -190,7 +190,9 @@ def create_csv_stats(exp_stats, jobs_list, output_file): with open(output_file, 'w') as file: file.write( "Job,Started,Ended,Queuing time (hours),Running time (hours)\n") - for i in range(len(jobs_list)): + # In the other function, job_names,start_times... etc is only filled if the job has completed retrials + # So I'll change this one to do the same + for i in range(len([ job for job in jobs_list if job.get_last_retrials() ])): file.write("{0},{1},{2},{3},{4}\n".format( job_names[i], start_times[i], end_times[i], queuing_times[i], running_times[i])) diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 33618efe2..9f7590657 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -47,7 +47,7 @@ class Statistics(object): for index, job in enumerate(self._jobs): retrials = job.get_last_retrials() for retrial in retrials: - # print(retrial) + print(retrial) job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) job_stat.inc_retrial_count() -- GitLab From 08a72be953d07251462437bf8de2bf46f32b5716 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 14:16:33 +0200 Subject: [PATCH 03/10] Some fixes --- autosubmit/monitor/diagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index ab984e649..ae51e34bf 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -90,7 +90,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - Log.info(message) + #Log.info(message) raise AutosubmitCritical("Stats query out of bounds", 7061, message) fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) -- GitLab From a2adeb698bb6761dfea6c09aba8a85f880ac9ec2 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 15:24:49 +0200 Subject: [PATCH 04/10] Some fixes --- autosubmit/monitor/diagram.py | 166 +++++++++++++++++----------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index ae51e34bf..684d8175b 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -90,93 +90,93 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - #Log.info(message) - raise AutosubmitCritical("Stats query out of bounds", 7061, message) - - fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) - - fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') - # Variables initialization - ax, ax2 = [], [] - rects = [None] * 5 - # print("Normal plots: {}".format(normal_plots_count)) - # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) - # print("Total plots: {}".format(total_plots_count)) - grid_spec = gridspec.GridSpec(RATIO * total_plots_count + 2, 1) - i_plot = 0 - for plot in range(1, normal_plots_count + 1): - try: - # Calculating jobs inside the given plot - l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) - l2 = min(int(plot * MAX_JOBS_PER_PLOT), len(exp_stats.jobs_stat)) - if l2 - l1 <= 0: - continue - ind = np.arange(l2 - l1) - # Building plot axis - ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) - ax[plot - 1].set_ylabel('hours') - ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels( - [job.name for job in jobs_list[l1:l2]], rotation='vertical') - ax[plot - 1].set_title(experiment_id, fontsize=20) - upper_limit = round(1.10 * exp_stats.max_time, 4) - ax[plot - 1].set_yticks(np.arange(0, upper_limit, round(upper_limit / 10, 4))) - ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) - # Building reacts - rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='lightpink') - rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='green') - rects[2] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='lightsalmon') - rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') - rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], - [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') - # Building legend - i_plot = plot - except Exception as exp: - print((traceback.format_exc())) - print(exp) + Log.info(message) + #raise AutosubmitCritical("Stats query out of bounds", 7061, message) + else: + fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) + + fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') + # Variables initialization + ax, ax2 = [], [] + rects = [None] * 5 + # print("Normal plots: {}".format(normal_plots_count)) + # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) + # print("Total plots: {}".format(total_plots_count)) + grid_spec = gridspec.GridSpec(RATIO * total_plots_count + 2, 1) + i_plot = 0 + for plot in range(1, normal_plots_count + 1): + try: + # Calculating jobs inside the given plot + l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) + l2 = min(int(plot * MAX_JOBS_PER_PLOT), len(exp_stats.jobs_stat)) + if l2 - l1 <= 0: + continue + ind = np.arange(l2 - l1) + # Building plot axis + ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax[plot - 1].set_ylabel('hours') + ax[plot - 1].set_xticks(ind + width) + ax[plot - 1].set_xticklabels( + [job.name for job in jobs_list[l1:l2]], rotation='vertical') + ax[plot - 1].set_title(experiment_id, fontsize=20) + upper_limit = round(1.10 * exp_stats.max_time, 4) + ax[plot - 1].set_yticks(np.arange(0, upper_limit, round(upper_limit / 10, 4))) + ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) + # Building reacts + rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='lightpink') + rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='green') + rects[2] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='lightsalmon') + rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') + rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], + [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') + # Building legend + i_plot = plot + except Exception as exp: + print((traceback.format_exc())) + print(exp) + + job_names_in_failed = [name for name in exp_stats.failed_jobs_dict] + failed_jobs_rects = [None] + for j_plot in range(1, failed_jobs_plots_count + 1): + try: + l1 = int((j_plot - 1) * MAX_JOBS_PER_PLOT) + l2 = min(int(j_plot * MAX_JOBS_PER_PLOT), len(job_names_in_failed)) + if l2 - l1 <= 0: + continue + ind = np.arange(l2 - l1) + plot = i_plot + j_plot + ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax[plot - 1].set_ylabel('# failed attempts') + ax[plot - 1].set_xticks(ind + width) + ax[plot - 1].set_xticklabels([name for name in job_names_in_failed[l1:l2]], rotation='vertical') + ax[plot - 1].set_title(experiment_id, fontsize=20) + ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_fail)) + ax[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) + failed_jobs_rects[0] = ax[plot - 1].bar(ind + width * 2, [exp_stats.failed_jobs_dict[name] for name in + job_names_in_failed[l1:l2]], width, color='red') + except Exception as exp: + print((traceback.format_exc())) + print(exp) + + # Building legends subplot + legends_plot = fig.add_subplot(grid_spec[0, 0]) + legends_plot.set_frame_on(False) + legends_plot.axes.get_xaxis().set_visible(False) + legends_plot.axes.get_yaxis().set_visible(False) - job_names_in_failed = [name for name in exp_stats.failed_jobs_dict] - failed_jobs_rects = [None] - for j_plot in range(1, failed_jobs_plots_count + 1): try: - l1 = int((j_plot - 1) * MAX_JOBS_PER_PLOT) - l2 = min(int(j_plot * MAX_JOBS_PER_PLOT), len(job_names_in_failed)) - if l2 - l1 <= 0: - continue - ind = np.arange(l2 - l1) - plot = i_plot + j_plot - ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) - ax[plot - 1].set_ylabel('# failed attempts') - ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels([name for name in job_names_in_failed[l1:l2]], rotation='vertical') - ax[plot - 1].set_title(experiment_id, fontsize=20) - ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_fail)) - ax[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) - failed_jobs_rects[0] = ax[plot - 1].bar(ind + width * 2, [exp_stats.failed_jobs_dict[name] for name in - job_names_in_failed[l1:l2]], width, color='red') - except Exception as exp: - print((traceback.format_exc())) - print(exp) - - # Building legends subplot - legends_plot = fig.add_subplot(grid_spec[0, 0]) - legends_plot.set_frame_on(False) - legends_plot.axes.get_xaxis().set_visible(False) - legends_plot.axes.get_yaxis().set_visible(False) + # Building legends + # print("Legends") + build_legends(legends_plot, rects, exp_stats, general_stats) - try: - # Building legends - # print("Legends") - build_legends(legends_plot, rects, exp_stats, general_stats) - - # Saving output figure - grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) - plt.savefig(output_file) + # Saving output figure + grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) + plt.savefig(output_file) - create_csv_stats(exp_stats, jobs_list, output_file) - except Exception as exp: - print(exp) - print((traceback.format_exc())) + create_csv_stats(exp_stats, jobs_list, output_file) + except Exception as exp: + print(exp) + print((traceback.format_exc())) def create_csv_stats(exp_stats, jobs_list, output_file): -- GitLab From e81f94337418abf9f30275aad4e5cd57bb8607fe Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 7 Aug 2023 08:42:17 +0200 Subject: [PATCH 05/10] Stats will now always generate csv --- autosubmit/monitor/diagram.py | 19 ++++++++++--------- autosubmit/monitor/monitor.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 684d8175b..6989ccf02 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -40,6 +40,7 @@ MAX_JOBS_PER_PLOT = 12.0 MAX_NUM_PLOTS = 40 + def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, period_ini=None, period_fi=None, queue_time_fixes=None): # type: (str, List[Job], List[str], str, datetime, datetime, Dict[str, int]) -> None @@ -86,15 +87,13 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per # ind = np.arrange(int(MAX_JOBS_PER_PLOT)) width = 0.16 # Creating stats figure + sanity check + plot = True + err_message = "The results are too large to be shown, try narrowing your query.\nUse a filter like -ft where you supply a list of job types, e.g. INI, SIM or use the flag -fp where you supply an integer that represents the number of hours into the past that should be queried:\nSuppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." if total_plots_count > MAX_NUM_PLOTS: - message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ - or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ - suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - Log.info(message) - #raise AutosubmitCritical("Stats query out of bounds", 7061, message) + Log.info(err_message) + plot = False else: fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) - fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] @@ -168,15 +167,17 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per # Building legends # print("Legends") build_legends(legends_plot, rects, exp_stats, general_stats) - # Saving output figure grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) plt.savefig(output_file) - - create_csv_stats(exp_stats, jobs_list, output_file) except Exception as exp: print(exp) print((traceback.format_exc())) + try: + create_csv_stats(exp_stats, jobs_list, output_file) + except Exception as exp: + Log.info(f'Error while creating csv stats:\n{err_message}') + return plot def create_csv_stats(exp_stats, jobs_list, output_file): diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py index 8b8bffc55..25d77cdf8 100644 --- a/autosubmit/monitor/monitor.py +++ b/autosubmit/monitor/monitor.py @@ -481,7 +481,7 @@ class Monitor: Log.info("You don't have enough permissions to the experiment's ({}) folder. The output file will be created in the default location: {}".format(expid, BasicConfig.DEFAULT_OUTPUT_DIR)) HUtils.create_path_if_not_exists_group_permission(BasicConfig.DEFAULT_OUTPUT_DIR) - create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_complete_path, period_ini, period_fi, queue_time_fixes) + show = create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_complete_path, period_ini, period_fi, queue_time_fixes) Log.result('Stats created at {0}', output_complete_path) if show: try: -- GitLab From 0870fff422d1de60492b37e126ed7c15792de6e1 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 28 Jul 2023 09:14:00 +0200 Subject: [PATCH 06/10] some fixes --- autosubmit/monitor/diagram.py | 11 +- autosubmit/statistics/statistics.py | 209 +++++++++++++++------------- 2 files changed, 123 insertions(+), 97 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 786c66e49..e069c533f 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -17,9 +17,9 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import traceback -import numpy as np import matplotlib as mtp +import numpy as np +import traceback mtp.use('Agg') import matplotlib.pyplot as plt @@ -30,7 +30,7 @@ from autosubmit.statistics.statistics import Statistics from autosubmit.job.job import Job from log.log import Log, AutosubmitCritical from datetime import datetime -from typing import Dict, List +from typing import List Log.get_logger("Autosubmit") @@ -98,7 +98,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] - rects = [None] * 5 + rects = [] * 5 # print("Normal plots: {}".format(normal_plots_count)) # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) # print("Total plots: {}".format(total_plots_count)) @@ -129,6 +129,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') + # Building legend i_plot = plot except Exception as exp: print((traceback.format_exc())) @@ -197,7 +198,9 @@ def create_csv_stats(exp_stats, jobs_list, output_file): def build_legends(plot, rects, experiment_stats, general_stats): # type: (plt.figure, List[plt.bar], Statistics, List[str]) -> None # Main legend with colourful rectangles + legend_rects = [[rect[0] for rect in rects]] + legend_titles = [ ['Queued (h)', 'Run (h)', 'Fail Queued (h)', 'Fail Run (h)', 'Max wallclock (h)'] ] diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 504cd9b50..33618efe2 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -1,98 +1,106 @@ #!/bin/env/python from datetime import datetime, timedelta +from typing import List, Union, Dict + from autosubmit.job.job import Job from .jobs_stat import JobStat from .stats_summary import StatsSummary from .utils import timedelta2hours, parse_number_processors -from typing import List, Union, Dict + # from collections import namedtuple _COMPLETED_RETRIAL = 1 _FAILED_RETRIAL = 0 + class Statistics(object): def __init__(self, jobs, start, end, queue_time_fix): # type: (List[Job], datetime, datetime, Dict[str, int]) -> None """ """ - self._jobs = jobs + self._jobs = jobs self._start = start self._end = end self._queue_time_fixes = queue_time_fix - self._name_to_jobstat_dict = dict() # type: Dict[str, JobStat] - self.jobs_stat = [] # type: List[JobStat] + self._name_to_jobstat_dict = dict() # type: Dict[str, JobStat] + self.jobs_stat = [] # type: List[JobStat] # Old format - self.max_time = 0.0 # type: float - self.max_fail = 0 # type: int - self.start_times = [] # type: List[Union[datetime, None]] - self.end_times = [] # type: List[Union[datetime, None]] - self.queued = [] # type: List[timedelta] - self.run = [] # type: List[timedelta] - self.failed_jobs = [] # type: List[int] - self.fail_queued = [] # type: List[timedelta] - self.fail_run = [] # type: List[timedelta] - self.wallclocks = [] # type: List[float] - self.threshold = 0.0 # type: float - self.failed_jobs_dict = {} # type: Dict[str, int] + self.max_time = 0.0 # type: float + self.max_fail = 0 # type: int + self.start_times = [] # type: List[Union[datetime, None]] + self.end_times = [] # type: List[Union[datetime, None]] + self.queued = [] # type: List[timedelta] + self.run = [] # type: List[timedelta] + self.failed_jobs = [] # type: List[int] + self.fail_queued = [] # type: List[timedelta] + self.fail_run = [] # type: List[timedelta] + self.wallclocks = [] # type: List[float] + self.threshold = 0.0 # type: float + self.failed_jobs_dict = {} # type: Dict[str, int] self.summary = StatsSummary() self.totals = [" Description text \n", "Line 1"] - + def calculate_statistics(self): - # type: () -> List[JobStat] - for index, job in enumerate(self._jobs): - retrials = job.get_last_retrials() - for retrial in retrials: - # print(retrial) - job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors(job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) - job_stat.inc_retrial_count() - if Job.is_a_completed_retrial(retrial): - job_stat.inc_completed_retrial_count() - job_stat.submit_time = retrial[0] - job_stat.start_time = retrial[1] - job_stat.finish_time = retrial[2] - adjusted_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta(seconds=self._queue_time_fixes.get(job.name, 0)) - job_stat.completed_queue_time += max(adjusted_queue, timedelta()) - job_stat.completed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) - else: - job_stat.inc_failed_retrial_count() - job_stat.submit_time = retrial[0] if len(retrial) >= 1 and type(retrial[0]) == datetime else None - job_stat.start_time = retrial[1] if len(retrial) >= 2 and type(retrial[1]) == datetime else None - job_stat.finish_time = retrial[2] if len(retrial) >= 3 and type(retrial[2]) == datetime else None - if job_stat.finish_time and job_stat.start_time: - job_stat.failed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) - if job_stat.start_time and job_stat.submit_time: - adjusted_failed_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta(seconds=self._queue_time_fixes.get(job.name, 0)) - job_stat.failed_queue_time += max(adjusted_failed_queue, timedelta()) - self.jobs_stat =sorted(list(self._name_to_jobstat_dict.values()), key=lambda x: (x.date if x.date else datetime.now(), x.member if x.member else "", x.section if x.section else "", x.chunk)) - return self.jobs_stat - + # type: () -> List[JobStat] + for index, job in enumerate(self._jobs): + retrials = job.get_last_retrials() + for retrial in retrials: + # print(retrial) + job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( + job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) + job_stat.inc_retrial_count() + if Job.is_a_completed_retrial(retrial): + job_stat.inc_completed_retrial_count() + job_stat.submit_time = retrial[0] + job_stat.start_time = retrial[1] + job_stat.finish_time = retrial[2] + adjusted_queue = max(job_stat.start_time - job_stat.submit_time, timedelta()) - timedelta( + seconds=self._queue_time_fixes.get(job.name, 0)) + job_stat.completed_queue_time += max(adjusted_queue, timedelta()) + job_stat.completed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) + else: + job_stat.inc_failed_retrial_count() + job_stat.submit_time = retrial[0] if len(retrial) >= 1 and type(retrial[0]) == datetime else None + job_stat.start_time = retrial[1] if len(retrial) >= 2 and type(retrial[1]) == datetime else None + job_stat.finish_time = retrial[2] if len(retrial) >= 3 and type(retrial[2]) == datetime else None + if job_stat.finish_time and job_stat.start_time: + job_stat.failed_run_time += max(job_stat.finish_time - job_stat.start_time, timedelta()) + if job_stat.start_time and job_stat.submit_time: + adjusted_failed_queue = max(job_stat.start_time - job_stat.submit_time, + timedelta()) - timedelta( + seconds=self._queue_time_fixes.get(job.name, 0)) + job_stat.failed_queue_time += max(adjusted_failed_queue, timedelta()) + self.jobs_stat = sorted(list(self._name_to_jobstat_dict.values()), key=lambda x: ( + x.date if x.date else datetime.now(), x.member if x.member else "", x.section if x.section else "", x.chunk)) + return self.jobs_stat + def calculate_summary(self): - # type: () -> StatsSummary - stat_summary = StatsSummary() - for job in self.jobs_stat: - # print("{} -> {}".format(job._name, job.expected_real_consumption)) - job_stat_dict = job.get_as_dict() - # Counter - stat_summary.submitted_count += job_stat_dict["submittedCount"] - stat_summary.run_count += job_stat_dict["retrialCount"] - stat_summary.completed_count += job_stat_dict["completedCount"] - stat_summary.failed_count += job_stat_dict["failedCount"] - # Consumption - stat_summary.expected_consumption += job_stat_dict["expectedConsumption"] - stat_summary.real_consumption += job_stat_dict["realConsumption"] - stat_summary.failed_real_consumption += job_stat_dict["failedRealConsumption"] - # CPU Consumption - stat_summary.expected_cpu_consumption += job_stat_dict["expectedCpuConsumption"] - stat_summary.cpu_consumption += job_stat_dict["cpuConsumption"] - stat_summary.failed_cpu_consumption += job_stat_dict["failedCpuConsumption"] - stat_summary.total_queue_time += job_stat_dict["completedQueueTime"] + job_stat_dict["failedQueueTime"] - stat_summary.calculate_consumption_percentage() - self.summary = stat_summary + # type: () -> StatsSummary + stat_summary = StatsSummary() + for job in self.jobs_stat: + # print("{} -> {}".format(job._name, job.expected_real_consumption)) + job_stat_dict = job.get_as_dict() + # Counter + stat_summary.submitted_count += job_stat_dict["submittedCount"] + stat_summary.run_count += job_stat_dict["retrialCount"] + stat_summary.completed_count += job_stat_dict["completedCount"] + stat_summary.failed_count += job_stat_dict["failedCount"] + # Consumption + stat_summary.expected_consumption += job_stat_dict["expectedConsumption"] + stat_summary.real_consumption += job_stat_dict["realConsumption"] + stat_summary.failed_real_consumption += job_stat_dict["failedRealConsumption"] + # CPU Consumption + stat_summary.expected_cpu_consumption += job_stat_dict["expectedCpuConsumption"] + stat_summary.cpu_consumption += job_stat_dict["cpuConsumption"] + stat_summary.failed_cpu_consumption += job_stat_dict["failedCpuConsumption"] + stat_summary.total_queue_time += job_stat_dict["completedQueueTime"] + job_stat_dict["failedQueueTime"] + stat_summary.calculate_consumption_percentage() + self.summary = stat_summary def get_summary_as_list(self): - return self.summary.get_as_list() + return self.summary.get_as_list() def get_statistics(self): job_stat_list = self.calculate_statistics() @@ -100,32 +108,47 @@ class Statistics(object): "Period": {"From": str(self._start), "To": str(self._end)}, "JobStatistics": [job.get_as_dict() for job in job_stat_list] } - + def make_old_format(self): - # type: () -> None - """ Makes old format """ - self.start_times = [job.start_time for job in self.jobs_stat] - self.end_times = [job.finish_time for job in self.jobs_stat] - self.queued = [timedelta2hours(job.completed_queue_time) for job in self.jobs_stat] - self.run = [timedelta2hours(job.completed_run_time) for job in self.jobs_stat] - self.failed_jobs = [job.failed_retrial_count for job in self.jobs_stat] - self.max_fail = max(self.failed_jobs) - self.fail_run = [timedelta2hours(job.failed_run_time) for job in self.jobs_stat] - self.fail_queued = [timedelta2hours(job.failed_queue_time) for job in self.jobs_stat] - self.wallclocks = [job.expected_real_consumption for job in self.jobs_stat] - self.threshold = max(self.wallclocks) - max_queue = max(self.queued) - max_run = max(self.run) - max_fail_queue = max(self.fail_queued) - max_fail_run = max(self.fail_run) - self.max_time = max(max_queue, max_run, max_fail_queue, max_fail_run, self.threshold) + # type: () -> None + """ Makes old format """ + self.start_times = [job.start_time for job in self.jobs_stat] + self.end_times = [job.finish_time for job in self.jobs_stat] + self.queued = [timedelta2hours(job.completed_queue_time) for job in self.jobs_stat] + self.run = [timedelta2hours(job.completed_run_time) for job in self.jobs_stat] + self.failed_jobs = [job.failed_retrial_count for job in self.jobs_stat] + if len(self.failed_jobs) == 0: + self.max_fail = 0 + else: + self.max_fail = max(self.failed_jobs) + self.fail_run = [timedelta2hours(job.failed_run_time) for job in self.jobs_stat] + self.fail_queued = [timedelta2hours(job.failed_queue_time) for job in self.jobs_stat] + self.wallclocks = [job.expected_real_consumption for job in self.jobs_stat] + if len(self.wallclocks) == 0: + self.threshold = 0.0 + else: + self.threshold = max(self.wallclocks) + if len(self.queued) == 0: + max_queue = 0.0 + else: + max_queue = max(self.queued) + if len(self.run) == 0: + max_run = 0.0 + else: + max_run = max(self.run) + if len(self.fail_queued) == 0: + max_fail_queue = 0.0 + else: + max_fail_queue = max(self.fail_queued) + if len(self.fail_run) == 0: + max_fail_run = 0.0 + else: + max_fail_run = max(self.fail_run) + self.max_time = max(max_queue, max_run, max_fail_queue, max_fail_run, self.threshold) def build_failed_jobs_only_list(self): - # type: () -> Dict[str, int] - for i, job in enumerate(self.jobs_stat): - if self.failed_jobs[i] > 0: - self.failed_jobs_dict[job._name] = self.failed_jobs[i] - return self.failed_jobs_dict - - - + # type: () -> Dict[str, int] + for i, job in enumerate(self.jobs_stat): + if self.failed_jobs[i] > 0: + self.failed_jobs_dict[job._name] = self.failed_jobs[i] + return self.failed_jobs_dict -- GitLab From c28428969a5a10642e40c05d71127ebaa68150be Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 13:44:32 +0200 Subject: [PATCH 07/10] Some fixes --- autosubmit/monitor/diagram.py | 6 ++++-- autosubmit/statistics/statistics.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index e069c533f..ab984e649 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -98,7 +98,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] - rects = [] * 5 + rects = [None] * 5 # print("Normal plots: {}".format(normal_plots_count)) # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) # print("Total plots: {}".format(total_plots_count)) @@ -190,7 +190,9 @@ def create_csv_stats(exp_stats, jobs_list, output_file): with open(output_file, 'w') as file: file.write( "Job,Started,Ended,Queuing time (hours),Running time (hours)\n") - for i in range(len(jobs_list)): + # In the other function, job_names,start_times... etc is only filled if the job has completed retrials + # So I'll change this one to do the same + for i in range(len([ job for job in jobs_list if job.get_last_retrials() ])): file.write("{0},{1},{2},{3},{4}\n".format( job_names[i], start_times[i], end_times[i], queuing_times[i], running_times[i])) diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 33618efe2..9f7590657 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -47,7 +47,7 @@ class Statistics(object): for index, job in enumerate(self._jobs): retrials = job.get_last_retrials() for retrial in retrials: - # print(retrial) + print(retrial) job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) job_stat.inc_retrial_count() -- GitLab From cc8e06c6fe31ffeff025d5c6648f241ec4de59fd Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 14:16:33 +0200 Subject: [PATCH 08/10] Some fixes --- autosubmit/monitor/diagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index ab984e649..ae51e34bf 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -90,7 +90,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - Log.info(message) + #Log.info(message) raise AutosubmitCritical("Stats query out of bounds", 7061, message) fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) -- GitLab From 6dc5b8682fd92f7e64cb06c3c5cab67ba91f1eb2 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Jul 2023 15:24:49 +0200 Subject: [PATCH 09/10] Some fixes --- autosubmit/monitor/diagram.py | 166 +++++++++++++++++----------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index ae51e34bf..684d8175b 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -90,93 +90,93 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - #Log.info(message) - raise AutosubmitCritical("Stats query out of bounds", 7061, message) - - fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) - - fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') - # Variables initialization - ax, ax2 = [], [] - rects = [None] * 5 - # print("Normal plots: {}".format(normal_plots_count)) - # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) - # print("Total plots: {}".format(total_plots_count)) - grid_spec = gridspec.GridSpec(RATIO * total_plots_count + 2, 1) - i_plot = 0 - for plot in range(1, normal_plots_count + 1): - try: - # Calculating jobs inside the given plot - l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) - l2 = min(int(plot * MAX_JOBS_PER_PLOT), len(exp_stats.jobs_stat)) - if l2 - l1 <= 0: - continue - ind = np.arange(l2 - l1) - # Building plot axis - ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) - ax[plot - 1].set_ylabel('hours') - ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels( - [job.name for job in jobs_list[l1:l2]], rotation='vertical') - ax[plot - 1].set_title(experiment_id, fontsize=20) - upper_limit = round(1.10 * exp_stats.max_time, 4) - ax[plot - 1].set_yticks(np.arange(0, upper_limit, round(upper_limit / 10, 4))) - ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) - # Building reacts - rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='lightpink') - rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='green') - rects[2] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='lightsalmon') - rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') - rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], - [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') - # Building legend - i_plot = plot - except Exception as exp: - print((traceback.format_exc())) - print(exp) + Log.info(message) + #raise AutosubmitCritical("Stats query out of bounds", 7061, message) + else: + fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) + + fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') + # Variables initialization + ax, ax2 = [], [] + rects = [None] * 5 + # print("Normal plots: {}".format(normal_plots_count)) + # print("Failed jobs plots: {}".format(failed_jobs_plots_count)) + # print("Total plots: {}".format(total_plots_count)) + grid_spec = gridspec.GridSpec(RATIO * total_plots_count + 2, 1) + i_plot = 0 + for plot in range(1, normal_plots_count + 1): + try: + # Calculating jobs inside the given plot + l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) + l2 = min(int(plot * MAX_JOBS_PER_PLOT), len(exp_stats.jobs_stat)) + if l2 - l1 <= 0: + continue + ind = np.arange(l2 - l1) + # Building plot axis + ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax[plot - 1].set_ylabel('hours') + ax[plot - 1].set_xticks(ind + width) + ax[plot - 1].set_xticklabels( + [job.name for job in jobs_list[l1:l2]], rotation='vertical') + ax[plot - 1].set_title(experiment_id, fontsize=20) + upper_limit = round(1.10 * exp_stats.max_time, 4) + ax[plot - 1].set_yticks(np.arange(0, upper_limit, round(upper_limit / 10, 4))) + ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) + # Building reacts + rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='lightpink') + rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='green') + rects[2] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='lightsalmon') + rects[3] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='salmon') + rects[4] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], + [exp_stats.threshold, exp_stats.threshold], "k--", label='wallclock sim') + # Building legend + i_plot = plot + except Exception as exp: + print((traceback.format_exc())) + print(exp) + + job_names_in_failed = [name for name in exp_stats.failed_jobs_dict] + failed_jobs_rects = [None] + for j_plot in range(1, failed_jobs_plots_count + 1): + try: + l1 = int((j_plot - 1) * MAX_JOBS_PER_PLOT) + l2 = min(int(j_plot * MAX_JOBS_PER_PLOT), len(job_names_in_failed)) + if l2 - l1 <= 0: + continue + ind = np.arange(l2 - l1) + plot = i_plot + j_plot + ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax[plot - 1].set_ylabel('# failed attempts') + ax[plot - 1].set_xticks(ind + width) + ax[plot - 1].set_xticklabels([name for name in job_names_in_failed[l1:l2]], rotation='vertical') + ax[plot - 1].set_title(experiment_id, fontsize=20) + ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_fail)) + ax[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) + failed_jobs_rects[0] = ax[plot - 1].bar(ind + width * 2, [exp_stats.failed_jobs_dict[name] for name in + job_names_in_failed[l1:l2]], width, color='red') + except Exception as exp: + print((traceback.format_exc())) + print(exp) + + # Building legends subplot + legends_plot = fig.add_subplot(grid_spec[0, 0]) + legends_plot.set_frame_on(False) + legends_plot.axes.get_xaxis().set_visible(False) + legends_plot.axes.get_yaxis().set_visible(False) - job_names_in_failed = [name for name in exp_stats.failed_jobs_dict] - failed_jobs_rects = [None] - for j_plot in range(1, failed_jobs_plots_count + 1): try: - l1 = int((j_plot - 1) * MAX_JOBS_PER_PLOT) - l2 = min(int(j_plot * MAX_JOBS_PER_PLOT), len(job_names_in_failed)) - if l2 - l1 <= 0: - continue - ind = np.arange(l2 - l1) - plot = i_plot + j_plot - ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) - ax[plot - 1].set_ylabel('# failed attempts') - ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels([name for name in job_names_in_failed[l1:l2]], rotation='vertical') - ax[plot - 1].set_title(experiment_id, fontsize=20) - ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_fail)) - ax[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) - failed_jobs_rects[0] = ax[plot - 1].bar(ind + width * 2, [exp_stats.failed_jobs_dict[name] for name in - job_names_in_failed[l1:l2]], width, color='red') - except Exception as exp: - print((traceback.format_exc())) - print(exp) - - # Building legends subplot - legends_plot = fig.add_subplot(grid_spec[0, 0]) - legends_plot.set_frame_on(False) - legends_plot.axes.get_xaxis().set_visible(False) - legends_plot.axes.get_yaxis().set_visible(False) + # Building legends + # print("Legends") + build_legends(legends_plot, rects, exp_stats, general_stats) - try: - # Building legends - # print("Legends") - build_legends(legends_plot, rects, exp_stats, general_stats) - - # Saving output figure - grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) - plt.savefig(output_file) + # Saving output figure + grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) + plt.savefig(output_file) - create_csv_stats(exp_stats, jobs_list, output_file) - except Exception as exp: - print(exp) - print((traceback.format_exc())) + create_csv_stats(exp_stats, jobs_list, output_file) + except Exception as exp: + print(exp) + print((traceback.format_exc())) def create_csv_stats(exp_stats, jobs_list, output_file): -- GitLab From 9d4cc3ec36bd6b68f2b26dd28c7e83e140b6d71a Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 7 Aug 2023 08:42:17 +0200 Subject: [PATCH 10/10] Stats will now always generate csv --- autosubmit/monitor/diagram.py | 19 ++++++++++--------- autosubmit/monitor/monitor.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 684d8175b..6989ccf02 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -40,6 +40,7 @@ MAX_JOBS_PER_PLOT = 12.0 MAX_NUM_PLOTS = 40 + def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, period_ini=None, period_fi=None, queue_time_fixes=None): # type: (str, List[Job], List[str], str, datetime, datetime, Dict[str, int]) -> None @@ -86,15 +87,13 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per # ind = np.arrange(int(MAX_JOBS_PER_PLOT)) width = 0.16 # Creating stats figure + sanity check + plot = True + err_message = "The results are too large to be shown, try narrowing your query.\nUse a filter like -ft where you supply a list of job types, e.g. INI, SIM or use the flag -fp where you supply an integer that represents the number of hours into the past that should be queried:\nSuppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." if total_plots_count > MAX_NUM_PLOTS: - message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ - or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ - suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - Log.info(message) - #raise AutosubmitCritical("Stats query out of bounds", 7061, message) + Log.info(err_message) + plot = False else: fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * total_plots_count)) - fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') # Variables initialization ax, ax2 = [], [] @@ -168,15 +167,17 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per # Building legends # print("Legends") build_legends(legends_plot, rects, exp_stats, general_stats) - # Saving output figure grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) plt.savefig(output_file) - - create_csv_stats(exp_stats, jobs_list, output_file) except Exception as exp: print(exp) print((traceback.format_exc())) + try: + create_csv_stats(exp_stats, jobs_list, output_file) + except Exception as exp: + Log.info(f'Error while creating csv stats:\n{err_message}') + return plot def create_csv_stats(exp_stats, jobs_list, output_file): diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py index 8b8bffc55..25d77cdf8 100644 --- a/autosubmit/monitor/monitor.py +++ b/autosubmit/monitor/monitor.py @@ -481,7 +481,7 @@ class Monitor: Log.info("You don't have enough permissions to the experiment's ({}) folder. The output file will be created in the default location: {}".format(expid, BasicConfig.DEFAULT_OUTPUT_DIR)) HUtils.create_path_if_not_exists_group_permission(BasicConfig.DEFAULT_OUTPUT_DIR) - create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_complete_path, period_ini, period_fi, queue_time_fixes) + show = create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_complete_path, period_ini, period_fi, queue_time_fixes) Log.result('Stats created at {0}', output_complete_path) if show: try: -- GitLab