From b03c778b4d3ec34360e34667839fc5042bb5a470 Mon Sep 17 00:00:00 2001 From: nalonso Date: Mon, 15 Jul 2024 15:58:57 +0200 Subject: [PATCH 1/4] Updated tests for statistics module --- test/unit/test_statistics.py | 151 +++++++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 44 deletions(-) diff --git a/test/unit/test_statistics.py b/test/unit/test_statistics.py index f2c3d84b..c4c1f803 100644 --- a/test/unit/test_statistics.py +++ b/test/unit/test_statistics.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta import time import pytest # TODO: need to import something or add to project requirements? -from typing import List, Any +from typing import List, Any, Tuple from random import seed, randint, choice from pyparsing import Dict @@ -20,6 +20,80 @@ NUM_JOBS = 1000 # modify this value to test with different job number MAX_NUM_RETRIALS_PER_JOB = 20 # modify this value to test with different retrials number +@pytest.fixture(scope="function") +def job_with_different_retrials(): + job_aux = Job(name="example_name", job_id="example_id", status="COMPLETED", priority=0) + job_aux.processors = "1" + job_aux.wallclock = '00:05' + job_aux.section = "example_section" + job_aux.member = "example_member" + job_aux.chunk = "example_chunk" + job_aux.processors_per_node = "1" + job_aux.tasks = "1" + job_aux.nodes = "1" + job_aux.exclusive = "example_exclusive" + job_aux.retrials = 7 + + retrials = [ + [ + datetime(2024, 3, 2, 15, 24, 16), + datetime(2024, 3, 2, 15, 26, 14), + datetime(2024, 3, 3, 00, 10, 7), + "COMPLETED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + datetime(2024, 3, 2, 15, 23, 45), + datetime(2024, 3, 2, 15, 24, 45), + "FAILED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + datetime(1970, 1, 1, 2, 00, 00), + datetime(2024, 3, 2, 15, 23, 45), + "FAILED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + datetime(2024, 3, 2, 15, 23, 45), + datetime(1970, 1, 1, 2, 00, 00), + "FAILED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + datetime(2024, 3, 2, 15, 23, 45), + "FAILED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + datetime(1970, 1, 1, 2, 00, 00), + "FAILED" + ], + [ + datetime(2024, 3, 2, 15, 17, 31), + "FAILED" + ] + ] + job_aux.get_last_retrials = lambda: retrials + + job_stat_aux = JobStat("example_name", 1, float(5)/60, "example_section", + "example_date", "example_member", "example_chunk", "1", + "1", "1", "example_exclusive") + + job_stat_aux.submit_time = retrials[len(retrials) - 1][0] + job_stat_aux.start_time = None + job_stat_aux.finish_time = None + job_stat_aux.completed_queue_time = timedelta(seconds=118) + job_stat_aux.completed_run_time = timedelta(seconds=31433) + job_stat_aux.failed_queue_time = timedelta(seconds=374) * 3 + timedelta() * 2 + job_stat_aux.failed_run_time = timedelta(seconds=60) + timedelta(days=19784, seconds=48225) + timedelta() + job_stat_aux.retrial_count = 7 + job_stat_aux.completed_retrial_count = 1 + job_stat_aux.failed_retrial_count = 6 + + return [job_aux], job_stat_aux + + @pytest.fixture(scope="function") def jobs_instances(): # type: () -> List[Job] @@ -70,7 +144,7 @@ def jobs_instances(): else: retrial[3] = job_aux.status retrials.append(retrial) - job_aux.get_last_retrials = lambda: retrials # override get_last_retrials method, similar to mock + job_aux.get_last_retrials = lambda: retrials jobs.append(job_aux) return jobs @@ -146,18 +220,18 @@ def summary_instance_as_list(summary_instance): # type: (StatsSummary) -> List[str] return [ "Summary: ", - "{} : {}".format("CPU Consumption Percentage", 
str(summary_instance.cpu_consumption_percentage) + "%"), - "{} : {:,} hrs.".format("Total Queue Time", round(summary_instance.total_queue_time, 2)), - "{} : {:,}".format("Submitted Count", summary_instance.submitted_count), - "{} : {:,}".format("Run Count", summary_instance.run_count), - "{} : {:,}".format("Completed Count", summary_instance.completed_count), - "{} : {:,}".format("Failed Count", summary_instance.failed_count), - "{} : {:,} hrs.".format("Expected Consumption", round(summary_instance.expected_consumption, 4)), - "{} : {:,} hrs.".format("Real Consumption", round(summary_instance.real_consumption, 4)), - "{} : {:,} hrs.".format("Failed Real Consumption", round(summary_instance.failed_real_consumption, 4)), - "{} : {:,} hrs.".format("Expected CPU Consumption", round(summary_instance.expected_cpu_consumption, 4)), - "{} : {:,} hrs.".format("CPU Consumption", round(summary_instance.cpu_consumption, 4)), - "{} : {:,} hrs.".format("Failed CPU Consumption", round(summary_instance.failed_cpu_consumption, 4)) + "CPU Consumption Percentage : " + "{}".format(str(summary_instance.cpu_consumption_percentage) + "%"), + "Total Queue Time : " + "{:,}".format((round(summary_instance.total_queue_time, 2))) + " hrs.", + "Submitted Count : " + "{:,}".format(summary_instance.submitted_count), + "Run Count : " + "{:,}".format(summary_instance.run_count), + "Completed Count : " + "{:,}".format(summary_instance.completed_count), + "Failed Count : " + "{:,}".format(summary_instance.failed_count), + "Expected Consumption : " + "{:,}".format(round(summary_instance.expected_consumption, 4)) + " hrs.", + "Real Consumption : " + "{:,}".format(round(summary_instance.real_consumption, 4)) + " hrs.", + "Failed Real Consumption : " + "{:,}".format(round(summary_instance.failed_real_consumption, 4)) + " hrs.", + "Expected CPU Consumption : " + "{:,}".format(round(summary_instance.expected_cpu_consumption, 4)) + " hrs.", + "CPU Consumption : " + "{:,}".format(round(summary_instance.cpu_consumption, 4)) + " hrs.", + "Failed CPU Consumption : " + "{:,}".format(round(summary_instance.failed_cpu_consumption, 4)) + " hrs." 
] @@ -207,36 +281,25 @@ def test_working_functions(jobs_instances): exp_stats.build_failed_jobs_only_list() -def test_calculate_statistics(statistics_instance, jobs_instances): - # type: (Statistics, List[Job]) -> None - stats = statistics_instance - job_list = jobs_instances - job_stats = stats.calculate_statistics() - - assert len(job_stats) == len(job_list) - for index, job_stat in enumerate(job_stats): - original_retrials = job_list[index].get_last_retrials() - last_retrial = original_retrials[(len(original_retrials) - 1)] - - assert job_stat.retrial_count == len(original_retrials) - assert job_stat.completed_retrial_count == len( - [retrial for retrial in original_retrials - if len(retrial) == 4 and retrial[3] == "COMPLETED"]) - - assert job_stat.failed_retrial_count == len( - [retrial for retrial in original_retrials - if (len(retrial) == 4 and retrial[3] != "COMPLETED") - or (len(retrial) < 4)]) - - assert job_stat.submit_time == ( - last_retrial[0] if (len(last_retrial) == 4 or len(last_retrial) == 3 or len(last_retrial) == 2) else None) - assert job_stat.start_time == (last_retrial[1] if (len(last_retrial) == 4 or len(last_retrial) == 3) else None) - assert job_stat.finish_time == (last_retrial[2] if (len(last_retrial) == 4) else None) - - # TODO: by making retrials creation random it is "imposible" to predict the results of: - # TODO: completed_queue_time, completed_run_time, failed_queue_time, failed_run_time - # TODO: idea, remove randomness and create a fixed dataset dependending on a constant, easier to test - +def test_calculate_statistics(statistics_instance, job_with_different_retrials): + # type: (Statistics, Tuple[List[Job], JobStat]) -> None + statistics_instance._jobs = job_with_different_retrials[0] + + job_stats = statistics_instance.calculate_statistics() + + # Times + assert job_stats[0].submit_time == job_with_different_retrials[1].submit_time + assert job_stats[0].start_time == job_with_different_retrials[1].start_time + assert job_stats[0].finish_time == job_with_different_retrials[1].finish_time + # Retrials + assert job_stats[0].retrial_count == job_with_different_retrials[1].retrial_count + assert job_stats[0].completed_retrial_count == job_with_different_retrials[1].completed_retrial_count + assert job_stats[0].failed_retrial_count == job_with_different_retrials[1].failed_retrial_count + # Queue/run times + assert job_stats[0].completed_queue_time == job_with_different_retrials[1].completed_queue_time + assert job_stats[0].completed_run_time == job_with_different_retrials[1].completed_run_time + assert job_stats[0].failed_queue_time == job_with_different_retrials[1].failed_queue_time + assert job_stats[0].failed_run_time == job_with_different_retrials[1].failed_run_time def test_calculate_summary(statistics_instance, summary_instance): -- GitLab From bb54e50b60c035a239afa2422095f128a4a3cd69 Mon Sep 17 00:00:00 2001 From: nalonso Date: Tue, 16 Jul 2024 14:16:59 +0200 Subject: [PATCH 2/4] Initial commit --- autosubmit/autosubmit.py | 10 +- autosubmit/monitor/diagram.py | 202 ++++++++++++++++++++++++++-------- autosubmit/monitor/monitor.py | 20 +++- 3 files changed, 180 insertions(+), 52 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 8d3516c3..4e23fc95 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -311,6 +311,8 @@ class Autosubmit: 'in number of hours back') subparser.add_argument('-o', '--output', choices=('pdf', 'png', 'ps', 'svg'), default='pdf', help='type of output for generated plot') + 
subparser.add_argument('-s', '--include_summary', action='store_true', default=False, + help='Includes summary in the plot') subparser.add_argument('--hide', action='store_true', default=False, help='hides plot window') subparser.add_argument('-nt', '--notransitive', action='store_true', @@ -715,7 +717,7 @@ class Autosubmit: args.expand_status, args.hide_groups, args.notransitive, args.check_wrapper, args.txt_logfiles, args.profile, detail=False) elif args.command == 'stats': - return Autosubmit.statistics(args.expid, args.filter_type, args.filter_period, args.output, args.hide, + return Autosubmit.statistics(args.expid, args.filter_type, args.filter_period, args.output, args.include_summary, args.hide, args.notransitive, args.database) elif args.command == 'clean': return Autosubmit.clean(args.expid, args.project, args.plot, args.stats) @@ -2780,7 +2782,7 @@ class Autosubmit: return True @staticmethod - def statistics(expid, filter_type, filter_period, file_format, hide, notransitive=False, db = False): + def statistics(expid, filter_type, filter_period, file_format, summary, hide, notransitive=False, db = False): """ Plots statistics graph for a given experiment. Plot is created in experiment's plot folder with name __
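
Note for reviewers (not part of the patch): the expected timedeltas hard-coded in the new job_with_different_retrials fixture follow directly from differences of its retrial timestamps. Below is a minimal, standalone sketch of that arithmetic, using only the standard library and the datetimes quoted in the fixture; no Autosubmit code is imported, and the handling of the 1970 epoch placeholders is only what the fixture itself encodes, not a claim about the Statistics implementation.

    from datetime import datetime, timedelta

    # COMPLETED retrial: queue = start - submit, run = finish - start
    submit, start, finish = (datetime(2024, 3, 2, 15, 24, 16),
                             datetime(2024, 3, 2, 15, 26, 14),
                             datetime(2024, 3, 3, 0, 10, 7))
    assert start - submit == timedelta(seconds=118)    # completed_queue_time
    assert finish - start == timedelta(seconds=31433)  # completed_run_time

    # The three FAILED retrials with a real start time each queue for 374 s;
    # per the fixture, the two whose start is the 1970 placeholder add timedelta().
    assert (datetime(2024, 3, 2, 15, 23, 45) - datetime(2024, 3, 2, 15, 17, 31)
            == timedelta(seconds=374))

    # failed_run_time = 60 s + the 19784-day outlier produced by measuring a finish
    # of 2024-03-02 15:23:45 against the 1970-01-01 02:00:00 placeholder
    # (+ timedelta() for the retrial whose finish is the placeholder).
    assert (datetime(2024, 3, 2, 15, 24, 45) - datetime(2024, 3, 2, 15, 23, 45)
            == timedelta(seconds=60))
    assert (datetime(2024, 3, 2, 15, 23, 45) - datetime(1970, 1, 1, 2, 0, 0)
            == timedelta(days=19784, seconds=48225))

Because the fixture pins these values, test_calculate_statistics can assert exact queue and run times for the completed and failed retrials, instead of the previous randomly generated retrials whose totals could not be predicted.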