diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 001a2fe9777de2f2f386f0564c7e8fa34f2221ca..6b59ffa95381c6a560164e6773f4ecb66f856e57 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -10,6 +10,8 @@ build:
os: ubuntu-22.04
tools:
python: "3.9"
+ apt_packages:
+ - graphviz
jobs:
post_install:
# ruamel.yaml.clib appears to cause the build to fail when
diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py
index 78e8a7facf073f5c7deee20658f11fcf01928686..34f671680d7a6741da4fa76b875f4634ddf0222e 100644
--- a/autosubmit/autosubmit.py
+++ b/autosubmit/autosubmit.py
@@ -50,6 +50,7 @@ from .job.job_list_persistence import JobListPersistencePkl
from .job.job_package_persistence import JobPackagePersistence
from .job.job_packager import JobPackager
from .job.job_utils import SubJob, SubJobManager
+from .profiler.profiler import Profiler
from .monitor.monitor import Monitor
from .notifications.mail_notifier import MailNotifier
from .notifications.notifier import Notifier
@@ -190,6 +191,8 @@ class Autosubmit:
help='Sets a experiment expid which completion will trigger the start of this experiment.')
subparser.add_argument('-rom', '--run_only_members', required=False,
help='Sets members allowed on this run.')
+ subparser.add_argument('-p', '--profile', action='store_true', default=False, required=False,
+ help='Prints performance parameters of the execution of this experiment.')
# Expid
@@ -647,7 +650,7 @@ class Autosubmit:
Autosubmit._init_logs(args, args.logconsole, args.logfile, expid)
if args.command == 'run':
- return Autosubmit.run_experiment(args.expid, args.notransitive,args.start_time,args.start_after, args.run_only_members)
+ return Autosubmit.run_experiment(args.expid, args.notransitive,args.start_time,args.start_after, args.run_only_members, args.profile)
elif args.command == 'expid':
return Autosubmit.expid(args.description,args.HPC,args.copy, args.dummy,args.minimal_configuration,args.git_repo,args.git_branch,args.git_as_conf,args.operational,args.testcase,args.use_local_minimal) != ''
elif args.command == 'delete':
@@ -1999,8 +2002,9 @@ class Autosubmit:
Log.debug("Sleep: {0}", safetysleeptime)
Log.debug("Number of retrials: {0}", default_retrials)
return total_jobs, safetysleeptime, default_retrials, check_wrapper_jobs_sleeptime
+
@staticmethod
- def run_experiment(expid, notransitive=False, start_time=None, start_after=None,run_only_members=None):
+ def run_experiment(expid, notransitive=False, start_time=None, start_after=None, run_only_members=None, profile=False):
"""
Runs and experiment (submitting all the jobs properly and repeating its execution in case of failure).
:param expid: the experiment id
@@ -2008,6 +2012,7 @@ class Autosubmit:
:param start_time: the time at which the experiment should start
:param start_after: the expid after which the experiment should start
:param run_only_members: the members to run
+ :param profile: if True, the whole experiment will be profiled
:return: None
"""
@@ -2018,6 +2023,11 @@ class Autosubmit:
except BaseException as e:
raise AutosubmitCritical("Failure during the loading of the experiment configuration, check file paths",
7014, str(e))
+
+ # If the profile flag has been used, activate the profiler and start taking measures
+ if profile:
+ profiler = Profiler(expid)
+ profiler.start()
# checking if there is a lock file to avoid multiple running on the same expid
try:
@@ -2250,6 +2260,9 @@ class Autosubmit:
raise
except BaseException as e:
raise AutosubmitCritical("This seems like a bug in the code, please contact AS developers", 7070, str(e))
+ finally:
+ if profile:
+ profiler.stop()
@staticmethod
def restore_platforms(platform_to_test, mail_notify=False, as_conf=None, expid=None):
diff --git a/autosubmit/profiler/__init__.py b/autosubmit/profiler/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/autosubmit/profiler/profiler.py b/autosubmit/profiler/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..789402b4d9130faf67addbd3a179f2078e9a6788
--- /dev/null
+++ b/autosubmit/profiler/profiler.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+
+# Copyright 2015-2023 Earth Sciences Department, BSC-CNS
+
+# This file is part of Autosubmit.
+
+# Autosubmit is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# Autosubmit is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with Autosubmit. If not, see <http://www.gnu.org/licenses/>.
+
+import cProfile
+import io
+import os
+import pstats
+from datetime import datetime
+from pathlib import Path
+from pstats import SortKey
+
+import psutil
+from autosubmitconfigparser.config.basicconfig import BasicConfig
+
+from log.log import Log, AutosubmitCritical
+
+
+_UNITS = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]
+
+class Profiler:
+ """Class to profile the execution of experiments."""
+
+ def __init__(self, expid:str):
+ self._profiler = cProfile.Profile()
+ self._expid = expid
+
+ # Memory profiling variables
+ self._mem_init = 0
+ self._mem_final = 0
+
+ # Error handling flags
+ self._started = False
+
+ def start(self) -> None:
+ """Function to start the profiling process."""
+ if self._started:
+ raise AutosubmitCritical('The profiling process was already started.', 7074)
+
+ self._profiler.enable()
+ self._mem_init += _get_current_memory()
+ self._started = True
+
+ def stop(self) -> None:
+ """Function to finish the profiling process."""
+ if not self._started:
+ raise AutosubmitCritical('Cannot stop the profiler because was not running.', 7074)
+
+ self._profiler.disable()
+ self._mem_final += _get_current_memory()
+ self._report()
+
+ def _report(self) -> None:
+ """Function to print the final report into the stdout, log and filesystem."""
+
+ # Create the profiler path if it does not exist
+ report_path = Path(BasicConfig.LOCAL_ROOT_DIR, self._expid, "tmp", "profile")
+ report_path.mkdir(parents=True, exist_ok=True)
+ report_path.chmod(0o755)
+ if not os.access(report_path, os.W_OK): # Check for write access
+ raise AutosubmitCritical(
+ f'Directory {report_path} not writable. Please check permissions.', 7012)
+
+
+ stream = io.StringIO()
+ date_time = datetime.now().strftime('%Y%m%d-%H%M%S')
+
+ # Generate function-by-function profiling results
+ sort_by = SortKey.CUMULATIVE
+ stats = pstats.Stats(self._profiler, stream=stream) # generate statistics
+ stats.strip_dirs().sort_stats(sort_by).print_stats() # format and save in the stream
+
+ # Generate memory profiling results
+ mem_total : float = self._mem_final - self._mem_init # memory in Bytes
+ unit = 0
+ # reduces the value to its most suitable unit
+        while mem_total >= 1024 and unit < len(_UNITS) - 1:
+ unit += 1
+ mem_total /= 1024
+
+ # Create and save report
+ report = "\n".join([
+ _generate_title("Time & Calls Profiling"),
+ "",
+ stream.getvalue(),
+ _generate_title("Memory Profiling"),
+ f"MEMORY CONSUMPTION: {mem_total} {_UNITS[unit]}.",
+ ""
+ ]).replace('{', '{{').replace('}', '}}') # escape {} so Log can call str.format
+
+ Log.info(report)
+
+ stats.dump_stats(Path(report_path, f"{self._expid}_profile_{date_time}.prof"))
+ with open(Path(report_path, f"{self._expid}_profile_{date_time}.txt"),
+ 'w', encoding='UTF-8') as report_file:
+ report_file.write(report)
+
+ Log.info(f"[INFO] You can also find report and prof files at {report_path}\n")
+
+def _generate_title(title="") -> str:
+ """
+ Generates a title banner with the specified text.
+
+ :param title: The title that will be shown in the banner.
+ :type title: str
+ :return: The banner with the specified title.
+ :rtype: str
+ """
+ max_len = 80
+ separator = "=" * max_len
+ message = title.center(max_len)
+ return "\n".join([separator, message, separator])
+
+
+def _get_current_memory() -> int:
+ """
+ Return the current memory consumption of the process in Bytes.
+
+ :return: The current memory used by the process (Bytes).
+ :rtype: int
+ """
+ return psutil.Process(os.getpid()).memory_info().rss
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5c7b344ad5c3bf66f4f409c44560bed1c17f039b..86c56818432b57e790a2d2b85eb9fb3cb1fb7c34 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -36,9 +36,13 @@ extensions = [
'sphinx.ext.autosectionlabel',
'sphinx_rtd_theme',
'sphinx_reredirects',
- 'autosubmit_variables'
+ 'autosubmit_variables',
+ 'sphinx.ext.graphviz'
]
+# Set .svg output for the graphs generated by GraphViz
+graphviz_output_format='svg'
+
autosectionlabel_prefix_document = True
numfig = True
diff --git a/docs/source/troubleshooting/error-codes.rst b/docs/source/troubleshooting/error-codes.rst
index fbba9514e465d43514c30b08f7f022cacff60dad..9ed70aa32b7beaee5ce468fcaced9e67090e2051 100644
--- a/docs/source/troubleshooting/error-codes.rst
+++ b/docs/source/troubleshooting/error-codes.rst
@@ -123,6 +123,8 @@ Uncatalogued codes - Critical Error codes [7060+]
| 7073 | Private key is encrypted | Each Session: Add your key into the ssh agent. ex. ssh-add $HOME/.ssh/id_rsa, then launch autosubmit. |
| | | Alternative: You can use a non-encrypted key, just make sure that nobody except you has access to the file. |
+------+-----------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| 7074 | Profiling process failed | You can find descriptive explanations in the log, as well as hints to solve the problem. |
++------+-----------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
**Default Solution**
diff --git a/docs/source/userguide/run/index.rst b/docs/source/userguide/run/index.rst
index f38ede902bf9f3d72e008b16db01243e5af85090..767d3cce9af03e46c6a5303d97ae247e02a8f6b1 100644
--- a/docs/source/userguide/run/index.rst
+++ b/docs/source/userguide/run/index.rst
@@ -187,6 +187,42 @@ This functionality can be used together with other options supplied by the `run`
The `-sa` command has a long version `--start_after`.
+.. _basic_profiling:
+
+
+How to profile Autosubmit while running an experiment
+-----------------------------------------------------
+
+Autosubmit offers the possibility to profile an experiment execution. To enable the profiler, just
+add the ``--profile`` (or ``-p``) flag to your ``autosubmit run`` command, as in the following example:
+
+.. code-block:: bash
+
+ autosubmit run --profile EXPID
+
+.. note:: Remember that the purpose of this profiler is to measure the performance of Autosubmit,
+ not the jobs it runs.
+
+This profiler uses Python's ``cProfile`` and ``psutil`` modules to generate a report with simple CPU and
+memory metrics for the experiment execution. These metrics will be displayed in your console after
+the experiment finishes, as in the example below:
+
+.. figure:: profiler_output.png
+ :name: profiler_head_output
+ :align: center
+ :alt: Screenshot of the header of the profiler's output
+
+The profiler output is also saved in ``<EXPID>/tmp/profile``. There you will find two files, the
+report in plain text format and a ``.prof`` binary which contains the CPU metrics. We highly recommend
+using `SnakeViz <https://jiffyclub.github.io/snakeviz/>`_ to visualize this file, as follows:
+
+.. figure:: profiler_snakeviz.png
+ :name: profiler_snakeviz
+ :align: center
+ :alt: The .prof file represented by the graphical library SnakeViz
+
+For more detailed documentation about the profiler, please visit this :ref:`page <advanced_profiling>`.
+
.. _run_modes:
How to prepare an experiment to run in two independent job_list. (Priority jobs, Two-step-run) (OLD METHOD)
diff --git a/docs/source/userguide/run/profiler.rst b/docs/source/userguide/run/profiler.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0a20cbd831f6700e8054fe3eb1716f29a6a3ca97
--- /dev/null
+++ b/docs/source/userguide/run/profiler.rst
@@ -0,0 +1,113 @@
+:orphan:
+
+..
+ The :orphan: section tells Sphinx not to include this page in any contents list
+
+.. _advanced_profiling:
+
+autosubmit.profiler
+===================
+
+.. important:: If you are interested in measuring only the performance of the execution of an
+ experiment in Autosubmit, a simpler user-oriented guide is available at this
+   :ref:`link <basic_profiling>`.
+
+######################################
+The autosubmit's profiler
+######################################
+
+Autosubmit integrates a profiler that allows developers to easily measure the performance of entire
+functions or specific code fragments.
+
+The profiler generates a comprehensive report with enough information to detect bottleneck functions
+during the execution of experiments, as well as information about the total memory consumed.
+
+It mainly uses the ``cProfile`` library to make the report, so this module inherits its deterministic
+profiling and reasonable overhead features. However, it also limits profiling to a single thread, so
+please, do not use it on concurrent code. For memory profiling, it uses ``psutil``.
+
+.. caution::
+ This profiler was originally designed to be used in the ``autosubmit run`` command, so using
+ it in other functions may produce unexpected results or errors.
+
+ The profiler instantiation requires an ``EXPID``, and not all the functions in Autosubmit use it.
+ This can be bypassed using another string, but keep in mind that there is no error handling in
+ this case.
+
+How to profile a function or a specific code fragment?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Depending on the Autosubmit function you want to profile, you must add a ``--profile`` argument to the
+parser. The ``autosubmit run`` subcommand supports it. It is recommended that the default value of
+this flag always be ``False``, to ensure that the profiler does not interfere with the normal execution
+in an unwanted way. You will need to add something like this to your parser:
+
+.. code-block:: python
+
+ subparser.add_argument('-p', '--profile', action='store_true', default=False, required=False,
+ help='Prints performance parameters of the execution of this experiment.')
+
+The function must receive the flag as argument to control the execution of the profiler. If the flag
+has value ``True``, you should proceed as follows:
+
+1. Instantiate a **Profiler(EXPID)** object. Specifying the ``EXPID`` is mandatory.
+
+2. Run the profiler by calling the **start()** function of the instantiated object, at the beginning
+ of the function or code fragment you want to evaluate. The measurement will start instantly.
+
+3. Execute the **stop()** function of the profiler at the end of the function or code fragment to be
+ evaluated. The process of taking measurements will stop instantly. The report will be generated
+   automatically and the files will be stored in the ``<EXPID>/tmp/profile`` directory.
+
+.. important:: Make sure, if necessary, that the call to `stop()` is always made, even if the
+ Autosubmit code fails, in order to get the performance report.
+
+
+The most relevant functions of the profiler, in detail
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. digraph:: foo
+ :name: status_diagram
+ :align: center
+ :alt: Status diagram of the profiler
+
+ bgcolor="transparent"
+ graph [nodesep=0.5]
+
+ // Extreme nodes (start/end)
+ node [label="",color="black",style=filled,fixedsize=true];
+
+ node [shape=circle,height=0.25,width=0.25] ENTRY;
+ node [shape=doublecircle,height=0.2,width=0.2] EXIT;
+
+ // Status nodes
+ node [shape=rect,style=rounded,height=0.5,width=1.2,fixedsize=true,fontsize=12,fontname=arial];
+
+ node [label="__init__"] init;
+ node [label="start"] start;
+ node [label="stop"] stop;
+ node [label="report"] report;
+
+ // Relations
+ ENTRY -> init;
+ init -> start [label="when\nstarted = False",fontsize=7,fontname=arial];
+ start -> stop [label=" when\n started = True",fontsize=7,fontname=arial];
+ stop -> report [label="automatically",fontsize=7,fontname=arial];
+ report -> EXIT;
+
+ { rank = same; ENTRY init start }
+ { rank = same; stop report EXIT }
+
+* The **start()** function: Starts taking measures, both of execution times thanks to ``cProfile``, and
+ memory thanks to ``psutil``. It also manages errors to avoid illegal transitions between states.
+
+* The **stop()** function: Same as the previous function, but terminating the taking of measurements.
+ It will call the report function automatically.
+
+* The **_report()** function: It is private, and its purpose is to generate the final performance
+ report and storing it properly. It will print the report to the console output and log it at the same time.
+ In addition, it will generate two files in the directory chosen when instantiating the Profiler
+ object, a ``.txt`` file with the same report shown on screen, and a ``.prof`` file with the report
+ generated by ``pstats``. The ``.prof`` file can be manipulated with the appropriate tools. Our
+   recommendation is to open it with `SnakeViz <https://jiffyclub.github.io/snakeviz/>`_, a graphical
+ library that will interpret the data for you and display it in an interactive web interface.
diff --git a/docs/source/userguide/run/profiler_output.png b/docs/source/userguide/run/profiler_output.png
new file mode 100644
index 0000000000000000000000000000000000000000..2a4808ed8d170bb7717d125f69853a616475e32c
Binary files /dev/null and b/docs/source/userguide/run/profiler_output.png differ
diff --git a/docs/source/userguide/run/profiler_snakeviz.png b/docs/source/userguide/run/profiler_snakeviz.png
new file mode 100644
index 0000000000000000000000000000000000000000..8eadb90e21aa7c9f72e6bfca1a1eb240c1955c24
Binary files /dev/null and b/docs/source/userguide/run/profiler_snakeviz.png differ
diff --git a/environment.yml b/environment.yml
index 60e4d95c864dcd0e2b7e2e51282cac33d4cea018..e0ce1aded1e3adf495e84055a7dc1cac4136e3fa 100644
--- a/environment.yml
+++ b/environment.yml
@@ -44,4 +44,5 @@ dependencies:
- coverage
- requests
- configobj
+ - psutil
diff --git a/requeriments.txt b/requeriments.txt
index 64e023c1ac6ed6b1890519f8ef6bdc150bf87dd7..95d521a9b17e2aa1e4f2809d058c650559708e2f 100644
--- a/requeriments.txt
+++ b/requeriments.txt
@@ -30,3 +30,4 @@ Pygments
packaging==20
typing>=3.7
wheel
+psutil
diff --git a/setup.py b/setup.py
index 5761d7ac2b4b879e80a624e3c035bebc66c92020..f96a9a0c9f7159cd9285994bb3c6acbe0baec28b 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@ setup(
url='http://www.bsc.es/projects/earthscience/autosubmit/',
download_url='https://earth.bsc.es/wiki/doku.php?id=tools:autosubmit',
keywords=['climate', 'weather', 'workflow', 'HPC'],
- install_requires=['ruamel.yaml==0.17.21','cython','autosubmitconfigparser','bcrypt>=3.2','packaging>19','six>=1.10.0','configobj>=5.0.6','argparse>=1.4.0','python-dateutil>=2.8.2','matplotlib<3.6','numpy<1.22','py3dotplus>=1.1.0','pyparsing>=3.0.7','paramiko>=2.9.2','mock>=4.0.3','portalocker>=2.3.2','networkx==2.6.3','requests>=2.27.1','bscearth.utils>=0.5.2','cryptography>=36.0.1','setuptools>=60.8.2','xlib>=0.21','pip>=22.0.3','pythondialog','pytest','nose','coverage','PyNaCl>=1.4.0','Pygments'],
+ install_requires=['ruamel.yaml==0.17.21','cython','autosubmitconfigparser','bcrypt>=3.2','packaging>19','six>=1.10.0','configobj>=5.0.6','argparse>=1.4.0','python-dateutil>=2.8.2','matplotlib<3.6','numpy<1.22','py3dotplus>=1.1.0','pyparsing>=3.0.7','paramiko>=2.9.2','mock>=4.0.3','portalocker>=2.3.2','networkx==2.6.3','requests>=2.27.1','bscearth.utils>=0.5.2','cryptography>=36.0.1','setuptools>=60.8.2','xlib>=0.21','pip>=22.0.3','pythondialog','pytest','nose','coverage','PyNaCl>=1.4.0','Pygments','psutil'],
classifiers=[
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.9",
diff --git a/test/unit/test_profiler.py b/test/unit/test_profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d474d572a23a029be09fe099565fd9edbf40f13
--- /dev/null
+++ b/test/unit/test_profiler.py
@@ -0,0 +1,50 @@
+from unittest import TestCase, mock
+from autosubmit.profiler.profiler import Profiler
+from log.log import AutosubmitCritical
+from pathlib import Path
+
+
+class TestProfiler(TestCase):
+ def setUp(self):
+ self.profiler = Profiler("a000")
+
+ # Black box techniques for status machine based software
+ #
+ # O---->__init__------> start
+ # |
+ # |
+ # stop ----> report --->0
+
+ # Status coverage
+ def test_status_machine(self):
+ self.profiler.start()
+ self.profiler.stop()
+
+ # Transition coverage
+ def test_transitions(self):
+ # __init__ -> start
+ self.profiler.start()
+
+ # start -> stop
+ self.profiler.stop()
+
+ def test_transitions_fail_cases(self):
+ # __init__ -> stop
+ self.assertRaises(AutosubmitCritical, self.profiler.stop)
+
+ # start -> start
+ self.profiler.start()
+ self.assertRaises(AutosubmitCritical, self.profiler.start)
+
+ # White box tests
+ @mock.patch("os.access")
+ def test_writing_permission_check_fails(self, mock_response):
+ mock_response.return_value = False
+
+ self.profiler.start()
+ self.assertRaises(AutosubmitCritical, self.profiler.stop)
+
+ def test_memory_profiling_loop(self):
+ self.profiler.start()
+ bytearray(1024*1024)
+ self.profiler.stop()