diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000000000000000000000000000000000..7500d7d4befeba9b91f608e9637286c2275291cd --- /dev/null +++ b/.coveragerc @@ -0,0 +1,53 @@ +# This is the Coverage.py configuration file. This is used by CI when running +# the tests and collecting coverage + +[run] +branch = True +cover_pylib = False +concurrency = thread +data_file = .coverage +disable_warnings = + trace-changed + module-not-python + module-not-imported + no-data-collected + module-not-measured +omit = + tests/* +parallel = True +source = ./autosubmit +timid = False + +[report] +exclude_lines = + pragma: no cover + + # Don't complain if tests don't hit defensive assertion code: + raise NotImplementedError + return NotImplemented + + # Ignore type checking code: + if (typing\.)?TYPE_CHECKING: + @overload( |$) + + # Don't complain about ellipsis (exception classes, typing overloads etc): + \.\.\. + + # Ignore abstract methods + @(abc\.)?abstractmethod + +fail_under=0 +ignore_errors = False +omit = + tests/* +precision = 2 +show_missing = False +skip_covered = False +sort = Name + +[html] +directory = htmlcov + +[xml] +output = coverage.xml +package_depth = 99 diff --git a/CHANGELOG b/CHANGELOG index ef6a0eec16a30687b08a74820c6d5afef3f85187..6cb3b3b44838633da973f910248457e7d11ea8b2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,9 @@ - #1397: Use `tini` as entrypoint in our Docker image. - #1428: Use natural sort order for graph entries (adapted from Cylc), enable doctests to be executed with pytest. +- #945: Fix portalocker releasing the lock when a portalocker exception + is raised. Now it prints the warnings but only releases the lock when + the command finished (successfully or not). 4.1.10 - Hotfix =============== diff --git a/autosubmit/__init__.py b/autosubmit/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3281913d2ac81d40521d238f6eebcf49e85e20f0 100644 --- a/autosubmit/__init__.py +++ b/autosubmit/__init__.py @@ -0,0 +1,70 @@ +import traceback +from contextlib import suppress +from os import _exit # type: ignore +from pathlib import Path +from typing import Union + +from portalocker.exceptions import BaseLockException + +from log.log import Log, AutosubmitCritical, AutosubmitError + + +def delete_lock_file(base_path: str = Log.file_path, lock_file: str = 'autosubmit.lock') -> None: + """Delete lock file if it exists. Suppresses permission errors raised. + + :param base_path: Base path to locate the lock file. Defaults to the experiment ``tmp`` directory. + :type base_path: str + :param lock_file: The name of the lock file. Defaults to ``autosubmit.lock``. + :type lock_file: str + :return: None + """ + with suppress(PermissionError): + Path(base_path, lock_file).unlink(missing_ok=True) + + +def exit_from_error(e: BaseException) -> None: + """Called by ``Autosubmit`` when an exception is raised during a command execution. + + Prints the exception in ``DEBUG`` level. + + Prints the exception in ``CRITICAL`` if is it an ``AutosubmitCritical`` or an + ``AutosubmitError`` exception. + + Exceptions raised by ``porta-locker` library print a message informing the user + about the locked experiment. Other exceptions raised cause the lock to be deleted. + + After printing the exception, this function calls ``os._exit(1)``, which will + forcefully exit the executable running. + + :param e: The exception being raised. + :type e: BaseException + :return: None + """ + trace = traceback.format_exc() + try: + Log.debug(trace) + except: + print(trace) + + is_portalocker_error = isinstance(e, BaseLockException) + is_autosubmit_error = isinstance(e, (AutosubmitCritical, AutosubmitError)) + + if isinstance(e, BaseLockException): + Log.warning('Another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are ' + 'using the experiment or delete autosubmit.lock file located on the /tmp folder.') + else: + delete_lock_file() + + if is_autosubmit_error: + e: Union[AutosubmitError, AutosubmitCritical] = e # type: ignore + if e.trace: + Log.debug("Trace: {0}", str(e.trace)) + Log.critical("{1} [eCode={0}]", e.code, e.message) + + if not is_portalocker_error and not is_autosubmit_error: + msg = "Unexpected error: {0}.\n Please report it to Autosubmit Developers through Git" + args = [str(e)] + Log.critical(msg.format(*args)) + + Log.info("More info at https://autosubmit.readthedocs.io/en/master/troubleshooting/error-codes.html") + _exit(1) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index f365656940532c60c731cbb99f0de09178e9e2ec..f70000216032b97329e88464fa5054a15fbdc6ae 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -21,7 +21,6 @@ import requests # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . import threading -import traceback from bscearth.utils.date import date2str from configparser import ConfigParser from distutils.util import strtobool @@ -74,10 +73,11 @@ import random import signal import datetime # import log.fd_show as fd_show -import portalocker -from importlib.resources import read_text, files as read_files +from importlib.resources import files as read_files from importlib.metadata import version from collections import defaultdict +from portalocker import Lock +from portalocker.exceptions import BaseLockException from pyparsing import nestedExpr from .history.experiment_status import ExperimentStatus from .history.experiment_history import ExperimentHistory @@ -191,7 +191,7 @@ class Autosubmit: default='DEBUG', type=str, help="sets file's log level.") parser.add_argument('-lc', '--logconsole', choices=('NO_LOG', 'INFO', 'WARNING', 'DEBUG'), - default='INFO', type=str, + default='WARNING', type=str, help="sets console's log level") subparsers = parser.add_subparsers(dest='command') @@ -2188,10 +2188,8 @@ class Autosubmit: raise AutosubmitCritical("Failure during the loading of the experiment configuration, check file paths", 7014, str(e)) - # checking if there is a lock file to avoid multiple running on the same expid try: - # Portalocker is used to avoid multiple autosubmit running on the same experiment, we have to change this system in #806 - with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): + with Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): try: Log.debug("Preparing run") # This function is called only once, when the experiment is started. It is used to initialize the experiment and to check the correctness of the configuration files. @@ -2382,10 +2380,7 @@ class Autosubmit: 7051, e.message) except AutosubmitCritical as e: # Critical errors can't be recovered. Failed configuration or autosubmit error raise AutosubmitCritical(e.message, e.code, e.trace) - except (portalocker.AlreadyLocked, portalocker.LockException) as e: - message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" - raise AutosubmitCritical(message, 7000) - except BaseException as e: + except BaseException: raise # If this happens, there is a bug in the code or an exception not-well caught Log.result("No more jobs to run.") if not did_run and len(job_list.get_completed_without_logs()) > 0: @@ -2438,14 +2433,13 @@ class Autosubmit: exp_history.finish_current_experiment_run() except Exception: Log.warning("Database is locked") - except (portalocker.AlreadyLocked, portalocker.LockException) as e: - message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" + except BaseLockException: terminate_child_process(expid) - raise AutosubmitCritical(message, 7000) - except AutosubmitCritical as e: + raise + except AutosubmitCritical: terminate_child_process(expid) raise - except BaseException as e: + except BaseException: terminate_child_process(expid) raise finally: @@ -4076,7 +4070,7 @@ class Autosubmit: backup_pkl_path = os.path.join( pkl_folder_path, "job_list_{}_backup.pkl".format(expid)) try: - with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): + with Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): # Not locked Log.info("Looking for backup file {}".format(backup_pkl_path)) if os.path.exists(backup_pkl_path): @@ -4132,13 +4126,8 @@ class Autosubmit: else: Log.info( "Backup file not found. Pkl restore operation stopped. No changes have been made.") - except (portalocker.AlreadyLocked, portalocker.LockException) as e: - message = "Another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on the /tmp folder." - raise AutosubmitCritical(message, 7000) except AutosubmitCritical as e: raise AutosubmitCritical(e.message, e.code, e.trace) - except BaseException as e: - raise @staticmethod def database_backup(expid): @@ -4506,8 +4495,7 @@ class Autosubmit: Autosubmit._check_ownership(expid, raise_error=True) exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) - # Encapsulating the lock - with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1) as fh: + with Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1) as fh: try: Log.info( "Preparing .lock file to avoid multiple instances with same expid.") @@ -4669,23 +4657,14 @@ class Autosubmit: Autosubmit.detail(job_list) return True # catching Exception - except KeyboardInterrupt as e: + except KeyboardInterrupt: # Setting signal handler to handle subsequent CTRL-C signal.signal(signal.SIGINT, signal_handler_create) fh.flush() os.fsync(fh.fileno()) raise AutosubmitCritical("Stopped by user input", 7010) - except BaseException as e: + except BaseException: raise - except (portalocker.AlreadyLocked, portalocker.LockException) as e: - message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" - raise AutosubmitCritical(message, 7000) - except AutosubmitError as e: - raise - except AutosubmitCritical as e: - raise - except BaseException as e: - raise finally: if profile: profiler.stop() @@ -5211,9 +5190,8 @@ class Autosubmit: tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) section_validation_message = " " job_validation_message = " " - # checking if there is a lock file to avoid multiple running on the same expid try: - with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): + with Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): Log.info( "Preparing .lock file to avoid multiple instances with same expid.") @@ -5452,11 +5430,6 @@ class Autosubmit: groups=groups_dict, job_list_object=job_list) return True - except (portalocker.AlreadyLocked, portalocker.LockException) as e: - message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" - raise AutosubmitCritical(message, 7000) - except (AutosubmitError, AutosubmitCritical): - raise except BaseException as e: raise AutosubmitCritical( "An Error has occurred while setting some of the workflow jobs, no changes were made", 7040, str(e)) diff --git a/bin/autosubmit b/bin/autosubmit index d08f47593a4bffef1fe577076a6ffe24c394b856..6f53abf1f62eb4095bcb5f86dbd5445f10c9e20e 100755 --- a/bin/autosubmit +++ b/bin/autosubmit @@ -20,54 +20,26 @@ """Script for handling experiment monitoring""" import os import sys -import traceback -from contextlib import suppress -scriptdir = os.path.abspath(os.path.dirname(sys.argv[0])) -sys.path.append(scriptdir) -sys.path.append(os.path.normpath(os.path.join(scriptdir, os.pardir))) +script_dir = os.path.abspath(os.path.dirname(sys.argv[0])) +sys.path.append(script_dir) +sys.path.append(os.path.normpath(os.path.join(script_dir, os.pardir))) # noinspection PyUnresolvedReferences -from log.log import Log, AutosubmitCritical , AutosubmitError +from log.log import Log, AutosubmitCritical, AutosubmitError +from autosubmit import delete_lock_file, exit_from_error from autosubmit.autosubmit import Autosubmit -from typing import Union - -def exit_from_error(e: BaseException): - trace = traceback.format_exc() - try: - Log.debug(trace) - except: - print(trace) - with suppress(FileNotFoundError, PermissionError): - os.remove(os.path.join(Log.file_path, "autosubmit.lock")) - if isinstance(e, (AutosubmitCritical, AutosubmitError)): - e: Union[AutosubmitError, AutosubmitCritical] = e - if e.trace: - Log.debug("Trace: {0}", str(e.trace)) - Log.critical("{1} [eCode={0}]", e.code, e.message) - else: - msg = "A not admitted configuration or error in the code has happened: {0}.\n Please report it to Autosubmit Developers through Git" - args = [str(e)] - Log.critical(msg.format(*args)) - Log.info("More info at https://autosubmit.readthedocs.io/en/master/troubleshooting/error-codes.html") - os._exit(1) - # noinspection PyProtectedMember def main(): try: return_value = Autosubmit.parse_args() - if os.path.exists(os.path.join(Log.file_path, "autosubmit.lock")): - os.remove(os.path.join(Log.file_path, "autosubmit.lock")) - if type(return_value) is int: - os._exit(return_value) - os._exit(0) - except AutosubmitError as e: - exit_from_error(e) - except AutosubmitCritical as e: - exit_from_error(e) + delete_lock_file() + return_value = return_value if type(return_value) is int else 0 + os._exit(return_value) except BaseException as e: exit_from_error(e) + if __name__ == "__main__": main() diff --git a/test/unit/test_lockfile.py b/test/unit/test_lockfile.py new file mode 100644 index 0000000000000000000000000000000000000000..6b7f4083705cb7fb24ddc55e7192a06f092f149b --- /dev/null +++ b/test/unit/test_lockfile.py @@ -0,0 +1,82 @@ +import pytest +from portalocker.exceptions import BaseLockException + +from autosubmit import delete_lock_file, exit_from_error +from log.log import AutosubmitCritical, AutosubmitError + + +def test_delete_lockfile(tmp_path): + fake_lock = tmp_path / 'autosubmit.lock' + fake_lock.touch() + + delete_lock_file(str(tmp_path), 'not-found') + + assert fake_lock.exists() + delete_lock_file(str(tmp_path), fake_lock.name) + assert not fake_lock.exists() + + +def test_log_debug_raises_error(mocker): + """TODO: this probably should never happen?""" + + def _fn(): + raise ValueError + + try: + _fn() + except BaseException as e: + mocker.patch('autosubmit._exit') # mock this to avoid the system from exiting + mocked_log = mocker.patch('autosubmit.Log') + mocked_print = mocker.patch('autosubmit.print') + + mocked_log.debug.side_effect = BaseException() + + exit_from_error(e) + + assert mocked_print.called + + +_TEST_EXCEPTION = AutosubmitCritical() +_TEST_EXCEPTION.trace = 'a trace' + + +@pytest.mark.parametrize( + 'exception,debug_calls,critical_calls,delete_called', + [ + (ValueError, 1, 1, True), + (BaseLockException, 1, 0, False), + (AutosubmitCritical, 1, 1, True), + (_TEST_EXCEPTION, 2, 1, True), + (AutosubmitError, 1, 1, True) + ], + ids=[ + 'normal_exception', + 'portalocker_exception', + 'autosubmit_critical', + 'autosubmit_critical_with_trace', + 'autosubmit_error' + ] +) +def test_exit_from_error( + mocker, + tmp_path, + exception: BaseException, + debug_calls: int, + critical_calls: int, + delete_called: bool +): + def _fn(): + raise exception + + try: + _fn() + except BaseException as e: + mocker.patch('autosubmit._exit') # mock this to avoid the system from exiting + mocked_log = mocker.patch('autosubmit.Log') + mocked_delete = mocker.patch('autosubmit.delete_lock_file') + + exit_from_error(e) + + assert mocked_log.debug.call_count == debug_calls + assert mocked_log.critical.call_count == critical_calls + assert mocked_delete.called == delete_called