diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index a6b0538bfbb32a0b4209149f1c7230f608f94a93..18513234a51888e6f85198ae61b536a3f2a722d2 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -4581,10 +4581,12 @@ class Autosubmit: message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" raise AutosubmitCritical(message, 7000) except AutosubmitError as e: + # TODO: trace defaults to None, so should this test "is None" (or "not e.trace") instead of == ""? if e.trace == "": e.trace = traceback.format_exc() raise AutosubmitError(e.message, e.code, e.trace) except AutosubmitCritical as e: + # TODO: trace defaults to None, so should this test "is None" (or "not e.trace") instead of == ""? if e.trace == "": e.trace = traceback.format_exc() raise AutosubmitCritical(e.message, e.code, e.trace) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index d27b54d4059b9e3f0c6d49268bfa6870ab01d6df..854dfbd6a380aa359d59e345dd47c0aee426c8cb 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -644,14 +644,14 @@ class ParamikoPlatform(Platform): try: self.send_command(cmd) except AutosubmitError as e: - e_msg = str(e.trace)+" "+str(e.message) + e_msg = e.error_message slurm_error = True if not slurm_error: while not self._check_jobid_in_queue(self.get_ssh_output(), job_list_cmd) and retries > 0: try: self.send_command(cmd) except AutosubmitError as e: - e_msg = str(e.trace) + " " + str(e.message) + e_msg = e.error_message slurm_error = True break Log.debug('Retrying check job command: {0}', cmd) @@ -727,7 +727,7 @@ class ParamikoPlatform(Platform): job.new_status = job_status self.get_queue_status(in_queue_jobs,list_queue_jobid,as_conf) else: - for job in job_list: + for job, job_prev_status in job_list: job_status = Status.UNKNOWN Log.warning( 'check_job() The job id ({0}) from platform {1} has an status of {2}.', job.id, self.name, job_status) 
@@ -760,6 +760,18 @@ class ParamikoPlatform(Platform): job_ids = [job_id.split(',')[0] for job_id in job_ids_names] return job_ids + def get_queue_status(self, in_queue_jobs, list_queue_jobid, as_conf): + """Get queue status for a list of jobs. + + The job statuses are normally found via a command sent to the remote platform. + + Each ``job`` in ``in_queue_jobs`` must be updated. Implementations may check + for the reason for queueing cancellation, or if the job is held, and update + the ``job`` status appropriately. + """ + raise NotImplementedError + + def get_checkjob_cmd(self, job_id): """ Returns command to check job status on remote platforms diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 104f5c5ec078f4c3a1b835717048718692eca77a..3403c07e37276aa88ebf1ba857693b106eb2b581 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -113,9 +113,7 @@ class SlurmPlatform(ParamikoPlatform): error_message+="\ncheck that {1} platform has set the correct scheduler. Sections that could be affected: {0}".format( error_msg[:-1], self.name) - if e.trace is None: - e.trace = "" - raise AutosubmitCritical(error_message,7014,e.message+"\n"+str(e.trace)) + raise AutosubmitCritical(error_message, 7014, e.error_message) except IOError as e: raise AutosubmitError( "IO issues ", 6016, str(e)) diff --git a/log/log.py b/log/log.py index 61f993d97c3fffa4f13e894c090e34b7fc40e57a..cf836338377a4d1193fc007b30bc6dcc7bc47acf 100644 --- a/log/log.py +++ b/log/log.py @@ -3,20 +3,36 @@ import os import sys from time import sleep from datetime import datetime +from typing import Union class AutosubmitError(Exception): - """Exception raised for Autosubmit critical errors . + """Exception raised for Autosubmit errors. 
+ Attributes: - errorcode -- Classified code - message -- explanation of the error + message (str): explanation of the error + code (int): classified code + trace (str): extra information about the error """ - def __init__(self, message="Unhandled Error", code=6000, trace=None): + def __init__(self, message="Unhandled Error", code=6000, trace: Union[None, str]=None): self.code = code self.message = message self.trace = trace + @property + def error_message(self) -> str: + """ + Return the error message ready to be logged, with both trace + (when present) and the message separated by a space. Or just + the message if no trace is available. + + :return: ``trace`` and ``message`` separated by a space, or just the + ``message`` if no ``trace`` is available. + :rtype: str + """ + return self.message if not self.trace else f'{self.trace} {self.message}' + def __str__(self): return " " diff --git a/test/unit/test_log.py b/test/unit/test_log.py new file mode 100644 index 0000000000000000000000000000000000000000..e261b64291a65fb4e4fa29faabd49aa6bbb266bc --- /dev/null +++ b/test/unit/test_log.py @@ -0,0 +1,30 @@ +from unittest import TestCase +from log.log import AutosubmitError, AutosubmitCritical + + +"""Tests for the log module.""" + +class TestLog(TestCase): + + def setUp(self): + ... + + def test_autosubmit_error(self): + ae = AutosubmitError() + assert 'Unhandled Error' == ae.message + assert 6000 == ae.code + assert None is ae.trace + assert 'Unhandled Error' == ae.error_message + assert ' ' == str(ae) + + def test_autosubmit_error_error_message(self): + ae = AutosubmitError(trace='ERROR!') + assert 'ERROR! 
Unhandled Error' == ae.error_message + + def test_autosubmit_critical(self): + ac = AutosubmitCritical() + assert 'Unhandled Error' == ac.message + assert 7000 == ac.code + assert None is ac.trace + assert ' ' == str(ac) + diff --git a/test/unit/test_paramiko_platform.py b/test/unit/test_paramiko_platform.py new file mode 100644 index 0000000000000000000000000000000000000000..fa83491b4ba90e6816c73888f2a5ec7b0d2e9c62 --- /dev/null +++ b/test/unit/test_paramiko_platform.py @@ -0,0 +1,116 @@ +from collections import namedtuple +from unittest import TestCase + +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock, patch + +from autosubmit.job.job_common import Status +from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from log.log import AutosubmitError + + +class TestParamikoPlatform(TestCase): + + Config = namedtuple('Config', ['LOCAL_ROOT_DIR', 'LOCAL_TMP_DIR']) + + def setUp(self): + self.local_root_dir = TemporaryDirectory() + self.config = TestParamikoPlatform.Config( + LOCAL_ROOT_DIR=self.local_root_dir.name, + LOCAL_TMP_DIR='tmp' + ) + self.platform = ParamikoPlatform(expid='a000', name='local', config=self.config) + self.platform.job_status = { + 'COMPLETED': [], + 'RUNNING': [], + 'QUEUING': [], + 'FAILED': [] + } + + def tearDown(self) -> None: + self.local_root_dir.cleanup() + + def test_paramiko_platform_constructor(self): + assert self.platform.name == 'local' + assert self.platform.expid == 'a000' + assert self.config is self.platform.config + + assert self.platform.header is None + assert self.platform.wrapper is None + + assert len(self.platform.job_status) == 4 + + @patch('autosubmit.platforms.paramiko_platform.Log') + @patch('autosubmit.platforms.paramiko_platform.sleep') + def test_check_Alljobs_send_command1_raises_autosubmit_error(self, mock_sleep, mock_log): + """ + Args: + mock_sleep (MagicMock): mocking because the function sleeps for 5 seconds. 
+ """ + # Because it raises a NotImplementedError, but we want to skip it to test an error... + self.platform.get_checkAlljobs_cmd = MagicMock() + self.platform.get_checkAlljobs_cmd.side_effect = ['ls'] + # Raise the AE error here. + self.platform.send_command = MagicMock() + ae = AutosubmitError(message='Test', code=123, trace='ERR!') + self.platform.send_command.side_effect = ae + as_conf = MagicMock() + as_conf.get_copy_remote_logs.return_value = None + job = MagicMock() + job.id = 'TEST' + job.name = 'TEST' + with self.assertRaises(AutosubmitError) as cm: + # Retries is -1 so that it skips the retry code block completely, + # as we are not interested in testing that part here. + self.platform.check_Alljobs( + job_list=[(job, None)], + as_conf=as_conf, + retries=-1) + assert cm.exception.message == 'Some Jobs are in Unknown status' + assert cm.exception.code == 6008 + assert cm.exception.trace is None + + assert mock_log.warning.called + assert mock_log.warning.call_args[0][1] == job.id + assert mock_log.warning.call_args[0][2] == self.platform.name + assert mock_log.warning.call_args[0][3] == Status.UNKNOWN + + @patch('autosubmit.platforms.paramiko_platform.sleep') + def test_check_Alljobs_send_command2_raises_autosubmit_error(self, mock_sleep): + """ + Args: + mock_sleep (MagicMock): mocking because the function sleeps for 5 seconds. + """ + # Because it raises a NotImplementedError, but we want to skip it to test an error... + self.platform.get_checkAlljobs_cmd = MagicMock() + self.platform.get_checkAlljobs_cmd.side_effect = ['ls'] + # Raise the AE error here. + self.platform.send_command = MagicMock() + ae = AutosubmitError(message='Test', code=123, trace='ERR!') + # Here the first time ``send_command`` is called it returns None, but + # the second time it will raise the AutosubmitError for our test case. + self.platform.send_command.side_effect = [None, ae] + # Also need to make this function return False... 
+ self.platform._check_jobid_in_queue = MagicMock(return_value = False) + # Then it will query the job status of the job, see further down as we set it + as_conf = MagicMock() + as_conf.get_copy_remote_logs.return_value = None + job = MagicMock() + job.id = 'TEST' + job.name = 'TEST' + job.status = Status.UNKNOWN + + self.platform.get_queue_status = MagicMock(side_effect=None) + + with self.assertRaises(AutosubmitError) as cm: + # Retries is 1 so that the retry code block is entered and the second + # ``send_command`` call raises the AutosubmitError we want to test. + self.platform.check_Alljobs( + job_list=[(job, None)], + as_conf=as_conf, + retries=1) + # AS raises an exception with the message using the previous exception's + # ``error_message``, but error code 6000 and no trace. + assert cm.exception.message == ae.error_message + assert cm.exception.code == 6000 + assert cm.exception.trace is None diff --git a/test/unit/test_slurm_platform.py b/test/unit/test_slurm_platform.py new file mode 100644 index 0000000000000000000000000000000000000000..88b47b5be7d3bdcb3726972a8b0a1e45e0c8e760 --- /dev/null +++ b/test/unit/test_slurm_platform.py @@ -0,0 +1,55 @@ +from collections import namedtuple +from unittest import TestCase + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +from autosubmit.platforms.slurmplatform import SlurmPlatform +from log.log import AutosubmitCritical, AutosubmitError + + +class TestSlurmPlatform(TestCase): + + Config = namedtuple('Config', ['LOCAL_ROOT_DIR', 'LOCAL_TMP_DIR', 'LOCAL_ASLOG_DIR']) + + def setUp(self): + self.local_root_dir = TemporaryDirectory() + self.config = TestSlurmPlatform.Config( + LOCAL_ROOT_DIR=self.local_root_dir.name, + LOCAL_TMP_DIR='tmp', + LOCAL_ASLOG_DIR='ASLOG_a000' + ) + # We need to create the submission archive that AS expects to find in this location: + p = Path(self.local_root_dir.name) / 'a000' / 'tmp' / 'ASLOG_a000' + p.mkdir(parents=True) + submit_platform_script = 
Path(p) / 'submit_local.sh' + submit_platform_script.touch(exist_ok=True) + + self.platform = SlurmPlatform(expid='a000', name='local', config=self.config) + + def tearDown(self) -> None: + self.local_root_dir.cleanup() + + def test_slurm_platform_submit_script_raises_autosubmit_critical_with_trace(self): + package = MagicMock() + package.jobs.return_value = [] + valid_packages_to_submit = [ + package + ] + + ae = AutosubmitError(message='invalid partition', code=123, trace='ERR!') + self.platform.submit_Script = MagicMock(side_effect=ae) + + # AS will handle the AutosubmitError above, but then raise an AutosubmitCritical. + # This new error won't contain all the info from the upstream error. + with self.assertRaises(AutosubmitCritical) as cm: + self.platform.process_batch_ready_jobs( + valid_packages_to_submit=valid_packages_to_submit, + failed_packages=[] + ) + + # AS will handle the error and then later will raise another error message. + # But the AutosubmitError object we created will have been correctly used + # without raising any exceptions (such as AttributeError). + assert cm.exception.message != ae.message