From 761cd3da32d5dbc1b2a5f74f84d434ad8d31cde0 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Fri, 17 Mar 2023 12:40:33 +0100 Subject: [PATCH 1/5] Create an error_message property on the AutosubmitError. This property checks if ``.trace`` is ``None`` before using it to create a string to be logged or passed to another function. --- autosubmit/autosubmit.py | 2 ++ autosubmit/platforms/paramiko_platform.py | 4 ++-- autosubmit/platforms/slurmplatform.py | 4 +--- log/log.py | 24 +++++++++++++++++++---- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index a6b0538bf..18513234a 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -4581,10 +4581,12 @@ class Autosubmit: message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" raise AutosubmitCritical(message, 7000) except AutosubmitError as e: + # TODO: == "" or is None? if e.trace == "": e.trace = traceback.format_exc() raise AutosubmitError(e.message, e.code, e.trace) except AutosubmitCritical as e: + # TODO: == "" or is None? if e.trace == "": e.trace = traceback.format_exc() raise AutosubmitCritical(e.message, e.code, e.trace) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index d27b54d40..eb843afb5 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -644,14 +644,14 @@ class ParamikoPlatform(Platform): try: self.send_command(cmd) except AutosubmitError as e: - e_msg = str(e.trace)+" "+str(e.message) + e_msg = e.error_message slurm_error = True if not slurm_error: while not self._check_jobid_in_queue(self.get_ssh_output(), job_list_cmd) and retries > 0: try: self.send_command(cmd) except AutosubmitError as e: - e_msg = str(e.trace) + " " + str(e.message) + e_msg = e.error_message slurm_error = True break Log.debug('Retrying check job command: {0}', cmd) diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 104f5c5ec..3403c07e3 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -113,9 +113,7 @@ class SlurmPlatform(ParamikoPlatform): error_message+="\ncheck that {1} platform has set the correct scheduler. Sections that could be affected: {0}".format( error_msg[:-1], self.name) - if e.trace is None: - e.trace = "" - raise AutosubmitCritical(error_message,7014,e.message+"\n"+str(e.trace)) + raise AutosubmitCritical(error_message, 7014, e.error_message) except IOError as e: raise AutosubmitError( "IO issues ", 6016, str(e)) diff --git a/log/log.py b/log/log.py index 61f993d97..cf8363383 100644 --- a/log/log.py +++ b/log/log.py @@ -3,20 +3,36 @@ import os import sys from time import sleep from datetime import datetime +from typing import Union class AutosubmitError(Exception): - """Exception raised for Autosubmit critical errors . + """Exception raised for Autosubmit errors. + Attributes: - errorcode -- Classified code - message -- explanation of the error + message (str): explanation of the error + code (int): classified code + trace (str): extra information about the error """ - def __init__(self, message="Unhandled Error", code=6000, trace=None): + def __init__(self, message="Unhandled Error", code=6000, trace: Union[None, str]=None): self.code = code self.message = message self.trace = trace + @property + def error_message(self) -> str: + """ + Return the error message ready to be logged, with both trace + (when present) and the message separated by a space. Or just + the message if no trace is available. + + :return: ``trace`` and ``message`` separated by a space, or just the + ``message`` if no ``trace`` is available. + :rtype: str + """ + return self.message if not self.trace else f'{self.trace} {self.message}' + def __str__(self): return " " -- GitLab From eb764e49af691e02f9dcb0d7615075531e7e5b89 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 20 Mar 2023 10:09:30 +0100 Subject: [PATCH 2/5] Unit tests for AutosubmitError and AutosubmitCritical --- test/unit/test_log.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 test/unit/test_log.py diff --git a/test/unit/test_log.py b/test/unit/test_log.py new file mode 100644 index 000000000..e261b6429 --- /dev/null +++ b/test/unit/test_log.py @@ -0,0 +1,30 @@ +from unittest import TestCase +from log.log import AutosubmitError, AutosubmitCritical + + +"""Tests for the log module.""" + +class TestLog(TestCase): + + def setUp(self): + ... + + def test_autosubmit_error(self): + ae = AutosubmitError() + assert 'Unhandled Error' == ae.message + assert 6000 == ae.code + assert None is ae.trace + assert 'Unhandled Error' == ae.error_message + assert ' ' == str(ae) + + def test_autosubmit_error_error_message(self): + ae = AutosubmitError(trace='ERROR!') + assert 'ERROR! Unhandled Error' == ae.error_message + + def test_autosubmit_critical(self): + ac = AutosubmitCritical() + assert 'Unhandled Error' == ac.message + assert 7000 == ac.code + assert None is ac.trace + assert ' ' == str(ac) + -- GitLab From 3e5680dc53f6edbd35099dc808e4adcfea5ff636 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 20 Mar 2023 12:37:44 +0100 Subject: [PATCH 3/5] Unit tests for paramiko platform and fix possible bug in error handling for paramiko platform --- autosubmit/platforms/paramiko_platform.py | 2 +- test/unit/test_paramiko_platform.py | 113 ++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 test/unit/test_paramiko_platform.py diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index eb843afb5..5301b4466 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -727,7 +727,7 @@ class ParamikoPlatform(Platform): job.new_status = job_status self.get_queue_status(in_queue_jobs,list_queue_jobid,as_conf) else: - for job in job_list: + for job, job_prev_status in job_list: job_status = Status.UNKNOWN Log.warning( 'check_job() The job id ({0}) from platform {1} has an status of {2}.', job.id, self.name, job_status) diff --git a/test/unit/test_paramiko_platform.py b/test/unit/test_paramiko_platform.py new file mode 100644 index 000000000..c12c47483 --- /dev/null +++ b/test/unit/test_paramiko_platform.py @@ -0,0 +1,113 @@ +from collections import namedtuple +from unittest import TestCase + +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock, patch + +from autosubmit.job.job_common import Status +from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from log.log import AutosubmitError + + +class TestParamikoPlatform(TestCase): + + Config = namedtuple('Config', ['LOCAL_ROOT_DIR', 'LOCAL_TMP_DIR']) + + def setUp(self): + self.local_root_dir = TemporaryDirectory() + self.config = TestParamikoPlatform.Config( + LOCAL_ROOT_DIR=self.local_root_dir.name, + LOCAL_TMP_DIR='tmp' + ) + self.platform = ParamikoPlatform(expid='a000', name='local', config=self.config) + self.platform.job_status = { + 'COMPLETED': [], + 'RUNNING': [], + 'QUEUING': [], + 'FAILED': [] + } + + def tearDown(self) -> None: + self.local_root_dir.cleanup() + + def test_paramiko_platform_constructor(self): + assert self.platform.name == 'local' + assert self.platform.expid == 'a000' + assert self.config is self.platform.config + + assert self.platform.header is None + assert self.platform.wrapper is None + + assert len(self.platform.job_status) == 4 + + @patch('autosubmit.platforms.paramiko_platform.Log') + @patch('autosubmit.platforms.paramiko_platform.sleep') + def test_check_Alljobs_send_command1_raises_autosubmit_error(self, mock_sleep, mock_log): + """ + Args: + mock_sleep (MagicMock): mocking because the function sleeps for 5 seconds. + """ + # Because it raises a NotImplementedError, but we want to skip it to test an error... + self.platform.get_checkAlljobs_cmd = MagicMock() + self.platform.get_checkAlljobs_cmd.side_effect = ['ls'] + # Raise the AE error here. + self.platform.send_command = MagicMock() + ae = AutosubmitError(message='Test', code=123, trace='ERR!') + self.platform.send_command.side_effect = ae + as_conf = MagicMock() + as_conf.get_copy_remote_logs.return_value = None + job = MagicMock() + job.id = 'TEST' + job.name = 'TEST' + with self.assertRaises(AutosubmitError) as cm: + # Retries is -1 so that it skips the retry code block completely, + # as we are not interested in testing that part here. + self.platform.check_Alljobs( + job_list=[(job, None)], + as_conf=as_conf, + retries=-1) + assert cm.exception.message == 'Some Jobs are in Unknown status' + assert cm.exception.code == 6008 + assert cm.exception.trace is None + + assert mock_log.warning.called + assert mock_log.warning.call_args[0][1] == job.id + assert mock_log.warning.call_args[0][2] == self.platform.name + assert mock_log.warning.call_args[0][3] == Status.UNKNOWN + + @patch('autosubmit.platforms.paramiko_platform.sleep') + def test_check_Alljobs_send_command2_raises_autosubmit_error(self, mock_sleep): + """ + Args: + mock_sleep (MagicMock): mocking because the function sleeps for 5 seconds. + """ + # Because it raises a NotImplementedError, but we want to skip it to test an error... + self.platform.get_checkAlljobs_cmd = MagicMock() + self.platform.get_checkAlljobs_cmd.side_effect = ['ls'] + # Raise the AE error here. + self.platform.send_command = MagicMock() + ae = AutosubmitError(message='Test', code=123, trace='ERR!') + # Here the first time ``send_command`` is called it returns None, but + # the second time it will raise the AutosubmitError for our test case. + self.platform.send_command.side_effect = [None, ae] + # Also need to make this function return False... + self.platform._check_jobid_in_queue = MagicMock(return_value = False) + # Then it will query the job status of the job, see further down as we set it + as_conf = MagicMock() + as_conf.get_copy_remote_logs.return_value = None + job = MagicMock() + job.id = 'TEST' + job.name = 'TEST' + job.status = Status.UNKNOWN + with self.assertRaises(AutosubmitError) as cm: + # Retries is -1 so that it skips the retry code block completely, + # as we are not interested in testing that part here. + self.platform.check_Alljobs( + job_list=[(job, None)], + as_conf=as_conf, + retries=1) + # AS raises an exception with the message using the previous exception's + # ``error_message``, but error code 6000 and no trace. + assert cm.exception.message == ae.error_message + assert cm.exception.code == 6000 + assert cm.exception.trace is None -- GitLab From c02df1e98379382579085c8aadca3952420222ef Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 20 Mar 2023 15:20:23 +0100 Subject: [PATCH 4/5] Unit tests for slurm platform --- test/unit/test_slurm_platform.py | 55 ++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 test/unit/test_slurm_platform.py diff --git a/test/unit/test_slurm_platform.py b/test/unit/test_slurm_platform.py new file mode 100644 index 000000000..88b47b5be --- /dev/null +++ b/test/unit/test_slurm_platform.py @@ -0,0 +1,55 @@ +from collections import namedtuple +from unittest import TestCase + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +from autosubmit.platforms.slurmplatform import SlurmPlatform +from log.log import AutosubmitCritical, AutosubmitError + + +class TestSlurmPlatform(TestCase): + + Config = namedtuple('Config', ['LOCAL_ROOT_DIR', 'LOCAL_TMP_DIR', 'LOCAL_ASLOG_DIR']) + + def setUp(self): + self.local_root_dir = TemporaryDirectory() + self.config = TestSlurmPlatform.Config( + LOCAL_ROOT_DIR=self.local_root_dir.name, + LOCAL_TMP_DIR='tmp', + LOCAL_ASLOG_DIR='ASLOG_a000' + ) + # We need to create the submission archive that AS expects to find in this location: + p = Path(self.local_root_dir.name) / 'a000' / 'tmp' / 'ASLOG_a000' + p.mkdir(parents=True) + submit_platform_script = Path(p) / 'submit_local.sh' + submit_platform_script.touch(exist_ok=True) + + self.platform = SlurmPlatform(expid='a000', name='local', config=self.config) + + def tearDown(self) -> None: + self.local_root_dir.cleanup() + + def test_slurm_platform_submit_script_raises_autosubmit_critical_with_trace(self): + package = MagicMock() + package.jobs.return_value = [] + valid_packages_to_submit = [ + package + ] + + ae = AutosubmitError(message='invalid partition', code=123, trace='ERR!') + self.platform.submit_Script = MagicMock(side_effect=ae) + + # AS will handle the AutosubmitError above, but then raise an AutosubmitCritical. + # This new error won't contain all the info from the upstream error. + with self.assertRaises(AutosubmitCritical) as cm: + self.platform.process_batch_ready_jobs( + valid_packages_to_submit=valid_packages_to_submit, + failed_packages=[] + ) + + # AS will handle the error and then later will raise another error message. + # But the AutosubmitError object we created will have been correctly used + # without raising any exceptions (such as AttributeError). + assert cm.exception.message != ae.message -- GitLab From 3d0b8da10e07683d52483e1abf6b6189cbd684dd Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Thu, 23 Mar 2023 16:17:29 +0100 Subject: [PATCH 5/5] Add missing method to ParamikoPlatform --- autosubmit/platforms/paramiko_platform.py | 12 ++++++++++++ test/unit/test_paramiko_platform.py | 3 +++ 2 files changed, 15 insertions(+) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 5301b4466..854dfbd6a 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -760,6 +760,18 @@ class ParamikoPlatform(Platform): job_ids = [job_id.split(',')[0] for job_id in job_ids_names] return job_ids + def get_queue_status(self, in_queue_jobs, list_queue_jobid, as_conf): + """Get queue status for a list of jobs. + + The job statuses are normally found via a command sent to the remote platform. + + Each ``job`` in ``in_queue_jobs`` must be updated. Implementations may check + for the reason for queueing cancellation, or if the job is held, and update + the ``job`` status appropriately. + """ + raise NotImplementedError + + def get_checkjob_cmd(self, job_id): """ Returns command to check job status on remote platforms diff --git a/test/unit/test_paramiko_platform.py b/test/unit/test_paramiko_platform.py index c12c47483..fa83491b4 100644 --- a/test/unit/test_paramiko_platform.py +++ b/test/unit/test_paramiko_platform.py @@ -99,6 +99,9 @@ class TestParamikoPlatform(TestCase): job.id = 'TEST' job.name = 'TEST' job.status = Status.UNKNOWN + + self.platform.get_queue_status = MagicMock(side_effect=None) + with self.assertRaises(AutosubmitError) as cm: # Retries is -1 so that it skips the retry code block completely, # as we are not interested in testing that part here. -- GitLab