From 73950c1215d963c4fbbeaeecc907707099a5e2a6 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 3 Dec 2024 16:14:28 +0100 Subject: [PATCH 1/6] recovery --- autosubmit/autosubmit.py | 8 +------- bin/autosubmit | 1 + 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 6f84065b7..82c47ed4d 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2573,7 +2573,6 @@ class Autosubmit: raise except BaseException as e: raise - raise AutosubmitCritical("This seems like a bug in the code, please contact AS developers", 7070, str(e)) @staticmethod def monitor(expid, file_format, lst, filter_chunks, filter_status, filter_section, hide, txt_only=False, @@ -3017,12 +3016,7 @@ class Autosubmit: Log.info( "CHANGED job '{0}' status to COMPLETED".format(job.name)) # Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) - - if not no_recover_logs: - try: - job.platform.get_logs_files(expid, job.remote_logs) - except Exception as e: - pass + job.recover_completed_job_log_name(as_conf) elif job.status != Status.SUSPENDED: job.status = Status.WAITING job._fail_count = 0 diff --git a/bin/autosubmit b/bin/autosubmit index c13ce081e..6fcb569d6 100755 --- a/bin/autosubmit +++ b/bin/autosubmit @@ -40,6 +40,7 @@ def main(): return_value = exit_from_error(e) return return_value + if __name__ == "__main__": exit_code = main() sys.exit(exit_code) # Sys.exit ensures a proper cleanup of the program, while os._exit() does not. -- GitLab From 134e589bf2c516c3479e746e8dded518d267618d Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 4 Dec 2024 12:13:43 +0100 Subject: [PATCH 2/6] Fixed recovery issues --- autosubmit/autosubmit.py | 4 +-- autosubmit/job/job.py | 45 ++++++++++++++++++++++++ test/unit/test_job_pytest.py | 66 ++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 82c47ed4d..d9715a466 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -3015,8 +3015,8 @@ class Autosubmit: job.status = Status.COMPLETED Log.info( "CHANGED job '{0}' status to COMPLETED".format(job.name)) - # Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) - job.recover_completed_job_log_name(as_conf) + job.recover_last_ready_date() + job.recover_last_log_name() elif job.status != Status.SUSPENDED: job.status = Status.WAITING job._fail_count = 0 diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 9b5897077..14331e754 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -222,6 +222,7 @@ class Job(object): self.parameters = None self._tmp_path = os.path.join( BasicConfig.LOCAL_ROOT_DIR, self.expid, BasicConfig.LOCAL_TMP_DIR) + self._log_path = Path(f"{self._tmp_path}/LOG_{self.expid}") self.write_start = False self._platform = None self.check = 'true' @@ -2501,6 +2502,50 @@ class Job(object): self.local_logs = local_logs self.remote_logs = copy.deepcopy(local_logs) + def _recover_last_log_name_from_filesystem(self) -> bool: + """ + Recovers the log name for the job from the filesystem. + :return: True if the log name was already recovered, False otherwise + :rtype: bool + """ + log_files = sorted(list(self._log_path.glob(f"{self.name}*")), key=lambda x: x.stat().st_mtime)[-2:] + if log_files: + file_timestamp = int(datetime.datetime.fromtimestamp(log_files[0].stat().st_mtime).strftime("%Y%m%d%H%M%S")) + if self.ready_date and file_timestamp >= int(self.ready_date): + self.local_logs = (log_files[0].name, log_files[1].name) + self.remote_logs = copy.deepcopy(self.local_logs) + return True + self.local_logs = (f"{self.name}.out.{self._fail_count}", f"{self.name}.err.{self._fail_count}") + self.remote_logs = copy.deepcopy(self.local_logs) + return False + + def recover_last_log_name(self): + """ + Recovers the last log name for the job + """ + if not self.updated_log: + self.updated_log = self._recover_last_log_name_from_filesystem() + + def recover_last_ready_date(self) -> None: + """ + Recovers the last ready date for this job + """ + if not self.ready_date: + stat_file = Path(f"{self._tmp_path}/{self.name}_TOTAL_STATS") + if stat_file.exists(): + with stat_file.open('r') as file: + output_by_lines = file.readlines() + if len(output_by_lines) > 0: + line_info = output_by_lines[-1].split(" ") + if len(output_by_lines[-1]) > 0: + self.ready_date = line_info[0] + else: + self.ready_date = datetime.datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') + Log.debug(f"Failed to recover ready date for the job {self.name}") + else: # Default to last mod time + self.ready_date = datetime.datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') + Log.debug(f"Failed to recover ready date for the job {self.name}") + class WrapperJob(Job): """ diff --git a/test/unit/test_job_pytest.py b/test/unit/test_job_pytest.py index 421429fbc..8a4df3977 100644 --- a/test/unit/test_job_pytest.py +++ b/test/unit/test_job_pytest.py @@ -1,7 +1,9 @@ +from datetime import datetime, timedelta import pytest from autosubmit.job.job import Job from autosubmit.platforms.psplatform import PsPlatform +from pathlib import Path @pytest.mark.parametrize('experiment_data, expected_data', [( @@ -50,3 +52,67 @@ def test_update_parameters_current_variables(autosubmit_config, experiment_data, job.update_parameters(as_conf, {}) for key, value in expected_data.items(): assert job.parameters[key] == value + + +@pytest.mark.parametrize('test_with_file, file_is_empty, last_line_empty', [ + (False, False, False), + (True, True, False), + (True, False, True), + (False, False, False) +], ids=["no file", "file is empty", "file is correct", "file last line is empty"]) +def test_recover_last_ready_date(tmpdir, test_with_file, file_is_empty, last_line_empty): + job = Job('dummy', '1', 0, 1) + job._tmp_path = Path(tmpdir) + stat_file = job._tmp_path.joinpath(f'{job.name}_TOTAL_STATS') + ready_time = datetime.now() + timedelta(minutes=5) + ready_date = int(ready_time.strftime("%Y%m%d%H%M%S")) + expected_date = None + if test_with_file: + if file_is_empty: + stat_file.touch() + expected_date = datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') + else: + if last_line_empty: + with stat_file.open('w') as f: + f.write("") + expected_date = datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') + else: + with stat_file.open('w') as f: + f.write(f"{ready_date} {ready_date} {ready_date} COMPLETED") + expected_date = str(ready_date) + job.ready_date = None + job.recover_last_ready_date() + assert job.ready_date == expected_date + + +@pytest.mark.parametrize('test_with_logfiles, file_timestamp_mayor_than_ready_date', [ + (False, False), + (True, True), + (True, False), +], ids=["no file", "log timestamp >= ready_date", "log timestamp < ready_date"]) +def test_recover_last_log_name(tmpdir, test_with_logfiles, file_timestamp_mayor_than_ready_date): + job = Job('dummy', '1', 0, 1) + job._log_path = Path(tmpdir) + expected_local_logs = (f"{job.name}.out.0", f"{job.name}.err.0") + if test_with_logfiles: + if file_timestamp_mayor_than_ready_date: + ready_time = datetime.now() - timedelta(minutes=5) + job.ready_date = str(ready_time.strftime("%Y%m%d%H%M%S")) + log_name = job._log_path.joinpath(f'{job.name}_{job.ready_date}') + expected_update_log = True + expected_local_logs = (log_name.with_suffix('.out').name, log_name.with_suffix('.err').name) + else: + expected_update_log = False + ready_time = datetime.now() + timedelta(minutes=5) + job.ready_date = str(ready_time.strftime("%Y%m%d%H%M%S")) + log_name = job._log_path.joinpath(f'{job.name}_{job.ready_date}') + log_name.with_suffix('.out').touch() + log_name.with_suffix('.err').touch() + else: + expected_update_log = False + + job.updated_log = False + job.recover_last_log_name() + assert job.updated_log == expected_update_log + assert job.local_logs == expected_local_logs + assert job.remote_logs == expected_local_logs -- GitLab From d8e2f96604470ba4389c75ee065d2dc264502991 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 4 Dec 2024 12:28:11 +0100 Subject: [PATCH 3/6] Fix test --- test/unit/test_job_pytest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/test_job_pytest.py b/test/unit/test_job_pytest.py index 8a4df3977..8edec09e2 100644 --- a/test/unit/test_job_pytest.py +++ b/test/unit/test_job_pytest.py @@ -114,5 +114,5 @@ def test_recover_last_log_name(tmpdir, test_with_logfiles, file_timestamp_mayor_ job.updated_log = False job.recover_last_log_name() assert job.updated_log == expected_update_log - assert job.local_logs == expected_local_logs - assert job.remote_logs == expected_local_logs + assert job.local_logs[0] == str(expected_local_logs[0]) + assert job.local_logs[1] == str(expected_local_logs[1]) -- GitLab From b2e67a7916cf04b1664b841a52bebe588ba1654b Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 10 Dec 2024 11:31:33 +0100 Subject: [PATCH 4/6] Fixed tests and .err and out swaped --- autosubmit/job/job.py | 16 ++++++++-------- test/unit/test_job_pytest.py | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 14331e754..7759ff4fa 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -2508,11 +2508,12 @@ class Job(object): :return: True if the log name was already recovered, False otherwise :rtype: bool """ - log_files = sorted(list(self._log_path.glob(f"{self.name}*")), key=lambda x: x.stat().st_mtime)[-2:] - if log_files: - file_timestamp = int(datetime.datetime.fromtimestamp(log_files[0].stat().st_mtime).strftime("%Y%m%d%H%M%S")) + log_name = sorted(list(self._log_path.glob(f"{self.name}*")), key=lambda x: x.stat().st_mtime) + log_name = log_name[-1] if log_name else None + if log_name: + file_timestamp = int(datetime.datetime.fromtimestamp(log_name.stat().st_mtime).strftime("%Y%m%d%H%M%S")) if self.ready_date and file_timestamp >= int(self.ready_date): - self.local_logs = (log_files[0].name, log_files[1].name) + self.local_logs = (log_name.with_suffix(".out").name, log_name.with_suffix(".err").name) self.remote_logs = copy.deepcopy(self.local_logs) return True self.local_logs = (f"{self.name}.out.{self._fail_count}", f"{self.name}.err.{self._fail_count}") @@ -2533,11 +2534,10 @@ class Job(object): if not self.ready_date: stat_file = Path(f"{self._tmp_path}/{self.name}_TOTAL_STATS") if stat_file.exists(): - with stat_file.open('r') as file: - output_by_lines = file.readlines() - if len(output_by_lines) > 0: + output_by_lines = stat_file.read_text().splitlines() + if output_by_lines: line_info = output_by_lines[-1].split(" ") - if len(output_by_lines[-1]) > 0: + if line_info and line_info[0].isdigit(): self.ready_date = line_info[0] else: self.ready_date = datetime.datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') diff --git a/test/unit/test_job_pytest.py b/test/unit/test_job_pytest.py index 8edec09e2..33c066244 100644 --- a/test/unit/test_job_pytest.py +++ b/test/unit/test_job_pytest.py @@ -57,8 +57,8 @@ def test_update_parameters_current_variables(autosubmit_config, experiment_data, @pytest.mark.parametrize('test_with_file, file_is_empty, last_line_empty', [ (False, False, False), (True, True, False), - (True, False, True), - (False, False, False) + (True, False, False), + (True, False, True) ], ids=["no file", "file is empty", "file is correct", "file last line is empty"]) def test_recover_last_ready_date(tmpdir, test_with_file, file_is_empty, last_line_empty): job = Job('dummy', '1', 0, 1) @@ -74,7 +74,7 @@ def test_recover_last_ready_date(tmpdir, test_with_file, file_is_empty, last_lin else: if last_line_empty: with stat_file.open('w') as f: - f.write("") + f.write(" ") expected_date = datetime.fromtimestamp(stat_file.stat().st_mtime).strftime('%Y%m%d%H%M%S') else: with stat_file.open('w') as f: -- GitLab From 3e0d64f52e023953217808249094a686cadc08d1 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 11 Dec 2024 12:00:40 +0100 Subject: [PATCH 5/6] added todo --- test/unit/test_job_pytest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/test_job_pytest.py b/test/unit/test_job_pytest.py index 33c066244..87e045241 100644 --- a/test/unit/test_job_pytest.py +++ b/test/unit/test_job_pytest.py @@ -90,12 +90,12 @@ def test_recover_last_ready_date(tmpdir, test_with_file, file_is_empty, last_lin (True, True), (True, False), ], ids=["no file", "log timestamp >= ready_date", "log timestamp < ready_date"]) -def test_recover_last_log_name(tmpdir, test_with_logfiles, file_timestamp_mayor_than_ready_date): +def test_recover_last_log_name(tmpdir, test_with_logfiles, file_timestamp_greater_than_ready_date): job = Job('dummy', '1', 0, 1) job._log_path = Path(tmpdir) expected_local_logs = (f"{job.name}.out.0", f"{job.name}.err.0") if test_with_logfiles: - if file_timestamp_mayor_than_ready_date: + if file_timestamp_greater_than_ready_date: ready_time = datetime.now() - timedelta(minutes=5) job.ready_date = str(ready_time.strftime("%Y%m%d%H%M%S")) log_name = job._log_path.joinpath(f'{job.name}_{job.ready_date}') -- GitLab From a53d1f60f6f2d5ee70e5eab4c57a81526d399872 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 11 Dec 2024 12:03:09 +0100 Subject: [PATCH 6/6] fixed test --- autosubmit/job/job.py | 1 + test/unit/test_job_pytest.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 7759ff4fa..4674635ed 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -2526,6 +2526,7 @@ class Job(object): """ if not self.updated_log: self.updated_log = self._recover_last_log_name_from_filesystem() + # TODO: After PostgreSQL migration, implement _recover_last_log_from_db() to retrieve the last log from the database. def recover_last_ready_date(self) -> None: """ diff --git a/test/unit/test_job_pytest.py b/test/unit/test_job_pytest.py index 87e045241..71e2db21e 100644 --- a/test/unit/test_job_pytest.py +++ b/test/unit/test_job_pytest.py @@ -85,7 +85,7 @@ def test_recover_last_ready_date(tmpdir, test_with_file, file_is_empty, last_lin assert job.ready_date == expected_date -@pytest.mark.parametrize('test_with_logfiles, file_timestamp_mayor_than_ready_date', [ +@pytest.mark.parametrize('test_with_logfiles, file_timestamp_greater_than_ready_date', [ (False, False), (True, True), (True, False), -- GitLab