From eb8e546617d599b7da7686cd766e7104a67aa51b Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Jul 2024 11:04:43 +0200 Subject: [PATCH] Log retrieval should be now correctly terminated --- autosubmit/platforms/ecplatform.py | 1 + autosubmit/platforms/locplatform.py | 1 + autosubmit/platforms/paramiko_platform.py | 2 ++ autosubmit/platforms/paramiko_submitter.py | 2 +- autosubmit/platforms/platform.py | 7 ++++--- autosubmit/platforms/sgeplatform.py | 1 + 6 files changed, 10 insertions(+), 4 deletions(-) diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index 5fb9914aa..2307baf8e 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -202,6 +202,7 @@ class EcPlatform(ParamikoPlatform): :return: True :rtype: bool """ + self.main_process_id = os.getpid() output = subprocess.check_output(self._checkvalidcert_cmd, shell=True).decode(locale.getlocale()[1]) if not output: output = "" diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index 0af2d65cd..b6109c39a 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -121,6 +121,7 @@ class LocalPlatform(ParamikoPlatform): def test_connection(self,as_conf): + self.main_process_id = os.getpid() if not self.connected: self.connect(as_conf) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 7dda872db..1c9a3dd72 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -121,6 +121,8 @@ class ParamikoPlatform(Platform): """ Test if the connection is still alive, reconnect if not. """ + self.main_process_id = os.getpid() + try: if not self.connected: self.reset() diff --git a/autosubmit/platforms/paramiko_submitter.py b/autosubmit/platforms/paramiko_submitter.py index 4ee256be2..67114ff2c 100644 --- a/autosubmit/platforms/paramiko_submitter.py +++ b/autosubmit/platforms/paramiko_submitter.py @@ -155,7 +155,7 @@ class ParamikoSubmitter(Submitter): else: raise Exception( "Queue type not specified on platform {0}".format(section)) - + remote_platform.main_process_id = os.getpid() except ParamikoPlatformException as e: Log.error("Queue exception: {0}".format(str(e))) return None diff --git a/autosubmit/platforms/platform.py b/autosubmit/platforms/platform.py index 5ccf6e91e..2eb58214d 100644 --- a/autosubmit/platforms/platform.py +++ b/autosubmit/platforms/platform.py @@ -104,7 +104,7 @@ class Platform(object): self.pw = None self.recovery_queue = Queue() self.log_retrieval_process_active = False - + self.main_process_id = None @property @autosubmit_parameter(name='current_arch') @@ -831,9 +831,10 @@ class Platform(object): job_names_processed = set() self.connected = False self.restore_connection(None) - while not event.is_set(): + # check if id of self.main_process exists with ps ax | grep self.main_process_id + while not event.is_set() and os.system(f"ps ax | grep {str(self.main_process_id)} | grep -v grep > /dev/null 2>&1") == 0: try: - job,children = self.recovery_queue.get() + job,children = self.recovery_queue.get(block=False) if job.wrapper_type != "vertical": if f'{job.name}_{job.fail_count}' in job_names_processed: continue diff --git a/autosubmit/platforms/sgeplatform.py b/autosubmit/platforms/sgeplatform.py index 9c5e813f1..f44fb6629 100644 --- a/autosubmit/platforms/sgeplatform.py +++ b/autosubmit/platforms/sgeplatform.py @@ -144,6 +144,7 @@ class SgePlatform(ParamikoPlatform): :return: True :rtype: bool """ + self.main_process_id = os.getpid() self.connected = True self.connected(as_conf,True) -- GitLab