From a4cd2500612d6853286bb34587eb9f7592298cc1 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 15 Jul 2020 20:16:58 +0200 Subject: [PATCH 1/5] timedelta --- autosubmit/autosubmit.py | 5 +++++ autosubmit/job/job.py | 15 ++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index fa8ea9e5f..5afaa243b 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1454,6 +1454,7 @@ class Autosubmit: return True except portalocker.AlreadyLocked: + Autosubmit.show_lock_warning(expid) except WrongTemplateException: @@ -3163,6 +3164,10 @@ class Autosubmit: return True # catching Exception except (KeyboardInterrupt, Exception) as e: + all_threads = threading.enumerate() + for thread in all_threads: + thread.join() + # Setting signal handler to handle subsequent CTRL-C signal.signal(signal.SIGINT, signal_handler_create) # Terminating locking as sugested by the portalocker developer diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 000b607d9..14232e7b2 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1345,14 +1345,15 @@ done def _is_over_wallclock(self, start_time, wallclock): elapsed = datetime.datetime.now() - parse_date(start_time) - wallclock = datetime.datetime.strptime(wallclock, '%H:%M') + + (h, m, s) = wallclock.split(':') total = 0.0 - if wallclock.hour > 0: - total = wallclock.hour - if wallclock.minute > 0: - total += wallclock.minute/60.0 - if wallclock.second > 0: - total += wallclock.second/60.0/60.0 + if h > 0: + total = h + if m > 0: + total += m/60.0 + if s > 0: + total += s/60.0/60.0 total = total * 1.15 hour = int(total) minute = int((total - int(total)) * 60.0) -- GitLab From e28512514b8e552239cd98bed6ed15278709ce9c Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 15 Jul 2020 22:08:40 +0200 Subject: [PATCH 2/5] timedelta --- autosubmit/job/job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 14232e7b2..713346bc1 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -807,7 +807,7 @@ class Job(object): template = template_file.read() else: if self.type == Type.BASH: - template = 'sleep 5' + template = 'sleep 90' elif self.type == Type.PYTHON: template = 'time.sleep(5)' elif self.type == Type.R: @@ -1346,7 +1346,7 @@ done def _is_over_wallclock(self, start_time, wallclock): elapsed = datetime.datetime.now() - parse_date(start_time) - (h, m, s) = wallclock.split(':') + (h, m) = wallclock.split(':') total = 0.0 if h > 0: total = h -- GitLab From 1e5deccca6574b7bba9ff76bfaeb1fb46979e240 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 16 Jul 2020 12:18:47 +0200 Subject: [PATCH 3/5] timedelta --- autosubmit/job/job.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 713346bc1..572f3bae3 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1345,15 +1345,16 @@ done def _is_over_wallclock(self, start_time, wallclock): elapsed = datetime.datetime.now() - parse_date(start_time) - - (h, m) = wallclock.split(':') - total = 0.0 - if h > 0: - total = h - if m > 0: - total += m/60.0 - if s > 0: - total += s/60.0/60.0 + splited = wallclock.split(':') + if len(splited) == 3: + total = int(splited[1]) +int(splited[2]) +int(splited[3]) + elif len(splited) == 2: + total = int(splited[0]) + int(splited[1]) + int(splited[2]) + elif len(splited) == 1: + total = int(splited[0]) + else: + total = 0 + total = total * 1.15 hour = int(total) minute = int((total - int(total)) * 60.0) -- GitLab From a63841921badc6b8fcb3b331da3cd3c7f7b92d34 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 16 Jul 2020 12:19:53 +0200 Subject: [PATCH 4/5] timedelta --- autosubmit/job/job.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 572f3bae3..51208ff4e 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1347,14 +1347,14 @@ done elapsed = datetime.datetime.now() - parse_date(start_time) splited = wallclock.split(':') if len(splited) == 3: - total = int(splited[1]) +int(splited[2]) +int(splited[3]) + total = int(splited[0]) +int(splited[1]) +int(splited[2]) elif len(splited) == 2: - total = int(splited[0]) + int(splited[1]) + int(splited[2]) + total = int(splited[0]) + int(splited[1]) elif len(splited) == 1: total = int(splited[0]) else: total = 0 - + total = total * 1.15 hour = int(total) minute = int((total - int(total)) * 60.0) -- GitLab From 7a50c588705e8f84c5072570d930618f7ebe46ec Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 16 Jul 2020 14:46:19 +0200 Subject: [PATCH 5/5] platform changes --- autosubmit/job/job.py | 4 +- autosubmit/platforms/paramiko_platform.py | 50 +++++++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 51208ff4e..94d8e0f51 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -807,7 +807,7 @@ class Job(object): template = template_file.read() else: if self.type == Type.BASH: - template = 'sleep 90' + template = 'sleep 5' elif self.type == Type.PYTHON: template = 'time.sleep(5)' elif self.type == Type.R: @@ -1347,7 +1347,7 @@ done elapsed = datetime.datetime.now() - parse_date(start_time) splited = wallclock.split(':') if len(splited) == 3: - total = int(splited[0]) +int(splited[1]) +int(splited[2]) + total = int(splited[0]) + int(splited[1]) + int(splited[2]) elif len(splited) == 2: total = int(splited[0]) + int(splited[1]) elif len(splited) == 1: diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 58f008eb5..ada136ca8 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -65,7 +65,7 @@ class ParamikoPlatform(Platform): retries = 2 retry = 0 connected = False - while connected == False and retry < retries: + while connected is False and retry < retries: if self.connect(True): connected = True retry+=1 @@ -465,6 +465,47 @@ class ParamikoPlatform(Platform): :rtype: str """ raise NotImplementedError + def exec_command(self, command, bufsize=-1, timeout=None, get_pty=False,retries=3,x11=False): + """ + Execute a command on the SSH server. A new `.Channel` is opened and + the requested command is executed. The command's input and output + streams are returned as Python ``file``-like objects representing + stdin, stdout, and stderr. + + :param str command: the command to execute + :param int bufsize: + interpreted the same way as by the built-in ``file()`` function in + Python + :param int timeout: + set command's channel timeout. See `Channel.settimeout`.settimeout + :return: + the stdin, stdout, and stderr of the executing command, as a + 3-tuple + + :raises SSHException: if the server fails to execute the command + """ + while retries > 0: + try: + chan = self._ssh._transport.open_session() + if get_pty: + chan.get_pty() + if x11: + chan.request_x11() + chan.settimeout(timeout) + chan.exec_command(command) + stdin = chan.makefile('wb', bufsize) + stdout = chan.makefile('r', bufsize) + stderr = chan.makefile_stderr('r', bufsize) + return stdin, stdout, stderr + except paramiko.SSHException as e: + if str(e) in "SSH session not active": + self._ssh = None + self.restore_connection() + timeout = timeout + 60 + retries = retries - 1 + if retries <= 0: + return False , False, False + def send_command(self, command, ignore_log=False): """ Sends given command to HPC @@ -478,15 +519,16 @@ class ParamikoPlatform(Platform): if not self.restore_connection(): return False if "-rP" in command or "find" in command or "convertLink" in command: - timeout = 60*60 # Max Wait 1hour if the command is a copy or simbolic links ( migrate can trigger long times) + timeout = 60*60 elif "rm" in command: timeout = 60/2 else: timeout = 60*2 try: - stdin, stdout, stderr = self._ssh.exec_command(command) + stdin, stdout, stderr = self.exec_command(command,timeout=timeout) + if not stdin and not stdout and not stderr: + raise channel = stdout.channel - channel.settimeout(timeout) stdin.close() channel.shutdown_write() stdout_chunks = [] -- GitLab