From 7da327f3445fe765ae4fd9c8f236227d2e18bb58 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 15:41:50 +0100 Subject: [PATCH 01/16] Vdevel --- autosubmit/job/job.py | 3 ++- autosubmit/platforms/slurmplatform.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 604784dbb..9f85229fb 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -880,7 +880,8 @@ class Job(object): :return: script code :rtype: str """ - if self.parameters['PROJECT_TYPE'].lower() != "none": + parameters = self.parameters + if parameters['PROJECT_TYPE'].lower() != "none": template_file = open(os.path.join( as_conf.get_project_dir(), self.file), 'r') template = template_file.read() diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index c2595b9e3..fd4476cf7 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -311,11 +311,12 @@ class SlurmPlatform(ParamikoPlatform): def parse_Alljobs_output(self, output, job_id): status = "" + Log.debug("Debugging for nimbus: Output {0}, Job_id {1}".format(output,job_id)) try: status = [x.split()[1] for x in output.splitlines() if x.split()[0] == str(job_id)] except BaseException as e: - return status + pass if len(status) == 0: return status return status[0] -- GitLab From 813b12a22fb5f6b145ef6a76ec07b2a6b5f51e8c Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 16:21:29 +0100 Subject: [PATCH 02/16] Vdevel --- autosubmit/platforms/paramiko_platform.py | 3 ++- autosubmit/platforms/slurmplatform.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 46cac5e89..2436a0b14 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -460,7 +460,8 @@ class ParamikoPlatform(Platform): :return: current job status :rtype: 
autosubmit.job.job_common.Status """ - + if job_list_cmd[-1] == ",": + job_list_cmd=job_list_cmd[-1] cmd = self.get_checkAlljobs_cmd(job_list_cmd) sleep_time = 5 diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index fd4476cf7..ceee2f76b 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -311,7 +311,7 @@ class SlurmPlatform(ParamikoPlatform): def parse_Alljobs_output(self, output, job_id): status = "" - Log.debug("Debugging for nimbus: Output {0}, Job_id {1}".format(output,job_id)) + Log.debug("Debugging parse_Alljobs: Output {0}, Job_id {1}".format(output,job_id)) try: status = [x.split()[1] for x in output.splitlines() if x.split()[0] == str(job_id)] -- GitLab From cf274b47fa0477b8b3e525958a9bb5f3d1d8eb7b Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 16:28:47 +0100 Subject: [PATCH 03/16] nimbus --- autosubmit/platforms/paramiko_platform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 2436a0b14..b9bfe2f26 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -461,7 +461,7 @@ class ParamikoPlatform(Platform): :rtype: autosubmit.job.job_common.Status """ if job_list_cmd[-1] == ",": - job_list_cmd=job_list_cmd[-1] + job_list_cmd=job_list_cmd[:-2] cmd = self.get_checkAlljobs_cmd(job_list_cmd) sleep_time = 5 -- GitLab From 220cfec7e7244e9a7ed68a5b89f51a6f89c0dc87 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 16:52:22 +0100 Subject: [PATCH 04/16] nimbus changed -j for --jobs --- autosubmit/platforms/paramiko_platform.py | 2 +- autosubmit/platforms/slurmplatform.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index b9bfe2f26..f4ef58fb1 100644 --- 
a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -461,7 +461,7 @@ class ParamikoPlatform(Platform): :rtype: autosubmit.job.job_common.Status """ if job_list_cmd[-1] == ",": - job_list_cmd=job_list_cmd[:-2] + job_list_cmd=job_list_cmd[:-1] cmd = self.get_checkAlljobs_cmd(job_list_cmd) sleep_time = 5 diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index ceee2f76b..a0c7f6f16 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -348,16 +348,16 @@ class SlurmPlatform(ParamikoPlatform): self._submit_hold_cmd + job_script + "\n") def get_checkjob_cmd(self, job_id): - return 'sacct -n -X -j {1} -o "State"'.format(self.host, job_id) + return 'sacct -n -X --jobs {1} -o "State"'.format(self.host, job_id) def get_checkAlljobs_cmd(self, jobs_id): - return "sacct -n -X -j {1} -o jobid,State".format(self.host, jobs_id) + return "sacct -n -X --jobs {1} -o jobid,State".format(self.host, jobs_id) def get_queue_status_cmd(self, job_id): return 'squeue -j {0} -o %A,%R'.format(job_id) def get_job_energy_cmd(self, job_id): - return 'sacct -n -j {0} -o JobId%25,State,NCPUS,NNodes,Submit,Start,End,ConsumedEnergy,MaxRSS%25,AveRSS%25'.format(job_id) + return 'sacct -n --jobs {0} -o JobId%25,State,NCPUS,NNodes,Submit,Start,End,ConsumedEnergy,MaxRSS%25,AveRSS%25'.format(job_id) def parse_queue_reason(self, output, job_id): reason = [x.split(',')[1] for x in output.splitlines() -- GitLab From eca91824a6f9b9f687b3fce4fd17e23f8c1dbe86 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 16:53:48 +0100 Subject: [PATCH 05/16] erase debug line --- autosubmit/platforms/slurmplatform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index a0c7f6f16..9402a1498 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -311,7 +311,6 @@ class 
SlurmPlatform(ParamikoPlatform): def parse_Alljobs_output(self, output, job_id): status = "" - Log.debug("Debugging parse_Alljobs: Output {0}, Job_id {1}".format(output,job_id)) try: status = [x.split()[1] for x in output.splitlines() if x.split()[0] == str(job_id)] -- GitLab From d176bab0608ac4bfaf33a3d74ca10c7d419332cf Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 17:20:27 +0100 Subject: [PATCH 06/16] unknown error with logfiles --- autosubmit/job/job.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 9f85229fb..2aa652228 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -517,19 +517,20 @@ class Job(object): @threaded def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name): - remote_logs = (self.script_name + ".out", self.script_name + ".err") - as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.reload() - - submitter = self._get_submitter(as_conf) - submitter.load_platforms(as_conf) - hpcarch = as_conf.get_platform() - platforms_to_test = set() - if self.platform_name is None: - self.platform_name = hpcarch - # serial - self._platform = submitter.platforms[self.platform_name.lower()] try: + as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) + as_conf.reload() + remote_logs = (self.script_name + ".out", self.script_name + ".err") + + submitter = self._get_submitter(as_conf) + submitter.load_platforms(as_conf) + hpcarch = as_conf.get_platform() + platforms_to_test = set() + if self.platform_name is None: + self.platform_name = hpcarch + # serial + self._platform = submitter.platforms[self.platform_name.lower()] + self._platform.restore_connection() except Exception as e: Log.printlog( -- GitLab From 3321b8639540cc9badc4d562cdde7fee428d6108 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 12 Feb 2021 17:25:18 +0100 Subject: [PATCH 07/16] unknown error 
with logfiles --- autosubmit/job/job.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 2aa652228..862355fae 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -530,11 +530,10 @@ class Job(object): self.platform_name = hpcarch # serial self._platform = submitter.platforms[self.platform_name.lower()] - - self._platform.restore_connection() + self._platform.test_connection() except Exception as e: Log.printlog( - "{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format(str(e), self.name), 6001) + "{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format(str(e.message), self.name), 6001) out_exist = False err_exist = False retries = 5 -- GitLab From c4814803e693749fb114343de4224c8f0d766441 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 09:36:10 +0100 Subject: [PATCH 08/16] unknown error with logfiles --- autosubmit/job/job.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 862355fae..f17040e4f 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -521,15 +521,9 @@ class Job(object): as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() remote_logs = (self.script_name + ".out", self.script_name + ".err") - submitter = self._get_submitter(as_conf) submitter.load_platforms(as_conf) - hpcarch = as_conf.get_platform() - platforms_to_test = set() - if self.platform_name is None: - self.platform_name = hpcarch - # serial - self._platform = submitter.platforms[self.platform_name.lower()] + self._platform = submitter.platforms[platform_name.lower()] self._platform.test_connection() except Exception as e: Log.printlog( -- GitLab From f7512c6089df12b666b48afbd978ad515a4cfca5 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 10:02:02 +0100 Subject: [PATCH 09/16] added banner 
timeout --- autosubmit/platforms/paramiko_platform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index f4ef58fb1..438344262 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -152,10 +152,10 @@ class ParamikoPlatform(Platform): self._proxy = paramiko.ProxyCommand( self._host_config['proxycommand']) self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, sock=self._proxy) + key_filename=self._host_config_id, sock=self._proxy, banner_timeout=200) else: self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id) + key_filename=self._host_config_id, banner_timeout=200) self.transport = paramiko.Transport( (self._host_config['hostname'], 22)) self.transport.connect(username=self.user) -- GitLab From 76ea74af3146259cd442f51ea611121e33738749 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 10:17:32 +0100 Subject: [PATCH 10/16] banner_timeout --- autosubmit/platforms/paramiko_platform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 438344262..c7eb053ae 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -152,10 +152,10 @@ class ParamikoPlatform(Platform): self._proxy = paramiko.ProxyCommand( self._host_config['proxycommand']) self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, sock=self._proxy, banner_timeout=200) + key_filename=self._host_config_id, sock=self._proxy, banner_timeout=1200) else: self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, banner_timeout=200) + key_filename=self._host_config_id, 
banner_timeout=1200) self.transport = paramiko.Transport( (self._host_config['hostname'], 22)) self.transport.connect(username=self.user) -- GitLab From 6d155d1aadc1a568032510201042dc4ca49aa2f6 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 11:47:05 +0100 Subject: [PATCH 11/16] banner_timeout --- autosubmit/job/job.py | 5 ++++- autosubmit/platforms/paramiko_platform.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index f17040e4f..c82bf9b0f 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -524,7 +524,10 @@ class Job(object): submitter = self._get_submitter(as_conf) submitter.load_platforms(as_conf) self._platform = submitter.platforms[platform_name.lower()] - self._platform.test_connection() + try: + self._platform.test_connection() + except: + pass except Exception as e: Log.printlog( "{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format(str(e.message), self.name), 6001) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index c7eb053ae..f8b840d47 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -77,7 +77,10 @@ class ParamikoPlatform(Platform): """ try: self.reset() - self.restore_connection() + try: + self.restore_connection() + except: + pass transport = self._ssh.get_transport() transport.send_ignore() except EOFError as e: -- GitLab From 437777f7d2ea06bc1a7f240bdccda969bc2e1310 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 12:47:04 +0100 Subject: [PATCH 12/16] banner_timeout --- autosubmit/job/job.py | 10 +++++----- autosubmit/platforms/paramiko_platform.py | 10 ++++------ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index c82bf9b0f..e1cdaec07 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -530,7 +530,7 @@ class 
Job(object): pass except Exception as e: Log.printlog( - "{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format(str(e.message), self.name), 6001) + "{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format(e.message, self.name), 6001) out_exist = False err_exist = False retries = 5 @@ -556,7 +556,7 @@ class Job(object): sleep(sleeptime) try: self._platform.restore_connection() - except: + except BaseException as e: Log.printlog("{0} \n Couldn't connect to the remote platform for this {1} job err/out files. ".format( e.message, self.name), 6001) if i >= retries: @@ -581,7 +581,7 @@ class Job(object): e.message, self.name)) try: self._platform.closeConnection() - except: + except BaseException as e: pass return except AutosubmitError as e: @@ -589,7 +589,7 @@ class Job(object): e.message, self.name), 6001) try: self._platform.closeConnection() - except: + except BaseException as e: pass return @@ -605,7 +605,7 @@ class Job(object): sleep(5) # safe wait before end a thread try: self._platform.closeConnection() - except: + except BaseException as e: pass return diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index f8b840d47..a604d4364 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -121,7 +121,7 @@ class ParamikoPlatform(Platform): raise except Exception as e: raise AutosubmitCritical( - 'Cant connect to this platform due an unknown error', 7050, str(e)) + 'Cant connect to this platform due an unknown error', 7050, e.message) def connect(self, reconnect=False): """ @@ -261,7 +261,6 @@ class ParamikoPlatform(Platform): return True except Exception as e: if str(e) in "Garbage": - #raise AutosubmitError("Files couldn't be retrieved, session not active".format(filename),6004,e.message) if not ignore_log: Log.printlog( "File {0} seems to no exists (skipping)".format(filename), 5004) @@ -291,7 +290,6 @@ 
class ParamikoPlatform(Platform): self.get_files_path(), filename)) return True except IOError as e: - #Log.printlog("{0} couldn't be retrieved, session not active".format(os.path.join(self.get_files_path(), filename)),6004) return False except BaseException as e: Log.error('Could not remove file {0} due a wrong configuration'.format( @@ -328,10 +326,10 @@ class ParamikoPlatform(Platform): except Exception as e: if str(e) in "Garbage": raise AutosubmitError('File {0} does not exists'.format( - os.path.join(self.get_files_path(), src)), 6004, str(e)) + os.path.join(self.get_files_path(), src)), 6004, e.message) if must_exist: raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format( - os.path.join(self.get_files_path(), src)), 6004, str(e)) + os.path.join(self.get_files_path(), src)), 6004, e.message) else: Log.printlog("Log file couldn't be moved: {0}".format( os.path.join(self.get_files_path(), src)), 5001) @@ -861,7 +859,7 @@ class ParamikoPlatform(Platform): try: # Test if remote_path exists self._ftpChannel.chdir(self.remote_log_dir) - except IOError: + except IOError as e: try: if self.send_command(self.get_mkdir_cmd()): Log.debug('{0} has been created on {1} .', -- GitLab From e81ecd05fa323439b451d30bdd086be8464db335 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 13:39:03 +0100 Subject: [PATCH 13/16] banner_timeout --- autosubmit/platforms/paramiko_platform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index a604d4364..8776b8274 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -155,10 +155,10 @@ class ParamikoPlatform(Platform): self._proxy = paramiko.ProxyCommand( self._host_config['proxycommand']) self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, sock=self._proxy, 
banner_timeout=1200) + key_filename=self._host_config_id, sock=self._proxy, timeout=120 , banner_timeout=120) else: self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, banner_timeout=1200) + key_filename=self._host_config_id, timeout=120 , banner_timeout=1200) self.transport = paramiko.Transport( (self._host_config['hostname'], 22)) self.transport.connect(username=self.user) -- GitLab From 82b55497de8d06453eeb8397d3d99c410c22d989 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 14:21:50 +0100 Subject: [PATCH 14/16] Added an option for disable threads --- autosubmit/config/config_common.py | 8 ++- autosubmit/job/job.py | 61 ++++++++++++++++++++++- autosubmit/platforms/paramiko_platform.py | 2 +- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index e7f3613ef..138fe5b60 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -1306,7 +1306,13 @@ class AutosubmitConfig(object): :rtype: str """ return self._conf_parser.get_option('config', 'MAX_WALLCLOCK', '') - + def get_disable_recovery_threads(self, section): + """ + Returns FALSE/TRUE + :return: recovery_threads_option + :rtype: str + """ + return self._platforms_parser.get_option(section, 'DISABLE_RECOVERY_THREADS', 'FALSE').lower() def get_max_processors(self): """ Returns max processors from autosubmit's config file diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e1cdaec07..e7545e9a2 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -515,6 +515,59 @@ class Job(object): retrials_list.insert(0, retrial_dates) return retrials_list + def retrieve_logfiles_unthreaded(self, copy_remote_logs, local_logs): + remote_logs = (self.script_name + ".out", self.script_name + ".err") + out_exist = False + err_exist = False + retries = 3 + sleeptime = 0 + i = 0 + no_continue = False + try: + while (not 
out_exist and not err_exist) and i < retries: + try: + out_exist = self._platform.check_file_exists( + remote_logs[0], True) + except IOError as e: + out_exist = False + try: + err_exist = self._platform.check_file_exists( + remote_logs[1], True) + except IOError as e: + err_exists = False + if not out_exist or not err_exist: + sleeptime = sleeptime + 5 + i = i + 1 + sleep(sleeptime) + if i >= retries: + if not out_exist or not err_exist: + Log.printlog("Failed to retrieve log files {1} and {2} e=6001".format( + retries, remote_logs[0], remote_logs[1])) + return + if copy_remote_logs: + # unifying names for log files + if remote_logs != local_logs: + self.synchronize_logs(self._platform, remote_logs, local_logs) + remote_logs = copy.deepcopy(local_logs) + self._platform.get_logs_files(self.expid, remote_logs) + # Update the logs with Autosubmit Job Id Brand + try: + for local_log in local_logs: + self._platform.write_jobid(self.id, os.path.join( + self._tmp_path, 'LOG_' + str(self.expid), local_log)) + except BaseException as e: + Log.printlog("Trace {0} \n Failed to write the {1} e=6001".format( + e.message, self.name)) + + except AutosubmitError as e: + Log.printlog("Trace {0} \nFailed to retrieve log file for job {1}".format( + e.message, self.name), 6001) + + except AutosubmitCritical as e: # Critical errors can't be recovered. 
Failed configuration or autosubmit error + Log.printlog("Trace {0} \nFailed to retrieve log file for job {0}".format( + e.message, self.name), 6001) + return + @threaded def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name): try: @@ -676,8 +729,12 @@ class Job(object): platform_name = copy.deepcopy(self.platform_name.lower()) local_logs = copy.deepcopy(self.local_logs) remote_logs = copy.deepcopy(self.remote_logs) - self.retrieve_logfiles( - copy_remote_logs, local_logs, remote_logs, expid, platform_name) + as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) + as_conf.reload() + if as_conf.get_disable_recovery_threads(self.platform.name): + self.retrieve_logfiles_unthreaded(copy_remote_logs, local_logs) + else: + self.retrieve_logfiles(copy_remote_logs, local_logs, remote_logs, expid, platform_name) return self.status diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 8776b8274..ff99edc21 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -158,7 +158,7 @@ class ParamikoPlatform(Platform): key_filename=self._host_config_id, sock=self._proxy, timeout=120 , banner_timeout=120) else: self._ssh.connect(self._host_config['hostname'], 22, username=self.user, - key_filename=self._host_config_id, timeout=120 , banner_timeout=1200) + key_filename=self._host_config_id, timeout=120 , banner_timeout=120) self.transport = paramiko.Transport( (self._host_config['hostname'], 22)) self.transport.connect(username=self.user) -- GitLab From b6ee61576c7c20be0ef881368345063b86d5ce86 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 14:26:59 +0100 Subject: [PATCH 15/16] Added an option for disable threads --- docs/source/troubleshoot.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/troubleshoot.rst b/docs/source/troubleshoot.rst index ada93ce2b..d64d677a5 100644 --- 
a/docs/source/troubleshoot.rst +++ b/docs/source/troubleshoot.rst @@ -19,6 +19,12 @@ My project parameters are not being substituted in the templates. *Solution*: Don't repeat section names and parameters names until Autosubmit 4.0 release. +Unable to recover remote log files. +======================================================== + +*Explanation*: The remote platform may limit the number of simultaneous connections, in which case the threaded recovery of the log files can fail. +*Solution*: Try setting DISABLE_RECOVERY_THREADS = TRUE under the [platform_name] section in the platform.conf. + Other possible errors ===================== -- GitLab From da3b76cb1c6347f0772a3d3e68b21ee4d98e6312 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 15 Feb 2021 14:36:28 +0100 Subject: [PATCH 16/16] Added an option for disable threads --- autosubmit/job/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e7545e9a2..67e524b33 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -731,7 +731,7 @@ class Job(object): remote_logs = copy.deepcopy(self.remote_logs) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - if as_conf.get_disable_recovery_threads(self.platform.name): + if as_conf.get_disable_recovery_threads(self.platform.name) == "true": self.retrieve_logfiles_unthreaded(copy_remote_logs, local_logs) else: self.retrieve_logfiles(copy_remote_logs, local_logs, remote_logs, expid, platform_name) -- GitLab