From 2fc35b788f29196b3064adb5763644133476f3e7 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 21 Sep 2020 14:07:05 +0200 Subject: [PATCH] Fixes #591 --- autosubmit/autosubmit.py | 11 +++++------ autosubmit/job/job.py | 16 ++++++++-------- autosubmit/job/job_list.py | 4 ++-- autosubmit/platforms/paramiko_platform.py | 9 +++++++-- log/log.py | 3 ++- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 486de2669..53ba5c6b2 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1445,19 +1445,18 @@ class Autosubmit: # Wait for all remaining threads of I/O, close remaining connections timeout = 0 - for platform in platforms_to_test: - platform.closeConnection() + active_threads = True all_threads = threading.enumerate() while active_threads and timeout < 360: active_threads = False - threads_active = 0 for thread in all_threads: if "Thread-" in thread.name: if thread.isAlive(): active_threads = True - threads_active = threads_active+1 - sleep(10) + sleep(10) + for platform in platforms_to_test: + platform.closeConnection() if len(job_list.get_failed()) > 0: Log.info("Some jobs have failed and reached maximum retrials") else: @@ -1961,7 +1960,7 @@ class Autosubmit: if job.platform.get_completed_files(job.name, 0, True): job.status = Status.COMPLETED Log.info("CHANGED job '{0}' status to COMPLETED".format(job.name)) - Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) + #Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) if not no_recover_logs: try: diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e0fd9c60d..2efdb488f 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -293,11 +293,11 @@ class Job(object): """ Prints job information in log """ - Log.info("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") - Log.info("{0}\t\t{1}\t{2}", self.name, self.id, self.status) + Log.debug("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") + Log.debug("{0}\t\t{1}\t{2}", self.name, self.id, self.status) - Log.status("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") - Log.status("{0}\t\t{1}\t{2}", self.name, self.id, self.status) + #Log.status("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") + #Log.status("{0}\t\t{1}\t{2}", self.name, self.id, self.status) def print_parameters(self): """ @@ -520,13 +520,13 @@ class Job(object): retries = 3 sleeptime = 5 i = 0 - sleep(10) + sleep(20) try: while (not out_exist or not err_exist) and i < retries: try: out_exist = platform.check_file_exists(remote_logs[0]) # will do 5 retries err_exist = platform.check_file_exists(remote_logs[1]) # will do 5 retries - except AutosubmitError as e: + except Exception as e: out_exist = False err_exist = False pass @@ -535,7 +535,7 @@ class Job(object): i = i + 1 sleep(sleeptime) if i >= retries: - raise AutosubmitError("Failed to retrieve log files",6001) + raise AutosubmitError("Retries = {0}, Failed to retrieve log files {1} and {2}".format(retries,remote_logs[0],remote_logs[1]),6001) if out_exist and err_exist: if copy_remote_logs: if local_logs != remote_logs: @@ -1298,7 +1298,7 @@ done if len(out) > 1: if job not in self.running_jobs_start: start_time = self._check_time(out, 1) - Log.status("Job {0} started at {1}".format(jobname, str(parse_date(start_time)))) + Log.debug("Job {0} started at {1}".format(jobname, str(parse_date(start_time)))) self.running_jobs_start[job] = start_time job.new_status = Status.RUNNING diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 46d5eddbc..281b708c3 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -38,7 +38,7 @@ import autosubmit.database.db_structure as DbStructure from networkx import DiGraph from autosubmit.job.job_utils import transitive_reduction from log.log import AutosubmitCritical,AutosubmitError,Log - +#Log.get_logger("Log.Autosubmit") class JobList: """ Class to manage the list of jobs to be run by autosubmit @@ -1243,7 +1243,7 @@ class JobList: else: job.hold = True - save = True + #save = True Log.debug('Update finished') return save diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index aacb81cc5..84a20b3db 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -364,7 +364,7 @@ class ParamikoPlatform(Platform): job.new_status = job_status sleep_time=5 while not (self.send_command(self.get_checkjob_cmd(job_id)) and retries >= 0) or (self.get_ssh_output() == "" and retries >= 0): - retries -= 1 + retries = retries - 1 Log.debug('Retrying check job command: {0}', self.get_checkjob_cmd(job_id)) Log.debug('retries left {0}', retries) Log.debug('Will be retrying in {0} seconds', sleep_time) @@ -561,9 +561,13 @@ class ParamikoPlatform(Platform): for errorLine in stderr_readlines: if errorLine.find("submission failed") != -1 or errorLine.find("git clone") != -1: raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines),6005)) + if "not active" in errorLine: + raise AutosubmitError('SSH Session not active, will restart the platforms', 6005) + if not ignore_log: if len(stderr_readlines) > 0: Log.printlog('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6006) + else: Log.debug('Command {0} in {1} successful with out message: {2}', command, self.host, self._ssh_output) return True @@ -740,7 +744,8 @@ class ParamikoPlatform(Platform): try: self.transport.sys.exit(0) except: - Log.debug("Transport already closed") + pass + def check_remote_log_dir(self): diff --git a/log/log.py b/log/log.py index 96ad87667..0e975235d 100644 --- a/log/log.py +++ b/log/log.py @@ -114,6 +114,7 @@ class Log: CRITICAL = 7000 NO_LOG = CRITICAL + 1000 logging.basicConfig() + log_dict_debug = logging.Logger.manager.loggerDict if 'Autosubmit' in logging.Logger.manager.loggerDict.keys(): log = logging.getLogger('Autosubmit') else: @@ -153,7 +154,7 @@ class Log: file_path = os.path.join(directory, ('{0:%Y%m%d_%H%M%S}_').format(datetime.now()) + filename) if type == 'out': file_handler = logging.FileHandler(file_path, 'w') - file_handler.setLevel(level) + file_handler.setLevel(Log.DEBUG) file_handler.setFormatter(LogFormatter(True)) Log.log.addHandler(file_handler) elif type == 'err': -- GitLab