diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 486de2669907fe259a90002423df420fe699af35..53ba5c6b2b741eb204291e7940d46afe6821fb24 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1445,19 +1445,18 @@ class Autosubmit: # Wait for all remaining threads of I/O, close remaining connections timeout = 0 - for platform in platforms_to_test: - platform.closeConnection() + active_threads = True all_threads = threading.enumerate() while active_threads and timeout < 360: active_threads = False - threads_active = 0 for thread in all_threads: if "Thread-" in thread.name: if thread.isAlive(): active_threads = True - threads_active = threads_active+1 - sleep(10) + sleep(10) + for platform in platforms_to_test: + platform.closeConnection() if len(job_list.get_failed()) > 0: Log.info("Some jobs have failed and reached maximum retrials") else: @@ -1961,7 +1960,7 @@ class Autosubmit: if job.platform.get_completed_files(job.name, 0, True): job.status = Status.COMPLETED Log.info("CHANGED job '{0}' status to COMPLETED".format(job.name)) - Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) + #Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) if not no_recover_logs: try: diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e0fd9c60d534bed5f23a8e7a8890b0c917644866..2efdb488fe3c62c322eac3f98e41d06a3f79ae23 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -293,11 +293,11 @@ class Job(object): """ Prints job information in log """ - Log.info("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") - Log.info("{0}\t\t{1}\t{2}", self.name, self.id, self.status) + Log.debug("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") + Log.debug("{0}\t\t{1}\t{2}", self.name, self.id, self.status) - Log.status("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") - Log.status("{0}\t\t{1}\t{2}", self.name, self.id, self.status) + #Log.status("{0}\t{1}\t{2}", "Job Name", "Job Id", "Job Status") + #Log.status("{0}\t\t{1}\t{2}", self.name, self.id, self.status) def print_parameters(self): """ @@ -520,13 +520,13 @@ class Job(object): retries = 3 sleeptime = 5 i = 0 - sleep(10) + sleep(20) try: while (not out_exist or not err_exist) and i < retries: try: out_exist = platform.check_file_exists(remote_logs[0]) # will do 5 retries err_exist = platform.check_file_exists(remote_logs[1]) # will do 5 retries - except AutosubmitError as e: + except Exception as e: out_exist = False err_exist = False pass @@ -535,7 +535,7 @@ class Job(object): i = i + 1 sleep(sleeptime) if i >= retries: - raise AutosubmitError("Failed to retrieve log files",6001) + raise AutosubmitError("Retries = {0}, Failed to retrieve log files {1} and {2}".format(retries,remote_logs[0],remote_logs[1]),6001) if out_exist and err_exist: if copy_remote_logs: if local_logs != remote_logs: @@ -1298,7 +1298,7 @@ done if len(out) > 1: if job not in self.running_jobs_start: start_time = self._check_time(out, 1) - Log.status("Job {0} started at {1}".format(jobname, str(parse_date(start_time)))) + Log.debug("Job {0} started at {1}".format(jobname, str(parse_date(start_time)))) self.running_jobs_start[job] = start_time job.new_status = Status.RUNNING diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 46d5eddbc84c834af4ce72c64db424e5259aea8e..281b708c3119bf01eeea53665681db4cde0f5d44 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -38,7 +38,7 @@ import autosubmit.database.db_structure as DbStructure from networkx import DiGraph from autosubmit.job.job_utils import transitive_reduction from log.log import AutosubmitCritical,AutosubmitError,Log - +#Log.get_logger("Log.Autosubmit") class JobList: """ Class to manage the list of jobs to be run by autosubmit @@ -1243,7 +1243,7 @@ class JobList: else: job.hold = True - save = True + #save = True Log.debug('Update finished') return save diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index aacb81cc52ee9112cbb6e726ff863a336ab5d8ca..84a20b3db0fde5925fc1eb3af64c3e079c88ab03 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -364,7 +364,7 @@ class ParamikoPlatform(Platform): job.new_status = job_status sleep_time=5 while not (self.send_command(self.get_checkjob_cmd(job_id)) and retries >= 0) or (self.get_ssh_output() == "" and retries >= 0): - retries -= 1 + retries = retries - 1 Log.debug('Retrying check job command: {0}', self.get_checkjob_cmd(job_id)) Log.debug('retries left {0}', retries) Log.debug('Will be retrying in {0} seconds', sleep_time) @@ -561,9 +561,13 @@ class ParamikoPlatform(Platform): for errorLine in stderr_readlines: if errorLine.find("submission failed") != -1 or errorLine.find("git clone") != -1: raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines),6005)) + if "not active" in errorLine: + raise AutosubmitError('SSH Session not active, will restart the platforms', 6005) + if not ignore_log: if len(stderr_readlines) > 0: Log.printlog('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6006) + else: Log.debug('Command {0} in {1} successful with out message: {2}', command, self.host, self._ssh_output) return True @@ -740,7 +744,8 @@ class ParamikoPlatform(Platform): try: self.transport.sys.exit(0) except: - Log.debug("Transport already closed") + pass + def check_remote_log_dir(self): diff --git a/log/log.py b/log/log.py index 96ad8766733540fd0d52af061354b599e804d906..0e975235d2c95e767ae8b56f02d9477a13d2b810 100644 --- a/log/log.py +++ b/log/log.py @@ -114,6 +114,7 @@ class Log: CRITICAL = 7000 NO_LOG = CRITICAL + 1000 logging.basicConfig() + log_dict_debug = logging.Logger.manager.loggerDict if 'Autosubmit' in logging.Logger.manager.loggerDict.keys(): log = logging.getLogger('Autosubmit') else: @@ -153,7 +154,7 @@ class Log: file_path = os.path.join(directory, ('{0:%Y%m%d_%H%M%S}_').format(datetime.now()) + filename) if type == 'out': file_handler = logging.FileHandler(file_path, 'w') - file_handler.setLevel(level) + file_handler.setLevel(Log.DEBUG) file_handler.setFormatter(LogFormatter(True)) Log.log.addHandler(file_handler) elif type == 'err':