diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py
index 5730bfc83ccd5579d4e82908c9f69972f0241390..ba891e6a91e2c134524cddb8d9641dbb57dc6012 100644
--- a/autosubmit/job/job.py
+++ b/autosubmit/job/job.py
@@ -1254,7 +1254,6 @@ class WrapperJob(Job):
                         self.failed = True
                         self._platform.delete_file('WRAPPER_FAILED')
                         break
-
             if self.failed:
                 self.update_failed_jobs()
                 if len(self.inner_jobs_running) <= 0:
@@ -1336,10 +1335,8 @@ class WrapperJob(Job):
         start_time = self.running_jobs_start[job]
         if self._is_over_wallclock(start_time, job.wallclock):
             # if self.as_config.get_wrapper_type() in ['vertical', 'horizontal']:
-            Log.printlog("Job {0} inside wrapper {1} is running for longer than it's wallclock! Cancelling...".format(
+            Log.printlog("Job {0} inside wrapper {1} is running for longer than it's wallclock!".format(
                 job.name, self.name), 6009)
-            job.new_status = Status.FAILED
-            job.update_status(self.as_config.get_copy_remote_logs() == 'true')
             return True
         return False

@@ -1406,11 +1403,10 @@ done
                                 self.as_config.get_copy_remote_logs() == 'true')
                         if len(out) == 2:
                             Log.info("Job {0} is RUNNING".format(jobname))
-                            over_wallclock = self._check_inner_job_wallclock(
-                                job)
+                            over_wallclock = self._check_inner_job_wallclock(job) # messaged included
                             if over_wallclock:
-                                Log.printlog(
-                                    "Job {0} is FAILED".format(jobname), 6009)
+                                job.status = Status.FAILED
+                                Log.printlog("Job {0} is FAILED".format(jobname),6009)
                         elif len(out) == 3:
                             end_time = self._check_time(out, 2)
                             self._check_finished_job(job)
@@ -1420,9 +1416,8 @@ done
                 sleep(wait)
             retries = retries - 1
         temp_list = self.inner_jobs_running
-        self.inner_jobs_running = [
-            job for job in temp_list if job.status == Status.RUNNING]
-        if retries == 0 or over_wallclock:
+        self.inner_jobs_running = [job for job in temp_list if job.status == Status.RUNNING]
+        if retries == 0: # or over_wallclock:
             self.status = Status.FAILED

     def _check_finished_job(self, job, failed_file=False):
@@ -1440,8 +1435,8 @@
             job.update_status(self.as_config.get_copy_remote_logs() == 'true')
         else:
             #Log.info("No completed filed found, setting {0} to FAILED...".format(job.name))
-            job.new_status = Status.FAILED
-            job.update_status(self.as_config.get_copy_remote_logs() == 'true')
+            job.status = Status.FAILED
+            #job.update_status(self.as_config.get_copy_remote_logs() == 'true')
         self.running_jobs_start.pop(job, None)

     def update_failed_jobs(self, canceled_wrapper=False):
diff --git a/autosubmit/platforms/wrappers/wrapper_builder.py b/autosubmit/platforms/wrappers/wrapper_builder.py
index 15096b39ed1eb34ee32ce88f335a0c660efa2608..1b0322b2c38e76f9292f18510fde737de0f32fb6 100644
--- a/autosubmit/platforms/wrappers/wrapper_builder.py
+++ b/autosubmit/platforms/wrappers/wrapper_builder.py
@@ -290,12 +290,6 @@ processors_per_node = int(jobs_resources['PROCESSORS_PER_NODE'])
            current = {1}
            current.start()
            current.join()
-           if os.path.exists(failed_wrapper):
-               os.remove(os.path.join(os.getcwd(),wrapper_id))
-               wrapper_failed = os.path.join(os.getcwd(),"WRAPPER_FAILED")
-               open(wrapper_failed, 'w').close()
-               os._exit(1)
-
        """).format(jobs_list, thread, '\n'.ljust(13))

        if footer:
@@ -312,8 +306,24 @@ processors_per_node = int(jobs_resources['PROCESSORS_PER_NODE'])
                open(failed_path, 'w').close()
                print datetime.now(), "The job ", current.template," has FAILED"
                #{1}
-            """).format(jobs_list, self.exit_thread, '\n'.ljust(13)), 8)
+            """).format(jobs_list, self.exit_thread, '\n'.ljust(13)), 4)
+            sequential_threads_launcher += self._indent(textwrap.dedent("""
+            if os.path.exists(failed_wrapper):
+                os.remove(os.path.join(os.getcwd(),wrapper_id))
+                wrapper_failed = os.path.join(os.getcwd(),"WRAPPER_FAILED")
+                open(wrapper_failed, 'w').close()
+                os._exit(1)
+
+            """).format(jobs_list, self.exit_thread, '\n'.ljust(13)), 4)
+        else:
+            sequential_threads_launcher += self._indent(textwrap.dedent("""
+            if os.path.exists(failed_wrapper):
+                os.remove(os.path.join(os.getcwd(),wrapper_id))
+                wrapper_failed = os.path.join(os.getcwd(),"WRAPPER_FAILED")
+                open(wrapper_failed, 'w').close()
+                os._exit(1)
+            """).format(jobs_list, self.exit_thread, '\n'.ljust(13)), 4)

        return sequential_threads_launcher

    def build_parallel_threads_launcher(self, jobs_list, thread, footer=True):
diff --git a/log/log.py b/log/log.py
index 222060b1ffbfab713bae04a1d9593a25a8f5241c..956d4015ffc6540227c5b8fac5aab038b2145a82 100644
--- a/log/log.py
+++ b/log/log.py
@@ -163,7 +163,7 @@ class Log:
             os.mkdir(directory)
         files = [f for f in os.listdir(directory) if os.path.isfile(
             os.path.join(directory, f)) and f.endswith(filename)]
-        if len(files) >= 5:
+        if len(files) >= 10:
             files.sort()
             os.remove(os.path.join(directory, files[0]))
         file_path = os.path.join(