From 97976df498b26b41d483bc695e5dad2f16389852 Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Mon, 28 Sep 2020 12:27:57 +0200 Subject: [PATCH] Expanded the job status validation for better reliability --- autosubmit/autosubmit.py | 10 +++++----- autosubmit/database/db_jobdata.py | 29 ++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index e212a66ad..3acd40d47 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1420,9 +1420,9 @@ class Autosubmit: # Detect and store changes job_changes_tracker = {job.name: ( job.prev_status, job.status) for job in wrapper_job.job_list if job.prev_status != job.status} - job_data_structure.process_status_changes( - job_changes_tracker) - job_changes_tracker = {} + # job_data_structure.process_status_changes( + # job_changes_tracker) + # job_changes_tracker = {} else: # Prepare jobs, if slurm check all active jobs at once. job = job[0] prev_status = job.status @@ -1489,7 +1489,7 @@ class Autosubmit: job_list.save() # Safe spot to store changes job_data_structure.process_status_changes( - job_changes_tracker) + job_changes_tracker, job_list.get_job_list()) job_changes_tracker = {} if Autosubmit.exit: @@ -3837,7 +3837,7 @@ class Autosubmit: job_list.save() job_data_structure = JobDataStructure(expid) job_data_structure.process_status_changes( - job_tracked_changes, job_list.get_job_list(), as_conf.get_chunk_size_unit(), as_conf.get_chunk_size()) + job_tracked_changes, job_list.get_job_list(), as_conf.get_chunk_size_unit(), as_conf.get_chunk_size(), check_run=True) else: Log.printlog( "Changes NOT saved to the JobList!!!!: use -s option to save", 3000) diff --git a/autosubmit/database/db_jobdata.py b/autosubmit/database/db_jobdata.py index 9b8b4c20a..648795826 100644 --- a/autosubmit/database/db_jobdata.py +++ b/autosubmit/database/db_jobdata.py @@ -633,11 +633,11 @@ class JobDataStructure(MainDataBase): new_run = ExperimentRun(0) return self._insert_experiment_run(new_run) - def process_status_changes(self, tracking_dictionary, job_list=None, chunk_unit="NA", chunk_size=0): + def process_status_changes(self, tracking_dictionary, job_list=None, chunk_unit="NA", chunk_size=0, check_run=False): current_run = self.get_max_id_experiment_run() if current_run: if tracking_dictionary is not None and bool(tracking_dictionary) == True: - if job_list: + if job_list and check_run == True: current_date_member_completed_count = sum( 1 for job in job_list if job.date is not None and job.member is not None and job.status == Status.COMPLETED) if len(tracking_dictionary.keys()) >= int(current_date_member_completed_count * 0.9): @@ -648,9 +648,28 @@ class JobDataStructure(MainDataBase): self.validate_current_run( job_list, chunk_unit, chunk_size, True) return None - for name, (prev_status, status) in tracking_dictionary.items(): - current_run.update_counters(prev_status, status) - self._update_experiment_run(current_run) + if job_list and check_run == False: + if len(tracking_dictionary.items()) > 0: + # Changes exist + completed_count = sum( + 1 for job in job_list if job.status == Status.COMPLETED) + failed_count = sum( + 1 for job in job_list if job.status == Status.FAILED) + queue_count = sum( + 1 for job in job_list if job.status == Status.QUEUING) + submit_count = sum( + 1 for job in job_list if job.status == Status.SUBMITTED) + running_count = sum( + 1 for job in job_list if job.status == Status.RUNNING) + current_run.completed = completed_count + current_run.failed = failed_count + current_run.queuing = queue_count + current_run.submitted = submit_count + current_run.running = running_count + self._update_experiment_run(current_run) + # for name, (prev_status, status) in tracking_dictionary.items(): + # current_run.update_counters(prev_status, status) + else: raise Exception("Empty header database") -- GitLab