From 47159b5cdfb3dd8d427cfb210efc16e3011201e1 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 7 Feb 2023 14:50:24 +0100 Subject: [PATCH 1/4] !893 --- autosubmit/job/job_dict.py | 2 +- autosubmit/job/job_packager.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/autosubmit/job/job_dict.py b/autosubmit/job/job_dict.py index ff8b60734..c007d220c 100644 --- a/autosubmit/job/job_dict.py +++ b/autosubmit/job/job_dict.py @@ -389,7 +389,7 @@ class DicJobs: job.queue = str(parameters[section].get( "QUEUE", "")) job.ec_queue = str(parameters[section].get("EC_QUEUE", "")) - if job.ec_queue == "": + if job.ec_queue == "" and job.platform_name != "LOCAL": job.ec_queue = str(self.experiment_data["PLATFORMS"][job.platform_name].get("EC_QUEUE","hpc")) job.partition = str(parameters[section].get( "PARTITION", "")) diff --git a/autosubmit/job/job_packager.py b/autosubmit/job/job_packager.py index 222942bdb..e6c5ed2b6 100644 --- a/autosubmit/job/job_packager.py +++ b/autosubmit/job/job_packager.py @@ -196,6 +196,7 @@ class JobPackager(object): return packages_to_submit if not (self._max_wait_jobs_to_submit > 0 and self._max_jobs_to_submit > 0): # If there is no more space in platform, result is tuple of empty + Log.debug("No more space in platform {0} for jobs {1}".format(self._platform.name, [job.name for job in jobs_ready])) return packages_to_submit # Sort by 6 first digits of date -- GitLab From 148d614b4a1f4f3339f3e44c6210a28675278111 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 6 Feb 2023 11:14:40 +0100 Subject: [PATCH 2/4] Updated error codes --- autosubmit/autosubmit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index b51dd91bf..5ba5f8eeb 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -831,9 +831,8 @@ class Autosubmit: raise AutosubmitCritical( "Current experiment uses ({0}) which is not the running Autosubmit version \nPlease, update the experiment version if you wish to continue using AutoSubmit {1}\nYou can achieve this using the command autosubmit updateversion {2} \n" "Or with the -v parameter: autosubmit {3} {2} -v ".format(as_conf.get_version(), - Autosubmit.autosubmit_version, - expid, args.command), - 7067) + Autosubmit.autosubmit_version, expid,args.command), + 7014) else: if expid == 'None': exp_id = "" @@ -1863,7 +1862,8 @@ class Autosubmit: "Only jobs with member value in {0} or no member will be allowed in this run. Also, those jobs already SUBMITTED, QUEUING, or RUNNING will be allowed to complete and will be tracked.".format( str(allowed_members))) except AutosubmitCritical as e: - raise AutosubmitCritical(e.message, 7067, e.trace) + e.message += " HINT: check the CUSTOM_DIRECTIVE syntax in your jobs configuration files." + raise AutosubmitCritical(e.message, 7014, e.trace) except Exception as e: raise AutosubmitCritical( "Error in run initialization", 7014, str(e)) # Changing default to 7014 -- GitLab From de5649d9b7dca44f23b960965cef280d7d6011e9 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 3 Feb 2023 15:35:05 +0100 Subject: [PATCH 3/4] Cancel remote job if it is over_wallclock Job_id doesn't depend on retrials anymore when is written --- autosubmit/job/job.py | 6 +++--- autosubmit/platforms/paramiko_platform.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index ce76e333d..c397d4d5d 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -636,7 +636,7 @@ class Job(object): return @threaded - def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name,fail_count = 0): + def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name,fail_count = 0,job_id=""): max_logs = 0 last_log = 0 sleep(5) @@ -783,7 +783,7 @@ class Job(object): # Update the logs with Autosubmit Job ID Brand try: for local_log in local_logs: - platform.write_jobid(self.id, os.path.join( + platform.write_jobid(job_id, os.path.join( self._tmp_path, 'LOG_' + str(self.expid), local_log)) except BaseException as e: Log.printlog("Trace {0} \n Failed to write the {1} e=6001".format( @@ -925,7 +925,7 @@ class Job(object): if as_conf.get_disable_recovery_threads(self.platform.name) == "true": self.retrieve_logfiles_unthreaded(copy_remote_logs, local_logs) else: - self.retrieve_logfiles(copy_remote_logs, local_logs, remote_logs, expid, platform_name,fail_count = copy.copy(self.fail_count)) + self.retrieve_logfiles(copy_remote_logs, local_logs, remote_logs, expid, platform_name,fail_count = copy.copy(self.fail_count),job_id=self.id) if self.wrapper_type == "vertical": max_logs = int(as_conf.get_retrials()) for i in range(0,max_logs): diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index c50eb133e..039781265 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -654,6 +654,12 @@ class ParamikoPlatform(Platform): try: job.platform.get_completed_files(job.name) job_status = job.check_completion(over_wallclock=True) + if job_status is Status.FAILED: + try: + job.platform.send_command( + self.platform.cancel_cmd + " " + str(job.id)) + except: + pass except: job_status = Status.FAILED if job_status in self.job_status['COMPLETED']: -- GitLab From cdd0c596d12d1ae83c0c566f5fa2090f0ce3a9d3 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 7 Feb 2023 15:42:32 +0100 Subject: [PATCH 4/4] write_job job_id --- autosubmit/job/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index c397d4d5d..57bdca689 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -766,7 +766,7 @@ class Job(object): platform.get_logs_files(self.expid, l_log) try: for local_log in l_log: - platform.write_jobid(self.id, os.path.join(self._tmp_path, 'LOG_' + str(self.expid), local_log)) + platform.write_jobid(job_id, os.path.join(self._tmp_path, 'LOG_' + str(self.expid), local_log)) except BaseException as e: pass max_logs = max_logs - 1 -- GitLab