From 85af3e3face9b0d3b3c394d5e9047096c5307098 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 08:59:46 +0200 Subject: [PATCH 1/8] Timeout fix --- autosubmit/job/job_dict.py | 2 +- autosubmit/platforms/paramiko_platform.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job_dict.py b/autosubmit/job/job_dict.py index d47847fdb..2e3722bac 100644 --- a/autosubmit/job/job_dict.py +++ b/autosubmit/job/job_dict.py @@ -151,7 +151,7 @@ class DicJobs: if splits == "auto" and running != "chunk": raise AutosubmitCritical("SPLITS=auto is only allowed for running=chunk") - else: + elif splits != "auto": splits = int(splits) frequency = int(parameters[section].get("FREQUENCY", 1)) if running == 'once': diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 7a49482c3..bda788bc7 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -934,7 +934,7 @@ class ParamikoPlatform(Platform): pass - def exec_command(self, command, bufsize=-1, timeout=None, get_pty=False,retries=3, x11=False): + def exec_command(self, command, bufsize=-1, timeout=0, get_pty=False,retries=3, x11=False): """ Execute a command on the SSH server. A new `.Channel` is opened and the requested command is executed. 
The command's input and output @@ -997,7 +997,7 @@ class ParamikoPlatform(Platform): retries = retries - 1 if retries <= 0: return False , False, False - def exec_command_x11(self, command, bufsize=-1, timeout=None, get_pty=False,retries=3, x11=False): + def exec_command_x11(self, command, bufsize=-1, timeout=0, get_pty=False,retries=3, x11=False): session = self.transport.open_session() session.request_x11(handler=self.x11_handler) session.exec_command(command + " ; sleep infinity") -- GitLab From 11ee0af996b25d2fc8b992c344fe0ce6e9d4638b Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 10:11:05 +0200 Subject: [PATCH 2/8] Re-enabled log recovery --- autosubmit/platforms/paramiko_platform.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index bda788bc7..5421eeddc 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -304,9 +304,9 @@ class ParamikoPlatform(Platform): self._ftpChannel = paramiko.SFTPClient.from_transport(self.transport,window_size=pow(4, 12) ,max_packet_size=pow(4, 12) ) self._ftpChannel.get_channel().settimeout(120) self.connected = True - if not self.log_retrieval_process_active and as_conf and str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() != "false": + if not self.log_retrieval_process_active and (as_conf is None or str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() == "false"): self.log_retrieval_process_active = True - if as_conf.misc_data["ASMISC"].get("COMMAND", "").lower() == "run": + if as_conf.experiment_data["ASMISC"].get("COMMAND", "").lower() == "run": self.recover_job_logs() except SSHException: raise @@ -937,7 +937,7 @@ class ParamikoPlatform(Platform): def exec_command(self, command, bufsize=-1, timeout=0, get_pty=False,retries=3, x11=False): """ Execute a command on 
the SSH server. A new `.Channel` is opened and - the requested command is executed. The command's input and output + the requested command is execed. The command's input and output streams are returned as Python ``file``-like objects representing stdin, stdout, and stderr. -- GitLab From 9da409c1461e152c38d5a7840f91dfbf22b2841c Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 11:23:56 +0200 Subject: [PATCH 3/8] non-x11 jobs fix --- autosubmit/job/job.py | 2 +- autosubmit/platforms/headers/slurm_header.py | 2 +- autosubmit/platforms/paramiko_platform.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index bad576df2..5e4d1dc27 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1871,7 +1871,7 @@ class Job(object): self.check_warnings = as_conf.jobs_data[self.section].get("CHECK_WARNINGS", False) self.shape = as_conf.jobs_data[self.section].get("SHAPE", "") self.script = as_conf.jobs_data[self.section].get("SCRIPT", "") - self.x11 = str(as_conf.jobs_data[self.section].get("X11", False)).lower() + self.x11 = False if str(as_conf.jobs_data[self.section].get("X11", False)).lower() == "false" else True if self.checkpoint: # To activate placeholder sustitution per in the template parameters["AS_CHECKPOINT"] = self.checkpoint parameters['JOBNAME'] = self.name diff --git a/autosubmit/platforms/headers/slurm_header.py b/autosubmit/platforms/headers/slurm_header.py index ce590465a..de57fd911 100644 --- a/autosubmit/platforms/headers/slurm_header.py +++ b/autosubmit/platforms/headers/slurm_header.py @@ -380,7 +380,7 @@ class SlurmHeader(object): header = header.replace("%WALLCLOCK%", job.wallclock) header = header.replace("%JOBNAME%", job.name) - if job.x11 == "true": + if job.x11: header = header.replace( '%X11%', "SBATCH --x11=batch") else: diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 5421eeddc..62b778a80 100644 --- 
a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -1314,7 +1314,7 @@ class ParamikoPlatform(Platform): if hasattr(self.header, 'get_threads_per_task'): header = header.replace( '%THREADS_PER_TASK_DIRECTIVE%', self.header.get_threads_per_task(job)) - if job.x11 == "true": + if job.x11: header = header.replace( '%X11%', "SBATCH --x11=batch") else: -- GitLab From bf47408f4b617d0b9a3add35a2135338baab415a Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 14:13:17 +0200 Subject: [PATCH 4/8] ASMISC : True not in as_misc.yml ASMISC: COMMAND: Changed to AS_COMMAND: Fixed log_recovery looking at experiment_data instead of misc_data and fixed misc_data load the as_misc.yml file generated by autosubmit --- VERSION | 2 +- autosubmit/autosubmit.py | 2 +- autosubmit/platforms/ecplatform.py | 5 ++--- autosubmit/platforms/locplatform.py | 2 +- autosubmit/platforms/paramiko_platform.py | 2 +- autosubmit/platforms/sgeplatform.py | 2 +- requeriments.txt | 2 +- setup.py | 2 +- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/VERSION b/VERSION index 9edf2a44f..a7c00da34 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.7 +4.1.8 diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 4e7e8a106..d54ef43eb 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -853,8 +853,8 @@ class Autosubmit: expids = [x.strip() for x in expids] for expid in expids: as_conf = AutosubmitConfig(expid, BasicConfig, YAMLParserFactory()) - as_conf.reload(force_load=True) as_conf.set_last_as_command(args.command) + as_conf.reload(force_load=True) if len(as_conf.experiment_data) == 0: if args.command not in ["expid", "upgrade"]: diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index 44beb9ea6..5fb9914aa 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -174,8 +174,7 @@ class EcPlatform(ParamikoPlatform): as_conf is None or 
str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() == "false"): self.log_retrieval_process_active = True - - if as_conf.experiment_data["ASMISC"].get("COMMAND","").lower() == "run": + if as_conf and as_conf.misc_data.get("AS_COMMAND","").lower() == "run": self.recover_job_logs() def restore_connection(self,as_conf): @@ -213,7 +212,7 @@ class EcPlatform(ParamikoPlatform): as_conf is None or str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() == "false"): self.log_retrieval_process_active = True - if as_conf.experiment_data["ASMISC"].get("COMMAND", "").lower() == "run": + if as_conf and as_conf.misc_data.get("AS_COMMAND", "").lower() == "run": self.recover_job_logs() return "OK" else: diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index 39ba5659e..0af2d65cd 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -116,7 +116,7 @@ class LocalPlatform(ParamikoPlatform): if not self.log_retrieval_process_active and ( as_conf is None or str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS',"false")).lower() == "false"): self.log_retrieval_process_active = True - if as_conf and as_conf.experiment_data["ASMISC"].get("COMMAND","").lower() == "run": + if as_conf and as_conf.misc_data.get("AS_COMMAND","").lower() == "run": self.recover_job_logs() diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 62b778a80..92b5300ee 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -306,7 +306,7 @@ class ParamikoPlatform(Platform): self.connected = True if not self.log_retrieval_process_active and (as_conf is None or str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() == "false"): self.log_retrieval_process_active = True - if 
as_conf.experiment_data["ASMISC"].get("COMMAND", "").lower() == "run": + if as_conf and as_conf.misc_data.get("AS_COMMAND", "").lower() == "run": self.recover_job_logs() except SSHException: raise diff --git a/autosubmit/platforms/sgeplatform.py b/autosubmit/platforms/sgeplatform.py index e1c166c69..9c5e813f1 100644 --- a/autosubmit/platforms/sgeplatform.py +++ b/autosubmit/platforms/sgeplatform.py @@ -126,7 +126,7 @@ class SgePlatform(ParamikoPlatform): as_conf is None or str(as_conf.platforms_data.get(self.name, {}).get('DISABLE_RECOVERY_THREADS', "false")).lower() == "false"): self.log_retrieval_process_active = True - if as_conf.experiment_data["ASMISC"].get("COMMAND","").lower() == "run": + if as_conf and as_conf.misc_data.get("AS_COMMAND","").lower() == "run": self.recover_job_logs() def restore_connection(self,as_conf): """ diff --git a/requeriments.txt b/requeriments.txt index dfabf5637..d93b5fe67 100644 --- a/requeriments.txt +++ b/requeriments.txt @@ -21,7 +21,7 @@ matplotlib<=3.8.3 numpy<2 ruamel.yaml==0.17.21 rocrate==0.* -autosubmitconfigparser==1.0.66 +autosubmitconfigparser==1.0.67 pathlib configparser pytest diff --git a/setup.py b/setup.py index c9c266c88..7e83757fa 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ setup( 'numpy<2', 'ruamel.yaml==0.17.21', 'rocrate==0.*', - 'autosubmitconfigparser==1.0.66', + 'autosubmitconfigparser==1.0.67', 'configparser', 'pathlib', 'setproctitle' -- GitLab From 3f494208afad8d89eeecc06d0178003448715ac4 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 15:28:55 +0200 Subject: [PATCH 5/8] ASMISC : True not in as_misc.yml ASMISC: COMMAND: Changed to AS_COMMAND: Fixed log_recovery looking at experiment_data instead of misc_data and fixed misc_data load the as_misc.yml file generated by autosubmit --- autosubmit/platforms/paramiko_platform.py | 6 +++--- autosubmit/platforms/slurmplatform.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 92b5300ee..cd7950126 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -959,7 +959,7 @@ class ParamikoPlatform(Platform): while retries > 0: try: chan = self.transport.open_session() - if x11 == "true": + if x11: display = os.getenv('DISPLAY') if display is None or not display: display = "localhost:0" @@ -968,7 +968,7 @@ class ParamikoPlatform(Platform): chan.request_x11(single_connection=False,handler=self.x11_handler) else: chan = self.transport.open_session() - if x11 == "true": + if x11: if "timeout" in command: timeout_command = command.split("timeout ")[1].split(" ")[0] if timeout_command == 0: @@ -1064,7 +1064,7 @@ class ParamikoPlatform(Platform): stderr_readlines.append( stderr.channel.recv_stderr(len(c.in_stderr_buffer))) got_chunk = True - if x11 == "true": + if x11: if len(stderr_readlines) > 0: aux_stderr.extend(stderr_readlines) for stderr_line in stderr_readlines: diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 4ba8248d9..970ee6916 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -544,7 +544,7 @@ class SlurmPlatform(ParamikoPlatform): if outputlines.find("failed") != -1: raise AutosubmitCritical( "Submission failed. 
Command Failed", 7014) - if x11 == "true": + if x11: return int(outputlines.splitlines()[0]) else: jobs_id = [] @@ -570,7 +570,7 @@ class SlurmPlatform(ParamikoPlatform): else: x11 = job.x11 - if x11 == "true": + if x11: return export + self.get_submit_cmd_x11(job.x11_options.strip(""), job_script.strip(""), job) else: try: -- GitLab From 094e1520d9ecd2d90af877c00191c832bafeab2c Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 16:24:51 +0200 Subject: [PATCH 6/8] added exclusive directive --- autosubmit/platforms/headers/slurm_header.py | 3 +++ autosubmit/platforms/paramiko_platform.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/autosubmit/platforms/headers/slurm_header.py b/autosubmit/platforms/headers/slurm_header.py index de57fd911..a99b3997f 100644 --- a/autosubmit/platforms/headers/slurm_header.py +++ b/autosubmit/platforms/headers/slurm_header.py @@ -116,6 +116,7 @@ class SlurmHeader(object): if job.parameters['CURRENT_PROJ'] != '': return "SBATCH -A {0}".format(job.parameters['CURRENT_PROJ']) return "" + def get_exclusive_directive(self, job, het=-1): """ Returns account directive for the specified job @@ -421,6 +422,7 @@ class SlurmHeader(object): # #%QUEUE_DIRECTIVE% #%PARTITION_DIRECTIVE% +#%EXCLUSIVE_DIRECTIVE% #%ACCOUNT_DIRECTIVE% #%MEMORY_DIRECTIVE% #%THREADS_PER_TASK_DIRECTIVE% @@ -445,6 +447,7 @@ class SlurmHeader(object): # #%QUEUE_DIRECTIVE% #%PARTITION_DIRECTIVE% +#%EXCLUSIVE_DIRECTIVE% #%ACCOUNT_DIRECTIVE% #%MEMORY_DIRECTIVE% #%MEMORY_PER_TASK_DIRECTIVE% diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index cd7950126..d76cc13f3 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -1326,9 +1326,9 @@ class ParamikoPlatform(Platform): if hasattr(self.header, 'get_custom_directives'): header = header.replace( '%CUSTOM_DIRECTIVES%', self.header.get_custom_directives(job)) - if hasattr(self.header, 
'get_exclusivity'): + if hasattr(self.header, 'get_exclusive_directive'): header = header.replace( - '%EXCLUSIVITY_DIRECTIVE%', self.header.get_exclusivity(job)) + '%EXCLUSIVE_DIRECTIVE%', self.header.get_exclusive_directive(job)) if hasattr(self.header, 'get_account_directive'): header = header.replace( '%ACCOUNT_DIRECTIVE%', self.header.get_account_directive(job)) -- GitLab From 32e7cab6a02c12b19dd3a3699b0a99dc88432223 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 16:35:32 +0200 Subject: [PATCH 7/8] additional chan --- autosubmit/platforms/paramiko_platform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index d76cc13f3..1c64d3c78 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -958,7 +958,6 @@ class ParamikoPlatform(Platform): """ while retries > 0: try: - chan = self.transport.open_session() if x11: display = os.getenv('DISPLAY') if display is None or not display: -- GitLab From c5724915f37c69495109fd4c739b5491426f0d20 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 12 Jun 2024 16:36:10 +0200 Subject: [PATCH 8/8] timeout changed back --- autosubmit/platforms/paramiko_platform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 1c64d3c78..7dda872db 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -934,7 +934,7 @@ class ParamikoPlatform(Platform): pass - def exec_command(self, command, bufsize=-1, timeout=0, get_pty=False,retries=3, x11=False): + def exec_command(self, command, bufsize=-1, timeout=30, get_pty=False,retries=3, x11=False): """ Execute a command on the SSH server. A new `.Channel` is opened and the requested command is execed. The command's input and output -- GitLab