diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py
index bb00d96a664264c87ddfbd52dcaf0e0e6726afe5..472e9eabd2a79271a7521ce819ffeb1434c7c027 100644
--- a/autosubmit/autosubmit.py
+++ b/autosubmit/autosubmit.py
@@ -1852,15 +1852,15 @@ class Autosubmit:
                     jobs_id = platform.submit_Script(hold=hold)
                 except AutosubmitError as e:
                     jobs_id = None
-                    if e.message.lower().find("bad parameters") != -1:
+                    if e.message.lower().find("bad parameters") != -1 or e.message.lower().find("invalid partition") != -1:
                         error_msg = ""
                         for package_tmp in valid_packages_to_submit:
                             for job_tmp in package_tmp.jobs:
                                 if job_tmp.section not in error_msg:
                                     error_msg += job_tmp.section + "&"
                         raise AutosubmitCritical(
-                            "Submission failed, check job and queue specified of job_sections of {0}".format(
-                                error_msg[:-1]), 7014, e.trace)
+                            "Submission failed, check job, queue and partition specified for job_sections of {0}".format(
+                                error_msg[:-1]), 7014, e.message)
                 except BaseException as e:
                     raise AutosubmitError(
                         "Submission failed, this can be due a failure on the platform", 6015, e.message)
diff --git a/autosubmit/config/basicConfig.py b/autosubmit/config/basicConfig.py
index 1ef8bb4db6605020d7027f34b74151e635a09996..3fa785362d8667166b48aeeb6ea548d70e72e06d 100755
--- a/autosubmit/config/basicConfig.py
+++ b/autosubmit/config/basicConfig.py
@@ -76,7 +76,7 @@ class BasicConfig:
         """
         if not os.path.isfile(file_path):
             return
-        Log.debug('Reading config from ' + file_path)
+        #Log.debug('Reading config from ' + file_path)
         parser = SafeConfigParser()
         parser.optionxform = str
         parser.read(file_path)
diff --git a/autosubmit/database/db_jobdata.py b/autosubmit/database/db_jobdata.py
index 91546d18d9f5dc460b40ad620a7d0fc4a76a955d..f43dc9483a2ca0efa837a1a8e9fac8ed608c3744 100644
--- a/autosubmit/database/db_jobdata.py
+++ b/autosubmit/database/db_jobdata.py
@@ -1012,7 +1012,7 @@ class JobDataStructure(MainDataBase):
         # if rowid > 0:
         #     print("Successfully inserted")

-    def write_start_time(self, job_name, start=0, status="UNKWNONW", ncpus=0, wallclock="00:00", qos="debug", date="", member="", section="", chunk=0, platform="NA", job_id=0, packed=False):
+    def write_start_time(self, job_name, start=0, status="UNKNOWN", ncpus=0, wallclock="00:00", qos="debug", date="", member="", section="", chunk=0, platform="NA", job_id=0, packed=False):
         """Writes start time into the database

         Args:
diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py
index f8a6668bad3dc0e9332ded8babb4e24c294af00a..1cad4bfa7d71d1b9731825dd1d1dc78f24f0aa67 100644
--- a/autosubmit/job/job.py
+++ b/autosubmit/job/job.py
@@ -110,7 +110,7 @@ class Job(object):
         self.executable = None
         self._local_logs = ('', '')
         self._remote_logs = ('', '')
-
+        self.script_name = self.name + ".cmd"
         self.status = status
         self.prev_status = status
         self.old_status = self.status
@@ -272,7 +272,6 @@ class Job(object):
     @local_logs.setter
     def local_logs(self, value):
         self._local_logs = value
-        #self._remote_logs = value

     @property
     def remote_logs(self):
@@ -515,6 +514,7 @@ class Job(object):

     @threaded
     def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name):
+        remote_logs = (self.script_name + ".out", self.script_name + ".err")
         as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory())
         as_conf.reload()
@@ -536,7 +536,7 @@ class Job(object):
         retries = 5
         sleeptime = 0
         i = 0
-        sleep(10)
+        sleep(5)
         no_continue = False
         try:
             while (not out_exist and not err_exist) and i < retries:
@@ -566,7 +566,7 @@ class Job(object):
                     # unifying names for log files
                     if remote_logs != local_logs:
                         self.synchronize_logs(self._platform, remote_logs, local_logs)
-                        remote_logs = local_logs
+                        remote_logs = copy.deepcopy(local_logs)
                     self._platform.get_logs_files(self.expid, remote_logs)
                     # Update the logs with Autosubmit Job Id Brand
                     try:
@@ -962,6 +962,7 @@ class Job(object):
         '%(?
diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py
--- a/autosubmit/job/job_packages.py
+++ b/autosubmit/job/job_packages.py
@@ ... @@ class JobPackageBase(object):
        elif len(self.jobs) > 10000:
            thread_number = thread_number * 5
-        chunksize = int((len(self.jobs) + thread_number - 1) / thread_number);
+        chunksize = int((len(self.jobs) + thread_number - 1) / thread_number)
         try:
             if len(self.jobs) < thread_number:
                 for job in self.jobs:
@@ -167,19 +166,7 @@ class JobPackageBase(object):
                     raise AutosubmitCritical(
                         "Error on {1}, template [{0}] still does not exists in running time(check=on_submission actived) ".format(job.file,job.name), 7014)
         Log.debug("Creating Scripts")
-        if only_generate:
-            if not exit:
-                if len(self.jobs) < thread_number:
-                    self._create_scripts(configuration)
-                else:
-                    Lhandle = list()
-                    for i in xrange(0, len(self.jobs), chunksize):
-                        Lhandle.append(self._create_scripts_threaded(self.jobs[i:i + chunksize], configuration))
-                    for dataThread in Lhandle:
-                        dataThread.join()
-                    self._common_script = self._create_common_script()
-
-        else:
+        if not exit:
             if len(self.jobs) < thread_number:
                 self._create_scripts(configuration)
             else:
@@ -189,10 +176,12 @@ class JobPackageBase(object):
                 for dataThread in Lhandle:
                     dataThread.join()
                 self._common_script = self._create_common_script()
-            Log.debug("Sending Files")
-            self._send_files()
-            Log.debug("Submitting")
-            self._do_submission(hold=hold)
+            if not only_generate:
+
+                Log.debug("Sending Files")
+                self._send_files()
+                Log.debug("Submitting")
+                self._do_submission(hold=hold)


     def _create_scripts(self, configuration):
@@ -292,7 +281,6 @@ class JobPackageArray(JobPackageBase):
         for i in xrange(0, len(self.jobs)):
             self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration)
             self._job_inputs[self.jobs[i].name] = self._create_i_input(timestamp, i)
-            #self.jobs[i].remote_logs = (timestamp + ".{0}.out".format(i), timestamp + ".{0}.err".format(i))
         self._common_script = self._create_common_script(timestamp)

     def _create_i_input(self, filename, index):
@@ -405,7 +393,6 @@ class JobPackageThread(JobPackageBase):
     def _create_scripts(self, configuration):
         for i in xrange(0, len(self.jobs)):
             self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration)
-            #self.jobs[i].remote_logs = (self._job_scripts[self.jobs[i].name] + ".out".format(i),self._job_scripts[self.jobs[i].name] + ".err".format(i))
         self._common_script = self._create_common_script()

     def _create_common_script(self):
@@ -511,7 +498,6 @@ class JobPackageThreadWrapped(JobPackageThread):
     def _create_scripts(self, configuration):
         for i in xrange(0, len(self.jobs)):
             self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration)
-            #self.jobs[i].remote_logs = (self._job_scripts[self.jobs[i].name] + ".out".format(i),self._job_scripts[self.jobs[i].name] + ".err".format(i))
         self._common_script = self._create_common_script()

     def _create_common_script(self):
diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py
index cebb30848759908b521fc368088d1d30af794618..ce97cd319b34f235a9c71da0b0973f1c4ca6e2fc 100644
--- a/autosubmit/monitor/monitor.py
+++ b/autosubmit/monitor/monitor.py
@@ -353,9 +353,6 @@ class Monitor:
     def write_output_txt_recursive(self,job,output_file,level,path):
         log_out = ""
         log_err = ""
-        #if job.status in [Status.FAILED, Status.COMPLETED]:
-        #    log_out = path + "/" + job.local_logs[0]
-        #    log_err = path + "/" + job.local_logs[1]
         output = level+job.name + " " + Status().VALUE_TO_KEY[job.status] +"\n" #+ " " + log_out + " " + log_err + "\n"
         output_file.write(output)
         if job.has_children() > 0:
diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py
index a9a0073a3db8f4b6cb343bfb06616747a8e5b0a4..bedc91c5a6132e1afb277f6291a79786652ca730 100644
--- a/autosubmit/platforms/locplatform.py
+++ b/autosubmit/platforms/locplatform.py
@@ -25,8 +25,8 @@ from autosubmit.platforms.paramiko_platform import ParamikoPlatform
 from autosubmit.platforms.headers.local_header import LocalHeader

 from autosubmit.config.basicConfig import BasicConfig
-from log.log import Log
 from time import sleep
+from log.log import Log, AutosubmitError, AutosubmitCritical

 class LocalPlatform(ParamikoPlatform):
     """
@@ -185,7 +185,37 @@ class LocalPlatform(ParamikoPlatform):
                 Log.debug('Could not remove file {0}'.format(os.path.join(self.tmp_path, filename)))
             return False
         return True
-
+    def move_file(self, src, dest, must_exist=False):
+        """
+        Moves a file on the platform (includes .err and .out)
+        :param src: source name
+        :type src: str
+        :param dest: destination name
+        :type dest: str
+        :param must_exist: ignore if file exists or not
+        """
+        try:
+            path_root = self.get_files_path()
+            os.rename(os.path.join(path_root, src), os.path.join(path_root, dest))
+            return True
+        except IOError as e:
+            if must_exist:
+                raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format(
+                    path_root), 6004, e.message)
+            else:
+                Log.debug("File {0} doesn't exists ".format(path_root))
+                return False
+        except Exception as e:
+            if str(e) in "Garbage":
+                raise AutosubmitError('File {0} does not exists'.format(
+                    os.path.join(self.get_files_path(), src)), 6004, str(e))
+            if must_exist:
+                raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format(
+                    os.path.join(self.get_files_path(), src)), 6004, str(e))
+            else:
+                Log.printlog("Log file couldn't be moved: {0}".format(
+                    os.path.join(self.get_files_path(), src)), 5001)
+                return False
     def get_ssh_output(self):
         return self._ssh_output
     def get_ssh_output_err(self):
diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py
index 723eab588e302dfb763f3f58eb7b34b10debdac4..46cac5e8969531a6950b12239bbcc65bceee79fe 100644
--- a/autosubmit/platforms/paramiko_platform.py
+++ b/autosubmit/platforms/paramiko_platform.py
@@ -82,10 +82,11 @@ class ParamikoPlatform(Platform):
             transport.send_ignore()
         except EOFError as e:
             raise AutosubmitError("[{0}] not alive. Host: {1}".format(
-                self.name, self.host), 6002, str(e))
-        except Exception as e:
-            raise AutosubmitError("[{0}] connection failed for host: {1}".format(
-                self.name, self.host), 6002, str(e))
+                self.name, self.host), 6002, e.message)
+        except (AutosubmitError, AutosubmitCritical):
+            raise
+        except BaseException as e:
+            raise AutosubmitError("[{0}] connection failed for host: {1}".format(self.name, self.host), 6002, e.message)

     def restore_connection(self):
         try:
@@ -739,10 +740,11 @@ class ParamikoPlatform(Platform):
             executable = 'python'
         elif job.type == Type.R:
             executable = 'Rscript'
+        remote_logs = (job.script_name + ".out", job.script_name + ".err")
         return 'nohup ' + executable + ' {0} > {1} 2> {2} & echo $!'.format(
             os.path.join(self.remote_log_dir, job_script),
-            os.path.join(self.remote_log_dir, job.remote_logs[0]),
-            os.path.join(self.remote_log_dir, job.remote_logs[1])
+            os.path.join(self.remote_log_dir, remote_logs[0]),
+            os.path.join(self.remote_log_dir, remote_logs[1])
         )

     @staticmethod
@@ -780,11 +782,13 @@ class ParamikoPlatform(Platform):
             header = self.header.SERIAL
         else:
             header = self.header.PARALLEL
-
+        #TODO
         str_datetime = date2str(datetime.datetime.now(), 'S')
-        out_filename = "{0}.{1}.out".format(job.name, str_datetime)
-        err_filename = "{0}.{1}.err".format(job.name, str_datetime)
-        job.local_logs = (out_filename, err_filename)
+        out_filename = "{0}.cmd.out".format(job.name)
+        err_filename = "{0}.cmd.err".format(job.name)
+        #out_filename = "{0}.{1}.out".format(job.name, str_datetime)
+        #err_filename = "{0}.{1}.err".format(job.name, str_datetime)
+        #job.local_logs = (out_filename, err_filename)
         header = header.replace('%OUT_LOG_DIRECTIVE%', out_filename)
         header = header.replace('%ERR_LOG_DIRECTIVE%', err_filename)
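
Taken together, the job.py, locplatform.py and paramiko_platform.py hunks switch the remote log files to fixed "<job>.cmd.out" / "<job>.cmd.err" names (derived from the new Job.script_name attribute), and retrieve_logfiles later renames them to timestamped local names through synchronize_logs and the new move_file helper. The sketch below is not part of the patch; it only illustrates that naming scheme. The helper names (remote_log_names, local_log_names, rename_remote_to_local) are hypothetical, and the "YYYYMMDDHHMMSS" layout is an assumption based on the commented-out date2str(datetime.now(), 'S') naming.

import os
from datetime import datetime


def remote_log_names(job_name):
    # Fixed names written on the remote side: "<job>.cmd.out" / "<job>.cmd.err",
    # mirroring job.script_name = job.name + ".cmd" introduced by this patch.
    script_name = job_name + ".cmd"
    return script_name + ".out", script_name + ".err"


def local_log_names(job_name, when=None):
    # Timestamped names kept locally; the "YYYYMMDDHHMMSS" layout is assumed here.
    when = when or datetime.now()
    stamp = when.strftime("%Y%m%d%H%M%S")
    return "{0}.{1}.out".format(job_name, stamp), "{0}.{1}.err".format(job_name, stamp)


def rename_remote_to_local(files_path, job_name):
    # Illustrative stand-in for the move_file/synchronize_logs flow: rename the
    # fixed .cmd.out/.cmd.err files to their timestamped counterparts, if present.
    local = local_log_names(job_name)
    for src, dest in zip(remote_log_names(job_name), local):
        src_path = os.path.join(files_path, src)
        if os.path.exists(src_path):
            os.rename(src_path, os.path.join(files_path, dest))
    return local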