From 43c1b08434055be266321831f55121dde43e9838 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 1 Feb 2021 13:24:12 +0100 Subject: [PATCH 1/4] Log changes --- autosubmit/job/job.py | 5 ++++- autosubmit/platforms/paramiko_platform.py | 11 ++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index f8a6668ba..e059c46fd 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -515,6 +515,7 @@ class Job(object): @threaded def retrieve_logfiles(self, copy_remote_logs, local_logs, remote_logs, expid, platform_name): + remote_logs = (self.script_name + ".out", self.script_name + ".err") as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() @@ -566,7 +567,7 @@ class Job(object): # unifying names for log files if remote_logs != local_logs: self.synchronize_logs(self._platform, remote_logs, local_logs) - remote_logs = local_logs + remote_logs = copy.deepcopy(local_logs) self._platform.get_logs_files(self.expid, remote_logs) # Update the logs with Autosubmit Job Id Brand try: @@ -962,6 +963,7 @@ class Job(object): '%(? Date: Mon, 1 Feb 2021 16:17:07 +0100 Subject: [PATCH 2/4] Log changes --- autosubmit/job/job.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e059c46fd..8ee83aca5 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -110,7 +110,7 @@ class Job(object): self.executable = None self._local_logs = ('', '') self._remote_logs = ('', '') - + self.script_name = self.name+".cmd" self.status = status self.prev_status = status self.old_status = self.status @@ -272,7 +272,6 @@ class Job(object): @local_logs.setter def local_logs(self, value): self._local_logs = value - #self._remote_logs = value @property def remote_logs(self): @@ -981,7 +980,7 @@ class Job(object): '%(? Date: Mon, 1 Feb 2021 17:58:42 +0100 Subject: [PATCH 3/4] Issues when changing status --- autosubmit/database/db_jobdata.py | 2 +- autosubmit/job/job.py | 5 +++-- autosubmit/job/job_packages.py | 4 ---- autosubmit/monitor/monitor.py | 3 --- autosubmit/platforms/paramiko_platform.py | 8 +++++--- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/autosubmit/database/db_jobdata.py b/autosubmit/database/db_jobdata.py index 91546d18d..f43dc9483 100644 --- a/autosubmit/database/db_jobdata.py +++ b/autosubmit/database/db_jobdata.py @@ -1012,7 +1012,7 @@ class JobDataStructure(MainDataBase): # if rowid > 0: # print("Successfully inserted") - def write_start_time(self, job_name, start=0, status="UNKWNONW", ncpus=0, wallclock="00:00", qos="debug", date="", member="", section="", chunk=0, platform="NA", job_id=0, packed=False): + def write_start_time(self, job_name, start=0, status="UNKWOWN", ncpus=0, wallclock="00:00", qos="debug", date="", member="", section="", chunk=0, platform="NA", job_id=0, packed=False): """Writes start time into the database Args: diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 8ee83aca5..3ea20deb9 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1049,8 +1049,9 @@ class Job(object): Log.printlog('Could not get start time for {0}. Using current time as an approximation'.format( self.name), 3000) start_time = time.time() - timestamp = str(int(time.time())) - self.local_logs = (self.name + "."+timestamp+ ".out", self.name + "."+timestamp+".err") + timestamp = date2str(datetime.datetime.now(), 'S') + + self.local_logs = (self.name + "." + timestamp + ".out", self.name + "." + timestamp + ".err") path = os.path.join(self._tmp_path, self.name + '_TOTAL_STATS') f = open(path, 'a') diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py index 01be470a2..ff1b86cbd 100644 --- a/autosubmit/job/job_packages.py +++ b/autosubmit/job/job_packages.py @@ -115,7 +115,6 @@ class JobPackageBase(object): def _create_scripts_threaded(self,jobs,configuration): for i in xrange(0, len(jobs)): self._job_scripts[jobs[i].name] = jobs[i].create_script(configuration) - #self.jobs[i].remote_logs = (self._job_scripts[jobs[i].name] + ".out".format(i),self._job_scripts[jobs[i].name] + ".err".format(i)) def _create_common_script(self): pass @@ -292,7 +291,6 @@ class JobPackageArray(JobPackageBase): for i in xrange(0, len(self.jobs)): self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration) self._job_inputs[self.jobs[i].name] = self._create_i_input(timestamp, i) - #self.jobs[i].remote_logs = (timestamp + ".{0}.out".format(i), timestamp + ".{0}.err".format(i)) self._common_script = self._create_common_script(timestamp) def _create_i_input(self, filename, index): @@ -405,7 +403,6 @@ class JobPackageThread(JobPackageBase): def _create_scripts(self, configuration): for i in xrange(0, len(self.jobs)): self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration) - #self.jobs[i].remote_logs = (self._job_scripts[self.jobs[i].name] + ".out".format(i),self._job_scripts[self.jobs[i].name] + ".err".format(i)) self._common_script = self._create_common_script() def _create_common_script(self): @@ -511,7 +508,6 @@ class JobPackageThreadWrapped(JobPackageThread): def _create_scripts(self, configuration): for i in xrange(0, len(self.jobs)): self._job_scripts[self.jobs[i].name] = self.jobs[i].create_script(configuration) - #self.jobs[i].remote_logs = (self._job_scripts[self.jobs[i].name] + ".out".format(i),self._job_scripts[self.jobs[i].name] + ".err".format(i)) self._common_script = self._create_common_script() def _create_common_script(self): diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py index cebb30848..ce97cd319 100644 --- a/autosubmit/monitor/monitor.py +++ b/autosubmit/monitor/monitor.py @@ -353,9 +353,6 @@ class Monitor: def write_output_txt_recursive(self,job,output_file,level,path): log_out = "" log_err = "" - #if job.status in [Status.FAILED, Status.COMPLETED]: - # log_out = path + "/" + job.local_logs[0] - # log_err = path + "/" + job.local_logs[1] output = level+job.name + " " + Status().VALUE_TO_KEY[job.status] +"\n" #+ " " + log_out + " " + log_err + "\n" output_file.write(output) if job.has_children() > 0: diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index c83882d90..ed8285ff1 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -781,10 +781,12 @@ class ParamikoPlatform(Platform): header = self.header.SERIAL else: header = self.header.PARALLEL - + #TODO str_datetime = date2str(datetime.datetime.now(), 'S') - out_filename = "{0}.{1}.out".format(job.name, str_datetime) - err_filename = "{0}.{1}.err".format(job.name, str_datetime) + out_filename = "{0}.cmd.out".format(job.name) + err_filename = "{0}.cmd.err".format(job.name) + #out_filename = "{0}.{1}.out".format(job.name, str_datetime) + #err_filename = "{0}.{1}.err".format(job.name, str_datetime) #job.local_logs = (out_filename, err_filename) header = header.replace('%OUT_LOG_DIRECTIVE%', out_filename) header = header.replace('%ERR_LOG_DIRECTIVE%', err_filename) -- GitLab From efa7dfbbfbfe4e31d928cc0b0e8c6e6222aae60f Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 2 Feb 2021 17:10:25 +0100 Subject: [PATCH 4/4] Log changes --- autosubmit/autosubmit.py | 6 ++-- autosubmit/config/basicConfig.py | 2 +- autosubmit/job/job.py | 2 +- autosubmit/job/job_packages.py | 26 +++++----------- autosubmit/platforms/locplatform.py | 36 +++++++++++++++++++++-- autosubmit/platforms/paramiko_platform.py | 5 ++-- 6 files changed, 50 insertions(+), 27 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index bb00d96a6..472e9eabd 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1852,15 +1852,15 @@ class Autosubmit: jobs_id = platform.submit_Script(hold=hold) except AutosubmitError as e: jobs_id = None - if e.message.lower().find("bad parameters") != -1: + if e.message.lower().find("bad parameters") != -1 or e.message.lower().find("invalid partition") != -1: error_msg = "" for package_tmp in valid_packages_to_submit: for job_tmp in package_tmp.jobs: if job_tmp.section not in error_msg: error_msg += job_tmp.section + "&" raise AutosubmitCritical( - "Submission failed, check job and queue specified of job_sections of {0}".format( - error_msg[:-1]), 7014, e.trace) + "Submission failed, check job,queue and partition specified of job_sections of {0}".format( + error_msg[:-1]), 7014, e.message) except BaseException as e: raise AutosubmitError( "Submission failed, this can be due a failure on the platform", 6015, e.message) diff --git a/autosubmit/config/basicConfig.py b/autosubmit/config/basicConfig.py index 1ef8bb4db..3fa785362 100755 --- a/autosubmit/config/basicConfig.py +++ b/autosubmit/config/basicConfig.py @@ -76,7 +76,7 @@ class BasicConfig: """ if not os.path.isfile(file_path): return - Log.debug('Reading config from ' + file_path) + #Log.debug('Reading config from ' + file_path) parser = SafeConfigParser() parser.optionxform = str parser.read(file_path) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 3ea20deb9..1cad4bfa7 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -536,7 +536,7 @@ class Job(object): retries = 5 sleeptime = 0 i = 0 - sleep(10) + sleep(5) no_continue = False try: while (not out_exist and not err_exist) and i < retries: diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py index ff1b86cbd..7e219c689 100644 --- a/autosubmit/job/job_packages.py +++ b/autosubmit/job/job_packages.py @@ -137,7 +137,7 @@ class JobPackageBase(object): thread_number = thread_number * 4 elif len(self.jobs) > 10000: thread_number = thread_number * 5 - chunksize = int((len(self.jobs) + thread_number - 1) / thread_number); + chunksize = int((len(self.jobs) + thread_number - 1) / thread_number) try: if len(self.jobs) < thread_number: for job in self.jobs: @@ -166,19 +166,7 @@ class JobPackageBase(object): raise AutosubmitCritical( "Error on {1}, template [{0}] still does not exists in running time(check=on_submission actived) ".format(job.file,job.name), 7014) Log.debug("Creating Scripts") - if only_generate: - if not exit: - if len(self.jobs) < thread_number: - self._create_scripts(configuration) - else: - Lhandle = list() - for i in xrange(0, len(self.jobs), chunksize): - Lhandle.append(self._create_scripts_threaded(self.jobs[i:i + chunksize], configuration)) - for dataThread in Lhandle: - dataThread.join() - self._common_script = self._create_common_script() - - else: + if not exit: if len(self.jobs) < thread_number: self._create_scripts(configuration) else: @@ -188,10 +176,12 @@ class JobPackageBase(object): for dataThread in Lhandle: dataThread.join() self._common_script = self._create_common_script() - Log.debug("Sending Files") - self._send_files() - Log.debug("Submitting") - self._do_submission(hold=hold) + if not only_generate: + + Log.debug("Sending Files") + self._send_files() + Log.debug("Submitting") + self._do_submission(hold=hold) def _create_scripts(self, configuration): diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index a9a0073a3..bedc91c5a 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -25,8 +25,8 @@ from autosubmit.platforms.paramiko_platform import ParamikoPlatform from autosubmit.platforms.headers.local_header import LocalHeader from autosubmit.config.basicConfig import BasicConfig -from log.log import Log from time import sleep +from log.log import Log, AutosubmitError, AutosubmitCritical class LocalPlatform(ParamikoPlatform): """ @@ -185,7 +185,39 @@ class LocalPlatform(ParamikoPlatform): Log.debug('Could not remove file {0}'.format(os.path.join(self.tmp_path, filename))) return False return True - + def move_file(self, src, dest, must_exist=False): + """ + Moves a file on the platform (includes .err and .out) + :param src: source name + :type src: str + :param dest: destination name + :param must_exist: ignore if file exist or not + :type dest: str + """ + try: + path_root = self.get_files_path() + os.rename(os.path.join(path_root, src),os.path.join(path_root, dest)) + return True + except IOError as e: + raise AutosubmitError('File {0} does not exists, something went wrong with the platform'.format( + path_root), 6004, e.message) + if must_exist: + raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format( + path_root), 6004, e.message) + else: + Log.debug("File {0} doesn't exists ".format(path_root)) + return False + except Exception as e: + if str(e) in "Garbage": + raise AutosubmitError('File {0} does not exists'.format( + os.path.join(self.get_files_path(), src)), 6004, str(e)) + if must_exist: + raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format( + os.path.join(self.get_files_path(), src)), 6004, str(e)) + else: + Log.printlog("Log file couldn't be moved: {0}".format( + os.path.join(self.get_files_path(), src)), 5001) + return False def get_ssh_output(self): return self._ssh_output def get_ssh_output_err(self): diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index ed8285ff1..46cac5e89 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -740,10 +740,11 @@ class ParamikoPlatform(Platform): executable = 'python' elif job.type == Type.R: executable = 'Rscript' + remote_logs = (job.script_name + ".out", job.script_name + ".err") return 'nohup ' + executable + ' {0} > {1} 2> {2} & echo $!'.format( os.path.join(self.remote_log_dir, job_script), - os.path.join(self.remote_log_dir, job.remote_logs[0]), - os.path.join(self.remote_log_dir, job.remote_logs[1]) + os.path.join(self.remote_log_dir, remote_logs[0]), + os.path.join(self.remote_log_dir, remote_logs[1]) ) @staticmethod -- GitLab