From 1d97c6bd3551c28d124c25b14219739be7854a0b Mon Sep 17 00:00:00 2001 From: jlope2 Date: Wed, 9 Nov 2016 15:25:40 +0100 Subject: [PATCH 1/7] Refactored recovery on ECMWF platform. Fixes #208 --- autosubmit/platforms/ecplatform.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index 447716295..e887a45c2 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -142,12 +142,15 @@ class EcPlatform(ParamikoPlatform): command = '{0} {3}:{2} {1}'.format(self.get_cmd, local_path, os.path.join(self.get_files_path(), filename), self.host) try: - subprocess.check_call(command, stdout=open(os.devnull, 'w'), shell=True) - except subprocess.CalledProcessError: - if must_exist: - raise Exception('File {0} does not exists'.format(filename)) - return False - return True + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + out, _ = process.communicate() + process_ok = False if 'No such file' in out or process.returncode != 0 else True + except Exception: + process_ok = False + + if not process_ok and must_exist: + raise Exception('File {0} does not exists'.format(filename)) + return process_ok def delete_file(self, filename): command = '{0} {1}:{2}'.format(self.del_cmd, self.host, os.path.join(self.get_files_path(), filename)) -- GitLab From 59ab5e341c944e159f42aedfefa695e63eb08ed8 Mon Sep 17 00:00:00 2001 From: jlope2 Date: Wed, 9 Nov 2016 15:26:16 +0100 Subject: [PATCH 2/7] Improved UX for local platform (avoiding OS error messages) --- autosubmit/platforms/locplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index f51d2af92..4c5613c67 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -120,7 +120,7 @@ class LocalPlatform(ParamikoPlatform): command = '{0} {1} 
{2}'.format(self.get_cmd, os.path.join(self.tmp_path, 'LOG_' + self.expid, filename), local_path) try: - subprocess.check_call(command, shell=True) + subprocess.check_call(command, stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'), shell=True) except subprocess.CalledProcessError: if must_exist: raise Exception('File {0} does not exists'.format(filename)) -- GitLab From 2296551ed274cd624c1f6ea75553bac2039092f1 Mon Sep 17 00:00:00 2001 From: jlope2 Date: Wed, 9 Nov 2016 15:39:13 +0100 Subject: [PATCH 3/7] Now checking there's an SCRATCH_DIR in the PLATFORMS.CONF. Fixes #207 --- autosubmit/config/config_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 83fc98a56..8f06e8c68 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -261,6 +261,7 @@ class AutosubmitConfig: # result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'VERSION') result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'HOST') + result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'SCRATCH_DIR') result = result and AutosubmitConfig.check_is_boolean(self._platforms_parser, section, 'ADD_PROJECT_TO_HOST', False) result = result and AutosubmitConfig.check_is_boolean(self._platforms_parser, section, 'TEST_SUITE', False) -- GitLab From a15fe9c34a244923d613171e5689d4df43e5a0e7 Mon Sep 17 00:00:00 2001 From: jlope2 Date: Wed, 9 Nov 2016 17:12:12 +0100 Subject: [PATCH 4/7] Retrieving .err & .out files when queue TYPE=PS. 
Fixes #209 --- autosubmit/platforms/ecplatform.py | 2 +- autosubmit/platforms/locplatform.py | 4 ++-- autosubmit/platforms/lsfplatform.py | 2 +- autosubmit/platforms/paramiko_platform.py | 22 ++++++++++++---------- autosubmit/platforms/pbsplatform.py | 2 +- autosubmit/platforms/psplatform.py | 4 ++-- autosubmit/platforms/sgeplatform.py | 2 +- autosubmit/platforms/slurmplatform.py | 2 +- 8 files changed, 21 insertions(+), 19 deletions(-) diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index e887a45c2..e8d96d289 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -100,7 +100,7 @@ class EcPlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return self._checkjob_cmd + str(job_id) - def get_submit_cmd(self, job_script, job_type): + def get_submit_cmd(self, job_script, job): return self._submit_cmd + job_script def connect(self): diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index 4c5613c67..ff69d2bf2 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -79,8 +79,8 @@ class LocalPlatform(ParamikoPlatform): jobs_xml = dom.getElementsByTagName("JB_job_number") return [int(element.firstChild.nodeValue) for element in jobs_xml] - def get_submit_cmd(self, job_script, job_type): - return self.get_call(job_script, job_type) + def get_submit_cmd(self, job_script, job): + return self.get_call(job_script, job) def get_checkjob_cmd(self, job_id): return self.get_pscall(job_id) diff --git a/autosubmit/platforms/lsfplatform.py b/autosubmit/platforms/lsfplatform.py index 83a664d9c..aed518c08 100644 --- a/autosubmit/platforms/lsfplatform.py +++ b/autosubmit/platforms/lsfplatform.py @@ -81,7 +81,7 @@ class LsfPlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return self._checkjob_cmd + str(job_id) - def get_submit_cmd(self, job_script, job_type): + def get_submit_cmd(self, job_script, job): return 
self._submit_cmd + job_script diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 713ab52c4..d22f9c1b5 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -163,7 +163,7 @@ class ParamikoPlatform(Platform): :return: job id for the submitted job :rtype: int """ - if self.send_command(self.get_submit_cmd(script_name, job.type)): + if self.send_command(self.get_submit_cmd(script_name, job)): job_id = self.get_submitted_job_id(self.get_ssh_output()) Log.debug("Job ID: {0}", job_id) return int(job_id) @@ -298,27 +298,29 @@ class ParamikoPlatform(Platform): Log.debug('Output {0}', self._ssh_output) return self._ssh_output - def get_call(self, job_script, job_type): + def get_call(self, job_script, job): """ Gets execution command for given job - :param job_type: + :param job: job + :type job: Job :param job_script: script to run :type job_script: str :return: command to execute script :rtype: str """ executable = '' - if job_type == Type.BASH: + if job.type == Type.BASH: executable = 'bash' - elif job_type == Type.PYTHON: + elif job.type == Type.PYTHON: executable = 'python' - elif job_type == Type.R: + elif job.type == Type.R: executable = 'Rscript' - return 'nohup ' + executable + ' {0} > {0}.{1}.out 2> {0}.{1}.err & echo $!'.format( - os.path.join(self.remote_log_dir, - job_script), - date2str(datetime.datetime.now(), 'S')) + return 'nohup ' + executable + ' {0} > {1} 2> {2} & echo $!'.format( + os.path.join(self.remote_log_dir, job_script), + os.path.join(self.remote_log_dir, job.out_filename), + os.path.join(self.remote_log_dir, job.err_filename) + ) @staticmethod def get_pscall(job_id): diff --git a/autosubmit/platforms/pbsplatform.py b/autosubmit/platforms/pbsplatform.py index 2037f152c..d94d3d5aa 100644 --- a/autosubmit/platforms/pbsplatform.py +++ b/autosubmit/platforms/pbsplatform.py @@ -90,7 +90,7 @@ class PBSPlatform(ParamikoPlatform): def 
jobs_in_queue(self): return ''.split() - def get_submit_cmd(self, job_script, job_type): + def get_submit_cmd(self, job_script, job): return self._submit_cmd + job_script def get_checkjob_cmd(self, job_id): diff --git a/autosubmit/platforms/psplatform.py b/autosubmit/platforms/psplatform.py index 9e01afed4..8ba0f1b29 100644 --- a/autosubmit/platforms/psplatform.py +++ b/autosubmit/platforms/psplatform.py @@ -73,8 +73,8 @@ class PsPlatform(ParamikoPlatform): jobs_xml = dom.getElementsByTagName("JB_job_number") return [int(element.firstChild.nodeValue) for element in jobs_xml] - def get_submit_cmd(self, job_script, job_type): - return self.get_call(job_script, job_type) + def get_submit_cmd(self, job_script, job): + return self.get_call(job_script, job) def get_checkjob_cmd(self, job_id): return self.get_pscall(job_id) diff --git a/autosubmit/platforms/sgeplatform.py b/autosubmit/platforms/sgeplatform.py index 4aaaa4322..73aa130b8 100644 --- a/autosubmit/platforms/sgeplatform.py +++ b/autosubmit/platforms/sgeplatform.py @@ -78,7 +78,7 @@ class SgePlatform(ParamikoPlatform): jobs_xml = dom.getElementsByTagName("JB_job_number") return [int(element.firstChild.nodeValue) for element in jobs_xml] - def get_submit_cmd(self, job_script, job_type): + def get_submit_cmd(self, job_script, job): return self._submit_cmd + job_script def get_checkjob_cmd(self, job_id): diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 4aa307f0d..4bf4a03f8 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -90,7 +90,7 @@ class SlurmPlatform(ParamikoPlatform): jobs_xml = dom.getElementsByTagName("JB_job_number") return [int(element.firstChild.nodeValue) for element in jobs_xml] - def get_submit_cmd(self, job_script, job_type): + def get_submit_cmd(self, job_script, job): return self._submit_cmd + job_script def get_checkjob_cmd(self, job_id): -- GitLab From 4547dd26fcad592acb4617be2b8c14c88584c3a5 Mon Sep 
17 00:00:00 2001 From: jlope2 Date: Tue, 15 Nov 2016 08:12:04 +0100 Subject: [PATCH 5/7] Now the local transfer of the remote logs can be disabled. Fixes #216 --- autosubmit/autosubmit.py | 5 ++++- autosubmit/config/config_common.py | 9 +++++++++ autosubmit/config/files/autosubmit.conf | 2 ++ autosubmit/job/job.py | 6 ++++-- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 3a650e4a7..c84da67f2 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -656,8 +656,11 @@ class Autosubmit: for platform in platforms_to_test: for job in job_list.get_in_queue(platform): prev_status = job.status - if prev_status != job.update_status(platform.check_job(job.id)): + if prev_status != job.update_status(platform.check_job(job.id), + as_conf.get_copy_remote_logs() == 'true'): + if as_conf.get_notifications() == 'true': + if Status.VALUE_TO_KEY[job.status] in job.notify_on: Notifier.notify_status_change(MailNotifier(BasicConfig), expid, job.name, Status.VALUE_TO_KEY[prev_status], diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 8f06e8c68..3b6405389 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -802,6 +802,15 @@ class AutosubmitConfig: """ return self.get_option(self._conf_parser, 'mail', 'NOTIFICATIONS', 'false').lower() + def get_copy_remote_logs(self): + """ + Returns whether the user has enabled the local copy of the remote logs in autosubmit's config file + + :return: lower-cased value of the COPY_REMOTE_LOGS option ('true' when the option is not set) + :rtype: str + """ + return self.get_option(self._conf_parser, 'storage', 'COPY_REMOTE_LOGS', 'true').lower() + def get_mails_to(self): """ Returns the address where notifications will be sent from autosubmit's config file diff --git a/autosubmit/config/files/autosubmit.conf b/autosubmit/config/files/autosubmit.conf index 509971269..78dc9f423 100644 --- a/autosubmit/config/files/autosubmit.conf +++ 
b/autosubmit/config/files/autosubmit.conf @@ -34,3 +34,5 @@ API = paramiko # Defines the way of storing the progress of the experiment. The available options are: # A PICKLE file (pkl) or an SQLite database (db). Default = pkl TYPE = pkl +# Defines if the remote logs will be copied to the local platform. Default = True. +COPY_REMOTE_LOGS = True diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 7b92508e8..3d3e479a4 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -389,11 +389,12 @@ class Job: """ return self._get_from_total_stats(1) - def update_status(self, new_status): + def update_status(self, new_status, copy_remote_logs): """ Updates job status, checking COMPLETED file if needed :param new_status: job status retrieved from the platform + :param copy_remote_logs: should copy remote logs when finished? :type: Status """ previous_status = self.status @@ -429,7 +430,8 @@ class Job: self.write_start_time() if self.status in [Status.COMPLETED, Status.FAILED, Status.UNKNOWN]: self.write_end_time(self.status == Status.COMPLETED) - self.get_platform().get_logs_files(self.expid, self.out_filename, self.err_filename) + if copy_remote_logs: + self.get_platform().get_logs_files(self.expid, self.out_filename, self.err_filename) return self.status def check_completion(self, default_status=Status.FAILED): -- GitLab From 5872fb973830ee7654e76a480de4cc47d65295a4 Mon Sep 17 00:00:00 2001 From: jlope2 Date: Tue, 15 Nov 2016 09:04:25 +0100 Subject: [PATCH 6/7] Autosubmit 'refresh' fixed. 
Fixes #211 --- autosubmit/autosubmit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index c84da67f2..f5ceb7c94 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1451,6 +1451,7 @@ class Autosubmit: Log.set_file(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, BasicConfig.LOCAL_TMP_DIR, 'refresh.log')) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) + as_conf.reload() if not as_conf.check_expdef_conf(): Log.critical('Can not copy with invalid configuration') return False -- GitLab From 354ac812c4f1cdccf129c48a01d4b562530c15a2 Mon Sep 17 00:00:00 2001 From: jlope2 Date: Fri, 25 Nov 2016 17:11:01 +0100 Subject: [PATCH 7/7] Bumped version number to 3.7.6 --- CHANGELOG | 7 +++++++ VERSION | 2 +- docs/source/conf.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8efc77bd6..1d24e8584 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +3.7.6 + Fixed refresh + Fixed recovery for ECMWF + Local logs copy can be disabled + Some UX improvements + Other minor bugfixes + 3.7.5 Fixed minor with LSF's logs diff --git a/VERSION b/VERSION index aaf18d294..897e56be0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.7.5 +3.7.6 diff --git a/docs/source/conf.py b/docs/source/conf.py index d6132700f..99b48afd8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -64,7 +64,7 @@ author = u'Earth Science Department, Barcelona Supercomputing Center, BSC' # The short X.Y version. version = '3.7' # The full version, including alpha/beta/rc tags. -release = '3.7.5' +release = '3.7.6' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -- GitLab