diff --git a/CHANGELOG b/CHANGELOG index 39830fa43307f71aeeff3f8582fcaad093ed9810..3f01bb2057baf954312f046daade44089623c187 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,44 @@ +3.8.1 + First version with job packages ~ only paramiko (LSF, SLURM and PBS) + - Vertical + - Horizontal + - With dependencies ~ only for vertical + Python wrapper for CCA (ECMWF) + On submission template checking + Some UX improvements + Other minor bug fixes + 3.8.0 - First version with LSF arrays + First version with LSF arrays: + - Include all the bug fixes & features from 3.7.7 + - NOT include the bug fixes from 3.7.8 + +3.7.8 + Some bug fixes: + - Database persistence + - Delete command + - Unarchive command + - CHUNKINI option + - Paramiko permissions + - Paramiko non-existing remote copy + - Saga sessions + +3.7.7 + Some improvements for Slurm platforms + Geo-definition of processors + New configuration variables: + - CHUNKINI + - MEMORY + - MEMORY_PER_TASK + - HYPERTHREADING + Other minor bug fixes 3.7.6 Fixed refresh Fixed recovery for ECMWF Local logs copy can be disabled Some UX improvements - Other minor bugfixes + Other minor bug fixes 3.7.5 Fixed minor with LSF's logs @@ -18,22 +50,22 @@ PROCESSORS_PER_NODE/TASKS now optional Exclusivity for MN3 (with Paramiko) THREADS optional for ECMWF - Minor bugfixes + Minor bug fixes 3.7.3 Fixed error with logs directives (err & out were swapped) Added new option for MN3: SCRATCH_FREE_SPACE PROCESSORS_PER_NODE/TASKS now available with Paramiko - Other minor bugfixes + Other minor bug fixes 3.7.2 - Minor bugfixes + Minor bug fixes Regression test suite improved Solved some problems with paramiko & ECMWF platform 3.7.1 Fixed issue in setstatus - Added new testcase command + Added new 'testcase' command 3.7.0 Big improvements on memory consumption @@ -44,7 +76,7 @@ Fixed error with LSF schedulers by default Fixed bug on stats feature Fixed some bugs with Git and SVN - Other minor bugfixes + Other minor bug fixes 3.6.1 Fixed an 
incompatibility with recent versions of radical.utils (saga) @@ -54,14 +86,14 @@ UX improved on some error cases Fixed permission backwards incompatibility Fixed authorization problems on SAGA implementation - Other minor bugfixes + Other minor bug fixes 3.5.0 Added another mechanism for SAGA errors prevention Added no-plot option to setstatus Added exclusivity and processes per host support for MN Check method fixed (not working since 3.2) - Other minor bugfixes + Other minor bug fixes 3.4.1 Hot-fix ECMWF binary (bash, R, python) @@ -135,7 +167,7 @@ Minor bug fixes 3.1.1 - Hotfix for issue with noleap calendar + Fix for issue with 'noleap' calendar 3.1.0 Added archive and unarchive commands @@ -152,13 +184,13 @@ Fixed bug in delete. Added readme and changelog commands. MAX_WAITING_JOBS and TOTAL_JOBS now defined by platform. - Simplified console output of run subcommand. + Simplified console output of run sub command. 3.0.3 Fixed bug in expid test. 3.0.2 - Fixed bug in localplatform. + Fixed bug in the local platform. 3.0.1 Fixed bug in config. diff --git a/VERSION b/VERSION index 19811903a7f7584d7aa752ea29bbf9d74cf78b47..f2807196747ffcee4ae8a36604b9cff7ebeed9ca 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.8.0 +3.8.1 diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 6bdecc01aaaa66f6fb58eafffcb77a6d24ed7a3f..5530ba3f50c92383756f7f485c5ddb959e81f4e1 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. 
@@ -61,25 +61,27 @@ import saga from config.basicConfig import BasicConfig # noinspection PyPackageRequirements from config.config_common import AutosubmitConfig -from config.parser_factory import ConfigParserFactory +from bscearth.utils.config_parser import ConfigParserFactory from job.job_common import Status from git.autosubmit_git import AutosubmitGit from job.job_list import JobList from job.job_list_persistence import JobListPersistenceDb from job.job_list_persistence import JobListPersistencePkl # noinspection PyPackageRequirements -from config.log import Log +from bscearth.utils.log import Log from database.db_common import create_db from experiment.experiment_common import new_experiment from experiment.experiment_common import copy_experiment from database.db_common import delete_experiment from database.db_common import get_autosubmit_version from monitor.monitor import Monitor -from date.chunk_date_lib import date2str +from bscearth.utils.date import date2str from notifications.mail_notifier import MailNotifier from notifications.notifier import Notifier from platforms.saga_submitter import SagaSubmitter from platforms.paramiko_submitter import ParamikoSubmitter +from job.job_exceptions import WrongTemplateException +from job.job_packager import JobPackager # noinspection PyUnusedLocal @@ -447,7 +449,7 @@ class Autosubmit: os.mkdir(os.path.join(BasicConfig.LOCAL_ROOT_DIR, exp_id, 'conf')) Log.info("Copying config files...") - # autosubmit config and experiment copyed from AS. + # autosubmit config and experiment copied from AS. 
files = resource_listdir('autosubmit.config', 'files') for filename in files: if resource_exists('autosubmit.config', 'files/' + filename): @@ -517,7 +519,7 @@ class Autosubmit: :type force: bool :type expid: str :param expid: identifier of the experiment to delete - :param force: if True, does not ask for confrmation + :param force: if True, does not ask for confirmation :returns: True if succesful, False if not :rtype: bool @@ -563,20 +565,20 @@ class Autosubmit: :rtype: bool """ if expid is None: - Log.critical("Missing expid.") + Log.critical("Missing experiment id") BasicConfig.read() exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) if not os.path.exists(exp_path): - Log.critical("The directory %s is needed and does not exist." % exp_path) + Log.critical("The directory %s is needed and does not exist" % exp_path) Log.warning("Does an experiment with the given id exist?") return 1 # checking if there is a lock file to avoid multiple running on the same expid try: with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): - Log.info("Preparing .lock file to avoid multiple instances with same expid.") + Log.info("Preparing .lock file to avoid multiple instances with same experiment id") Log.set_file(os.path.join(tmp_path, 'run.log')) os.system('clear') @@ -610,7 +612,7 @@ class Autosubmit: job_list = Autosubmit.load_job_list(expid, as_conf) Log.debug("Starting from job list restored from {0} files", pkl_dir) - Log.debug("Length of joblist: {0}", len(job_list)) + Log.debug("Length of the jobs list: {0}", len(job_list)) Autosubmit._load_parameters(as_conf, job_list, submitter.platforms) @@ -656,6 +658,8 @@ class Autosubmit: for platform in platforms_to_test: for job in job_list.get_in_queue(platform): prev_status = job.status + if job.status == Status.FAILED: + continue if prev_status != job.update_status(platform.check_job(job.id), as_conf.get_copy_remote_logs() == 'true'): @@ 
-682,7 +686,7 @@ class Autosubmit: Log.info("No more jobs to run.") if len(job_list.get_failed()) > 0: - Log.info("Some jobs have failed and reached maximun retrials") + Log.info("Some jobs have failed and reached maximum retrials") return False else: Log.result("Run successful") @@ -691,6 +695,9 @@ class Autosubmit: except portalocker.AlreadyLocked: Autosubmit.show_lock_warning(expid) + except WrongTemplateException: + return False + @staticmethod def submit_ready_jobs(as_conf, job_list, platforms_to_test): """ @@ -705,13 +712,18 @@ class Autosubmit: """ save = False for platform in platforms_to_test: - for job_package in job_list.get_ready_packages(platform): - try: - job_package.submit(as_conf, job_list.parameters) - save = True - except Exception: - Log.error("{0} submission failed", platform.name) - continue + Log.debug("\nJobs ready for {1}: {0}", len(job_list.get_ready(platform)), platform.name) + packages_to_submit = JobPackager(as_conf, platform, job_list).build_packages() + for package in packages_to_submit: + try: + package.submit(as_conf, job_list.parameters) + save = True + except WrongTemplateException as e: + Log.error("Invalid parameter substitution in {0} template", e.job_name) + raise + except Exception: + Log.error("{0} submission failed", platform.name) + raise return save @staticmethod @@ -948,11 +960,11 @@ class Autosubmit: def recovery(expid, noplot, save, all_jobs, hide): """ Method to check all active jobs. If COMPLETED file is found, job status will be changed to COMPLETED, - otherwise it will be set to WAITING. It will also update the joblist. + otherwise it will be set to WAITING. It will also update the jobs list. :param expid: identifier of the experiment to recover :type expid: str - :param save: If true, recovery saves changes to joblist + :param save: If true, recovery saves changes to the jobs list :type save: bool :param all_jobs: if True, it tries to get completed files for all jobs, not only active. 
:type all_jobs: bool @@ -1022,7 +1034,7 @@ class Autosubmit: Log.info("CHANGED job '{0}' status to WAITING".format(job.name)) end = datetime.datetime.now() Log.info("Time spent: '{0}'".format(end - start)) - Log.info("Updating joblist") + Log.info("Updating the jobs list") sys.setrecursionlimit(50000) job_list.update_list(as_conf) @@ -1034,31 +1046,34 @@ class Autosubmit: Log.result("Recovery finalized") if not noplot: - Log.info("\nPloting joblist...") + Log.info("\nPlotting the jobs list...") monitor_exp = Monitor() monitor_exp.generate_output(expid, job_list.get_job_list(), show=not hide) return True @staticmethod - def check(expid): + def check(experiment_id): """ Checks experiment configuration and warns about any detected error or inconsistency. - :param expid: experiment identifier: - :type expid: str + :param experiment_id: experiment identifier: + :type experiment_id: str """ BasicConfig.read() - exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) + exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id) if not os.path.exists(exp_path): - Log.critical("The directory %s is needed and does not exist." 
% exp_path) + Log.critical("The directory {0} is needed and does not exist.", exp_path) Log.warning("Does an experiment with the given id exist?") - return 1 + return False - Log.set_file(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, BasicConfig.LOCAL_TMP_DIR, 'check_exp.log')) - as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) + log_file = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id, BasicConfig.LOCAL_TMP_DIR, 'check_exp.log') + Log.set_file(log_file) + + as_conf = AutosubmitConfig(experiment_id, BasicConfig, ConfigParserFactory()) if not as_conf.check_conf_files(): return False + project_type = as_conf.get_project_type() if project_type != "none": if not as_conf.check_proj(): @@ -1069,17 +1084,16 @@ class Autosubmit: if len(submitter.platforms) == 0: return False - pkl_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, 'pkl') - job_list = Autosubmit.load_job_list(expid, as_conf) + pkl_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id, 'pkl') + job_list = Autosubmit.load_job_list(experiment_id, as_conf) Log.debug("Job list restored from {0} files", pkl_dir) Autosubmit._load_parameters(as_conf, job_list, submitter.platforms) - hpcarch = as_conf.get_platform() + hpc_architecture = as_conf.get_platform() for job in job_list.get_job_list(): if job.platform_name is None: - job.platform_name = hpcarch - # noinspection PyTypeChecker + job.platform_name = hpc_architecture job.platform = submitter.platforms[job.platform_name.lower()] job.update_parameters(as_conf, job_list.parameters) @@ -1490,35 +1504,30 @@ class Autosubmit: Autosubmit.unarchive(expid) return False - Log.result("Experiment archived succesfully") + Log.result("Experiment archived successfully") return True @staticmethod - def unarchive(expid): + def unarchive(experiment_id): """ Unarchives an experiment: uncompress folder from tar.gz and moves to experiments root folder - :param expid: experiment identifier - :type expid: str + :param experiment_id: 
experiment identifier + :type experiment_id: str """ BasicConfig.read() - exp_folder = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) - if not os.path.exists(exp_folder): - Log.critical("The directory %s is needed and does not exist." % exp_folder) - Log.warning("Does an experiment with the given id exist?") - return 1 - - Log.set_file(os.path.join(BasicConfig.LOCAL_ROOT_DIR, "ASlogs", 'unarchive{0}.log'.format(expid))) + Log.set_file(os.path.join(BasicConfig.LOCAL_ROOT_DIR, "ASlogs", 'unarchive{0}.log'.format(experiment_id))) + exp_folder = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id) if os.path.exists(exp_folder): - Log.error("Experiment {0} is not archived", expid) + Log.error("Experiment {0} is not archived", experiment_id) return False # Searching by year. We will store it on database year = datetime.datetime.today().year archive_path = None while year > 2000: - archive_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, str(year), '{0}.tar.gz'.format(expid)) + archive_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, str(year), '{0}.tar.gz'.format(experiment_id)) if os.path.exists(archive_path): break year -= 1 @@ -1540,7 +1549,7 @@ class Autosubmit: Log.critical("Can not extract tar file: {0}".format(e)) return False - Log.info("Unpacking finished.") + Log.info("Unpacking finished") try: os.remove(archive_path) @@ -1548,7 +1557,7 @@ class Autosubmit: Log.error("Can not remove archived file folder: {0}".format(e)) return False - Log.result("Experiment {0} unarchived succesfully", expid) + Log.result("Experiment {0} unarchived successfully", experiment_id) return True @staticmethod @@ -1586,10 +1595,10 @@ class Autosubmit: :param expid: experiment identifier :type expid: str - :param noplot: if True, method omits final ploting of joblist. Only needed on large experiments when plotting - time can be much larger than creation time. + :param noplot: if True, method omits final plotting of the jobs list. 
Only needed on large experiments when + plotting time can be much larger than creation time. :type noplot: bool - :return: True if succesful, False if not + :return: True if successful, False if not :rtype: bool :param hide: hides plot window :type hide: bool @@ -1641,13 +1650,14 @@ class Autosubmit: Log.error('There are repeated start dates!') return False num_chunks = as_conf.get_num_chunks() + chunk_ini = as_conf.get_chunk_ini() member_list = as_conf.get_member_list() if len(member_list) != len(set(member_list)): Log.error('There are repeated member names!') return False rerun = as_conf.get_rerun() - Log.info("\nCreating joblist...") + Log.info("\nCreating the jobs list...") job_list = JobList(expid, BasicConfig, ConfigParserFactory(), Autosubmit._get_job_list_persistence(expid, as_conf)) @@ -1659,7 +1669,8 @@ class Autosubmit: date_format = 'H' if date.minute > 1: date_format = 'M' - job_list.generate(date_list, member_list, num_chunks, parameters, date_format, as_conf.get_retrials(), + job_list.generate(date_list, member_list, num_chunks, chunk_ini, parameters, date_format, + as_conf.get_retrials(), as_conf.get_default_job_type()) if rerun == "true": chunk_list = Autosubmit._create_json(as_conf.get_chunk_list()) @@ -1667,14 +1678,14 @@ class Autosubmit: else: job_list.remove_rerun_only_jobs() - Log.info("\nSaving joblist...") + Log.info("\nSaving the jobs list...") job_list.save() if not noplot: - Log.info("\nPloting joblist...") + Log.info("\nPlotting the jobs list...") monitor_exp = Monitor() monitor_exp.generate_output(expid, job_list.get_job_list(), output, not hide) - Log.result("\nJob list created succesfully") + Log.result("\nJob list created successfully") Log.user_warning("Remember to MODIFY the MODEL config files!") return True @@ -1722,7 +1733,7 @@ class Autosubmit: project_path) shutil.rmtree(project_path, ignore_errors=True) return False - Log.debug("{0}" % output) + Log.debug("{0}", output) elif project_type == "local": local_project_path = 
as_conf.get_local_project_path() @@ -1770,7 +1781,7 @@ class Autosubmit: :param expid: experiment identifier :type expid: str - :param save: if true, saves the new joblist + :param save: if true, saves the new jobs list :type save: bool :param final: status to set on jobs :type final: str @@ -1785,7 +1796,6 @@ class Autosubmit: :param hide: hides plot window :type hide: bool """ - root_name = 'job_list' BasicConfig.read() exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) @@ -2164,7 +2174,7 @@ class Autosubmit: def _change_conf(testid, hpc, start_date, member, chunks, branch, random_select=False): as_conf = AutosubmitConfig(testid, BasicConfig, ConfigParserFactory()) exp_parser = as_conf.get_parser(ConfigParserFactory(), as_conf.experiment_file) - if AutosubmitConfig.get_bool_option(exp_parser, 'rerun', "RERUN", True): + if exp_parser.get_bool_option('rerun', "RERUN", True): Log.error('Can not test a RERUN experiment') return False @@ -2174,7 +2184,7 @@ class Autosubmit: platforms_parser = as_conf.get_parser(ConfigParserFactory(), as_conf.platforms_file) test_platforms = list() for section in platforms_parser.sections(): - if AutosubmitConfig.get_option(platforms_parser, section, 'TEST_SUITE', 'false').lower() == 'true': + if platforms_parser.get_option(section, 'TEST_SUITE', 'false').lower() == 'true': test_platforms.append(section) if len(test_platforms) == 0: Log.critical('No test HPC defined') @@ -2224,8 +2234,9 @@ class Autosubmit: date_format = 'H' if date.minute > 1: date_format = 'M' - job_list.generate(date_list, as_conf.get_member_list(), as_conf.get_num_chunks(), as_conf.load_parameters(), - date_format, as_conf.get_retrials(), as_conf.get_default_job_type(), False) + job_list.generate(date_list, as_conf.get_member_list(), as_conf.get_num_chunks(), as_conf.get_chunk_ini(), + as_conf.load_parameters(), date_format, as_conf.get_retrials(), + as_conf.get_default_job_type(), False) return 
job_list @staticmethod diff --git a/autosubmit/config/basicConfig.py b/autosubmit/config/basicConfig.py index f2f467c81e410f865669f4740c5ea40c4b81f2f8..08db17589186393baa685563e0983dd3c94d38ea 100755 --- a/autosubmit/config/basicConfig.py +++ b/autosubmit/config/basicConfig.py @@ -24,7 +24,7 @@ except ImportError: from ConfigParser import SafeConfigParser import os -from autosubmit.config.log import Log +from bscearth.utils.log import Log class BasicConfig: diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 3b64053897d8bd25889a58ec8d89c7855a5e453d..70356b200bfc2d840b14978c58ac8349a1e14d2b 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -28,12 +28,12 @@ import subprocess from pyparsing import nestedExpr -from autosubmit.date.chunk_date_lib import parse_date -from autosubmit.config.log import Log +from bscearth.utils.date import parse_date +from bscearth.utils.log import Log from autosubmit.config.basicConfig import BasicConfig -class AutosubmitConfig: +class AutosubmitConfig(object): """ Class to handle experiment configuration coming from file or database @@ -48,19 +48,28 @@ class AutosubmitConfig: self.parser_factory = parser_factory + self._conf_parser = None self._conf_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "autosubmit_" + expid + ".conf") + self._exp_parser = None self._exp_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "expdef_" + expid + ".conf") + self._platforms_parser = None self._platforms_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "platforms_" + expid + ".conf") + self._jobs_parser = None self._jobs_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "jobs_" + expid + ".conf") + self._proj_parser = None self._proj_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "proj_" + expid + ".conf") self.check_proj_file() + @property + 
def jobs_parser(self): + return self._jobs_parser + @property def experiment_file(self): """ @@ -133,7 +142,7 @@ class AutosubmitConfig: :return: wallclock time :rtype: str """ - return AutosubmitConfig.get_option(self.jobs_parser, section, 'WALLCLOCK', '') + return self._jobs_parser.get_option(section, 'WALLCLOCK', '') def get_processors(self, section): """ @@ -143,7 +152,7 @@ class AutosubmitConfig: :return: wallclock time :rtype: str """ - return int(AutosubmitConfig.get_option(self.jobs_parser, section, 'PROCESSORS', 1)) + return str(self._jobs_parser.get_option(section, 'PROCESSORS', 1)) def get_threads(self, section): """ @@ -153,7 +162,7 @@ class AutosubmitConfig: :return: threads needed :rtype: str """ - return int(AutosubmitConfig.get_option(self.jobs_parser, section, 'THREADS', 1)) + return int(self._jobs_parser.get_option(section, 'THREADS', 1)) def get_tasks(self, section): """ @@ -163,7 +172,7 @@ class AutosubmitConfig: :return: tasks (processes) per host :rtype: int """ - return int(AutosubmitConfig.get_option(self.jobs_parser, section, 'TASKS', 0)) + return int(self._jobs_parser.get_option(section, 'TASKS', 0)) def get_scratch_free_space(self, section): """ @@ -173,7 +182,7 @@ class AutosubmitConfig: :return: percentage of scratch free space needed :rtype: int """ - return int(AutosubmitConfig.get_option(self.jobs_parser, section, 'SCRATCH_FREE_SPACE', 0)) + return int(self._jobs_parser.get_option(section, 'SCRATCH_FREE_SPACE', 0)) def get_memory(self, section): """ @@ -183,7 +192,17 @@ class AutosubmitConfig: :return: memory needed :rtype: str """ - return int(AutosubmitConfig.get_option(self.jobs_parser, section, 'MEMORY', 0)) + return str(self._jobs_parser.get_option(section, 'MEMORY', '')) + + def get_memory_per_task(self, section): + """ + Gets memory per task needed for the given job type + :param section: job type + :type section: str + :return: memory per task needed + :rtype: str + """ + return str(self._jobs_parser.get_option(section, 
'MEMORY_PER_TASK', '')) def check_conf_files(self): """ @@ -215,12 +234,12 @@ class AutosubmitConfig: result = True self._conf_parser.read(self._conf_parser_file) - result = result and AutosubmitConfig.check_exists(self._conf_parser, 'config', 'AUTOSUBMIT_VERSION') - result = result and AutosubmitConfig.check_is_int(self._conf_parser, 'config', 'MAXWAITINGJOBS', True) - result = result and AutosubmitConfig.check_is_int(self._conf_parser, 'config', 'TOTALJOBS', True) - result = result and AutosubmitConfig.check_is_int(self._conf_parser, 'config', 'SAFETYSLEEPTIME', True) - result = result and AutosubmitConfig.check_is_int(self._conf_parser, 'config', 'RETRIALS', True) - result = result and AutosubmitConfig.check_is_boolean(self._conf_parser, 'mail', 'NOTIFICATIONS', False) + result = result and self._conf_parser.check_exists('config', 'AUTOSUBMIT_VERSION') + result = result and self._conf_parser.check_is_int('config', 'MAXWAITINGJOBS', True) + result = result and self._conf_parser.check_is_int('config', 'TOTALJOBS', True) + result = result and self._conf_parser.check_is_int('config', 'SAFETYSLEEPTIME', True) + result = result and self._conf_parser.check_is_int('config', 'RETRIALS', True) + result = result and self._conf_parser.check_is_boolean('mail', 'NOTIFICATIONS', False) result = result and self.is_valid_communications_library() result = result and self.is_valid_storage_type() @@ -251,23 +270,20 @@ class AutosubmitConfig: Log.error('There are repeated platforms names') for section in self._platforms_parser.sections(): - result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'TYPE') - platform_type = AutosubmitConfig.get_option(self._platforms_parser, section, 'TYPE', '').lower() + result = result and self._platforms_parser.check_exists(section, 'TYPE') + platform_type = self._platforms_parser.get_option(section, 'TYPE', '').lower() if platform_type != 'ps': - result = result and AutosubmitConfig.check_exists(self._platforms_parser, 
section, 'PROJECT') - result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'USER') - - # if platform_type in ['pbs', 'ecaccess']: - # result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'VERSION') - - result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'HOST') - result = result and AutosubmitConfig.check_exists(self._platforms_parser, section, 'SCRATCH_DIR') - result = result and AutosubmitConfig.check_is_boolean(self._platforms_parser, section, - 'ADD_PROJECT_TO_HOST', False) - result = result and AutosubmitConfig.check_is_boolean(self._platforms_parser, section, 'TEST_SUITE', False) - result = result and AutosubmitConfig.check_is_int(self._platforms_parser, section, 'MAX_WAITING_JOBS', - False) - result = result and AutosubmitConfig.check_is_int(self._platforms_parser, section, 'TOTAL_JOBS', False) + result = result and self._platforms_parser.check_exists(section, 'PROJECT') + result = result and self._platforms_parser.check_exists(section, 'USER') + + result = result and self._platforms_parser.check_exists(section, 'HOST') + result = result and self._platforms_parser.check_exists(section, 'SCRATCH_DIR') + result = result and self._platforms_parser.check_is_boolean(section, + 'ADD_PROJECT_TO_HOST', False) + result = result and self._platforms_parser.check_is_boolean(section, 'TEST_SUITE', False) + result = result and self._platforms_parser.check_is_int(section, 'MAX_WAITING_JOBS', + False) + result = result and self._platforms_parser.check_is_int(section, 'TOTAL_JOBS', False) if not result: Log.critical("{0} is not a valid config file".format(os.path.basename(self._platforms_parser_file))) @@ -283,7 +299,7 @@ class AutosubmitConfig: :rtype: bool """ result = True - parser = self.jobs_parser + parser = self._jobs_parser sections = parser.sections() platforms = self._platforms_parser.sections() platforms.append('LOCAL') @@ -294,14 +310,14 @@ class AutosubmitConfig: 
Log.error('There are repeated job names') for section in sections: - result = result and AutosubmitConfig.check_exists(parser, section, 'FILE') - result = result and AutosubmitConfig.check_is_boolean(parser, section, 'RERUN_ONLY', False) + result = result and parser.check_exists(section, 'FILE') + result = result and parser.check_is_boolean(section, 'RERUN_ONLY', False) if parser.has_option(section, 'PLATFORM'): - result = result and AutosubmitConfig.check_is_choice(parser, section, 'PLATFORM', False, platforms) + result = result and parser.check_is_choice(section, 'PLATFORM', False, platforms) if parser.has_option(section, 'DEPENDENCIES'): - for dependency in str(AutosubmitConfig.get_option(parser, section, 'DEPENDENCIES', '')).split(' '): + for dependency in str(parser.get_option(section, 'DEPENDENCIES', '')).split(' '): if '-' in dependency: dependency = dependency.split('-')[0] elif '+' in dependency: @@ -312,15 +328,15 @@ class AutosubmitConfig: dependency)) if parser.has_option(section, 'RERUN_DEPENDENCIES'): - for dependency in str(AutosubmitConfig.get_option(parser, section, 'RERUN_DEPENDENCIES', - '')).split(' '): + for dependency in str(parser.get_option(section, 'RERUN_DEPENDENCIES', + '')).split(' '): if '-' in dependency: dependency = dependency.split('-')[0] if dependency not in sections: Log.error( 'Job {0} depends on job {1} that is not defined. 
It will be ignored.'.format(section, dependency)) - result = result and AutosubmitConfig.check_is_choice(parser, section, 'RUNNING', False, + result = result and parser.check_is_choice(section, 'RUNNING', False, ['once', 'date', 'member', 'chunk']) if not result: @@ -340,35 +356,35 @@ class AutosubmitConfig: result = True parser = self._exp_parser - result = result and AutosubmitConfig.check_exists(parser, 'DEFAULT', 'EXPID') - result = result and AutosubmitConfig.check_exists(parser, 'DEFAULT', 'HPCARCH') + result = result and parser.check_exists('DEFAULT', 'EXPID') + result = result and parser.check_exists('DEFAULT', 'HPCARCH') - result = result and AutosubmitConfig.check_exists(parser, 'experiment', 'DATELIST') - result = result and AutosubmitConfig.check_exists(parser, 'experiment', 'MEMBERS') - result = result and AutosubmitConfig.check_is_choice(parser, 'experiment', 'CHUNKSIZEUNIT', True, - ['year', 'month', 'day', 'hour']) - result = result and AutosubmitConfig.check_is_int(parser, 'experiment', 'CHUNKSIZE', True) - result = result and AutosubmitConfig.check_is_int(parser, 'experiment', 'NUMCHUNKS', True) - result = result and AutosubmitConfig.check_is_choice(parser, 'experiment', 'CALENDAR', True, - ['standard', 'noleap']) + result = result and parser.check_exists('experiment', 'DATELIST') + result = result and parser.check_exists('experiment', 'MEMBERS') + result = result and parser.check_is_choice('experiment', 'CHUNKSIZEUNIT', True, + ['year', 'month', 'day', 'hour']) + result = result and parser.check_is_int('experiment', 'CHUNKSIZE', True) + result = result and parser.check_is_int('experiment', 'NUMCHUNKS', True) + result = result and parser.check_is_choice('experiment', 'CALENDAR', True, + ['standard', 'noleap']) - result = result and AutosubmitConfig.check_is_boolean(parser, 'rerun', 'RERUN', True) + result = result and parser.check_is_boolean('rerun', 'RERUN', True) - if AutosubmitConfig.check_is_choice(parser, 'project', 'PROJECT_TYPE', True, - 
['none', 'git', 'svn', 'local']): - project_type = AutosubmitConfig.get_option(parser, 'project', 'PROJECT_TYPE', '') + if parser.check_is_choice('project', 'PROJECT_TYPE', True, + ['none', 'git', 'svn', 'local']): + project_type = parser.get_option('project', 'PROJECT_TYPE', '') if project_type == 'git': - result = result and AutosubmitConfig.check_exists(parser, 'git', 'PROJECT_ORIGIN') - result = result and AutosubmitConfig.check_exists(parser, 'git', 'PROJECT_BRANCH') + result = result and parser.check_exists('git', 'PROJECT_ORIGIN') + result = result and parser.check_exists('git', 'PROJECT_BRANCH') elif project_type == 'svn': - result = result and AutosubmitConfig.check_exists(parser, 'svn', 'PROJECT_URL') - result = result and AutosubmitConfig.check_exists(parser, 'svn', 'PROJECT_REVISION') + result = result and parser.check_exists('svn', 'PROJECT_URL') + result = result and parser.check_exists('svn', 'PROJECT_REVISION') elif project_type == 'local': - result = result and AutosubmitConfig.check_exists(parser, 'local', 'PROJECT_PATH') + result = result and parser.check_exists('local', 'PROJECT_PATH') if project_type != 'none': - result = result and AutosubmitConfig.check_exists(parser, 'project_files', 'FILE_PROJECT_CONF') + result = result and parser.check_exists('project_files', 'FILE_PROJECT_CONF') else: result = False @@ -401,7 +417,7 @@ class AutosubmitConfig: """ self._conf_parser = AutosubmitConfig.get_parser(self.parser_factory, self._conf_parser_file) self._platforms_parser = AutosubmitConfig.get_parser(self.parser_factory, self._platforms_parser_file) - self.jobs_parser = AutosubmitConfig.get_parser(self.parser_factory, self._jobs_parser_file) + self._jobs_parser = AutosubmitConfig.get_parser(self.parser_factory, self._jobs_parser_file) self._exp_parser = AutosubmitConfig.get_parser(self.parser_factory, self._exp_parser_file) if self._proj_parser_file == '': self._proj_parser = None @@ -494,7 +510,7 @@ class AutosubmitConfig: :return: path to project 
config file :rtype: str """ - return AutosubmitConfig.get_option(self._exp_parser, 'project_files', 'FILE_JOBS_CONF', '') + return self._exp_parser.get_option('project_files', 'FILE_JOBS_CONF', '') def get_git_project_origin(self): """ @@ -503,7 +519,7 @@ class AutosubmitConfig: :return: git origin :rtype: str """ - return AutosubmitConfig.get_option(self._exp_parser, 'git', 'PROJECT_ORIGIN', '') + return self._exp_parser.get_option('git', 'PROJECT_ORIGIN', '') def get_git_project_branch(self): """ @@ -512,7 +528,7 @@ class AutosubmitConfig: :return: git branch :rtype: str """ - return AutosubmitConfig.get_option(self._exp_parser, 'git', 'PROJECT_BRANCH', None) + return self._exp_parser.get_option('git', 'PROJECT_BRANCH', None) def get_git_project_commit(self): """ @@ -521,7 +537,7 @@ class AutosubmitConfig: :return: git commit :rtype: str """ - return AutosubmitConfig.get_option(self._exp_parser, 'git', 'PROJECT_COMMIT', None) + return self._exp_parser.get_option('git', 'PROJECT_COMMIT', None) def get_project_destination(self): """ @@ -643,6 +659,19 @@ class AutosubmitConfig: """ return int(self._exp_parser.get('experiment', 'NUMCHUNKS')) + def get_chunk_ini(self, default=1): + """ + Returns the first chunk from where the experiment will start + + :param default: + :return: initial chunk + :rtype: int + """ + chunk_ini = self._exp_parser.get_option('experiment', 'CHUNKINI', default) + if chunk_ini == '': + return default + return int(chunk_ini) + def get_chunk_size_unit(self): """ Unit for the chunk length @@ -745,9 +774,26 @@ class AutosubmitConfig: """ return int(self._conf_parser.get('config', 'TOTALJOBS')) + def get_max_wallclock(self): + """ + Returns max wallclock from autosubmit's config file + + :rtype: str + """ + return self._conf_parser.get_option('config', 'MAX_WALLCLOCK', '') + + def get_max_processors(self): + """ + Returns max processors from autosubmit's config file + + :rtype: str + """ + config_value = self._conf_parser.get_option('config', 
'MAX_PROCESSORS', None) + return int(config_value) if config_value is not None else config_value + def get_max_waiting_jobs(self): """ - Returns max number of waitng jobs from autosubmit's config file + Returns max number of waiting jobs from autosubmit's config file :return: main platforms :rtype: int @@ -761,7 +807,7 @@ class AutosubmitConfig: :return: default type such as bash, python, r.. :rtype: str """ - return self.get_option(self._exp_parser, 'project_files', 'JOB_SCRIPTS_TYPE', 'bash') + return self._exp_parser.get_option('project_files', 'JOB_SCRIPTS_TYPE', 'bash') def get_safetysleeptime(self): """ @@ -798,9 +844,27 @@ class AutosubmitConfig: Returns if the user has enabled the notifications from autosubmit's config file :return: if notifications + :rtype: string + """ + return self._conf_parser.get_option('mail', 'NOTIFICATIONS', 'false').lower() + + def get_remote_dependencies(self): + """ + Returns if the user has enabled the remote dependencies from autosubmit's config file + + :return: if remote dependencies :rtype: bool """ - return self.get_option(self._conf_parser, 'mail', 'NOTIFICATIONS', 'false').lower() + return self._conf_parser.get_option('wrapper', 'DEPENDENCIES', 'false').lower() == 'true' + + def get_wrapper_type(self): + """ + Returns what kind of wrapper (VERTICAL, HORIZONTAL, NONE) the user has configured in the autosubmit's config + + :return: wrapper type (or none) + :rtype: string + """ + return self._conf_parser.get_option('wrapper', 'TYPE', 'None').lower() def get_copy_remote_logs(self): """ @@ -809,7 +873,7 @@ class AutosubmitConfig: :return: if logs local copy :rtype: bool """ - return self.get_option(self._conf_parser, 'storage', 'COPY_REMOTE_LOGS', 'true').lower() + return self._conf_parser.get_option('storage', 'COPY_REMOTE_LOGS', 'true').lower() def get_mails_to(self): """ @@ -818,7 +882,7 @@ class AutosubmitConfig: :return: mail address :rtype: [str] """ - return [str(x) for x in self.get_option(self._conf_parser, 'mail', 
'TO', '').split(' ')] + return [str(x) for x in self._conf_parser.get_option('mail', 'TO', '').split(' ')] def get_communications_library(self): """ @@ -827,7 +891,7 @@ class AutosubmitConfig: :return: communications library :rtype: str """ - return self.get_option(self._conf_parser, 'communications', 'API', 'paramiko').lower() + return self._conf_parser.get_option('communications', 'API', 'paramiko').lower() def get_storage_type(self): """ @@ -836,7 +900,7 @@ class AutosubmitConfig: :return: communications library :rtype: str """ - return self.get_option(self._conf_parser, 'storage', 'TYPE', 'pkl').lower() + return self._conf_parser.get_option('storage', 'TYPE', 'pkl').lower() @staticmethod def is_valid_mail_address(mail_address): @@ -854,7 +918,7 @@ class AutosubmitConfig: return storage_type in ['pkl', 'db'] def is_valid_git_repository(self): - origin_exists = self.check_exists(self._exp_parser, 'git', 'PROJECT_ORIGIN') + origin_exists = self._exp_parser.check_exists('git', 'PROJECT_ORIGIN') branch = self.get_git_project_branch() commit = self.get_git_project_commit() return origin_exists and (branch is not None or commit is not None) @@ -874,188 +938,3 @@ class AutosubmitConfig: parser.optionxform = str parser.read(file_path) return parser - - @staticmethod - def get_option(parser, section, option, default): - """ - Gets an option from given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param section: section that contains the option - :type section: str - :param option: option to get - :type option: str - :param default: value to be returned if option is not present - :type default: object - :return: option value - :rtype: str - """ - if parser.has_option(section, option): - return parser.get(section, option) - else: - return default - - @staticmethod - def get_bool_option(parser, section, option, default): - """ - Gets a boolean option from given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param 
section: section that contains the option - :type section: str - :param option: option to get - :type option: str - :param default: value to be returned if option is not present - :type default: bool - :return: option value - :rtype: bool - """ - if parser.has_option(section, option): - return parser.get(section, option).lower().strip() == 'true' - else: - return default - - @staticmethod - def check_exists(parser, section, option): - """ - Checks if an option exists in given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param section: section that contains the option - :type section: str - :param option: option to check - :type option: str - :return: True if option exists, False otherwise - :rtype: bool - """ - if parser.has_option(section, option): - return True - else: - Log.error('Option {0} in section {1} not found'.format(option, section)) - return False - - @staticmethod - def check_is_boolean(parser, section, option, must_exist): - """ - Checks if an option is a boolean value in given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param section: section that contains the option - :type section: str - :param option: option to check - :type option: str - :param must_exist: if True, option must exist - :type must_exist: bool - :return: True if option value is boolean, False otherwise - :rtype: bool - """ - if must_exist and not AutosubmitConfig.check_exists(parser, section, option): - Log.error('Option {0} in section {1} must exist'.format(option, section)) - return False - if AutosubmitConfig.get_option(parser, section, option, 'false').lower() not in ['false', 'true']: - Log.error('Option {0} in section {1} must be true or false'.format(option, section)) - return False - return True - - @staticmethod - def check_is_choice(parser, section, option, must_exist, choices): - """ - Checks if an option is a valid choice in given parser - - :param parser: parser to use - :type parser: SafeConfigParser - 
:param section: section that contains the option - :type section: str - :param option: option to check - :type option: str - :param must_exist: if True, option must exist - :type must_exist: bool - :param choices: valid choices - :type choices: list - :return: True if option value is a valid choice, False otherwise - :rtype: bool - """ - if must_exist and not AutosubmitConfig.check_exists(parser, section, option): - return False - value = AutosubmitConfig.get_option(parser, section, option, choices[0]) - if value not in choices: - Log.error('Value {2} in option {0} in section {1} is not a valid choice'.format(option, section, value)) - return False - return True - - @staticmethod - def check_is_int(parser, section, option, must_exist): - """ - Checks if an option is an integer value in given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param section: section that contains the option - :type section: str - :param option: option to check - :type option: str - :param must_exist: if True, option must exist - :type must_exist: bool - :return: True if option value is integer, False otherwise - :rtype: bool - """ - if must_exist and not AutosubmitConfig.check_exists(parser, section, option): - return False - value = AutosubmitConfig.get_option(parser, section, option, '1') - try: - int(value) - except ValueError: - Log.error('Option {0} in section {1} is not valid an integer'.format(option, section)) - return False - return True - - @staticmethod - def check_regex(parser, section, option, must_exist, regex): - """ - Checks if an option complies with a regular expression in given parser - - :param parser: parser to use - :type parser: SafeConfigParser - :param section: section that contains the option - :type section: str - :param option: option to check - :type option: str - :param must_exist: if True, option must exist - :type must_exist: bool - :param regex: regular expression to check - :type regex: str - :return: True if option complies 
with regex, False otherwise - :rtype: bool - """ - if must_exist and not AutosubmitConfig.check_exists(parser, section, option): - return False - prog = re.compile(regex) - value = AutosubmitConfig.get_option(parser, section, option, '1') - if not prog.match(value): - Log.error('Option {0} in section {1} is not valid: {2}'.format(option, section, value)) - return False - return True - - @staticmethod - def check_json(key, value): - """ - Checks if value is a valid json - - :param key: key to check - :type key: str - :param value: value - :type value: str - :return: True if value is a valid json, False otherwise - :rtype: bool - """ - # noinspection PyBroadException - try: - nestedExpr('[', ']').parseString(value).asList() - return True - except: - Log.error("Invalid value {0}: {1}", key, value) - return False diff --git a/autosubmit/config/files/expdef.conf b/autosubmit/config/files/expdef.conf index b8afe65843bc51d6da915ec96709978635e2b274..cb66124914e6e25348d0d0cabf753228f57040af 100644 --- a/autosubmit/config/files/expdef.conf +++ b/autosubmit/config/files/expdef.conf @@ -26,16 +26,11 @@ CHUNKSIZEUNIT = month CHUNKSIZE = # Total number of chunks in experiment. NUMERIC = 30, 15, 10 NUMCHUNKS = +# Initial chunk of the experiment. Optional. DEFAULT = 1 +CHUNKINI = # Calendar used. LIST: standard, noleap CALENDAR = standard -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - [project] # Select project type. STRING = git, svn, local, none # If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used @@ -71,4 +66,11 @@ FILE_PROJECT_CONF = # Where is JOBS CONFIGURATION file location relative to project root path FILE_JOBS_CONF = # Default job scripts type in the project. 
type = STRING, default = bash, supported = 'bash', 'python' or 'r' -JOB_SCRIPTS_TYPE = \ No newline at end of file +JOB_SCRIPTS_TYPE = + +[rerun] +# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE +RERUN = FALSE +# If RERUN = TRUE then supply the list of chunks to rerun +# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] +CHUNKLIST = \ No newline at end of file diff --git a/autosubmit/config/files/jobs.conf b/autosubmit/config/files/jobs.conf index 6aa9c93d0afb00479762aa10ff7f44b43b4a3d4e..8caccb5cbfc5044c64e351377ba51f4dc5517869 100644 --- a/autosubmit/config/files/jobs.conf +++ b/autosubmit/config/files/jobs.conf @@ -35,8 +35,10 @@ # THREADS = 1 ## Tasks number (number of processes per node) to be submitted to the HPC. If not specified, defaults to empty. # TASKS = 16 -## Memory requirements for the job in MB. If not specified, defaults to empty. +## Memory requirements for the job in MB. Optional. If not specified, then not defined for the scheduler. # MEMORY = 4096 +## Memory per task requirements for the job in MB. Optional. If not specified, then not defined for the scheduler. +# MEMORY_PER_TASK = 1024 ## Scratch free space requirements for the job in percentage (%). If not specified, it won't be defined on the template. # SCRATCH_FREE_SPACE = 10 ## Number of retrials if a job fails. If not specified, defaults to the value given on experiment's autosubmit.conf diff --git a/autosubmit/config/files/platforms.conf b/autosubmit/config/files/platforms.conf index 84531137e6d5c6781d4293e2bccbc964449ede12..0fa5eee7f3bd6719eade3733eb53b9c0d756786e 100644 --- a/autosubmit/config/files/platforms.conf +++ b/autosubmit/config/files/platforms.conf @@ -23,7 +23,7 @@ ## If given, Autosubmit will add jobs to the given queue. Required for some platforms. # QUEUE = ## Optional. If given, Autosubmit will submit the serial jobs with the exclusivity directive. -# QUEUE = +# EXCLUSIVITY = ## Optional. 
If specified, autosubmit will run jobs with only one processor in the specified platform. # SERIAL_PLATFORM = SERIAL_PLATFORM_NAME ## Optional. If specified, autosubmit will run jobs with only one processor in the specified queue. @@ -39,4 +39,10 @@ # MAX_WAITING_JOBS = ## Optional. Integer. Default maximum number of jobs to be running at the same time at any platform ## Default = 6 -# TOTAL_JOBS = \ No newline at end of file +# TOTAL_JOBS = +## Max wallclock per job submitted to the HPC queue in format HH:MM. If not specified, defaults to empty. +## Optional. Required for wrappers. +# MAX_WALLCLOCK = 72:00 +## Max processors number per job submitted to the HPC. If not specified, defaults to empty. +## Optional. Required for wrappers. +# MAX_PROCESSORS = 1 \ No newline at end of file diff --git a/autosubmit/config/log.py b/autosubmit/config/log.py deleted file mode 100644 index 9e2527e97717e05f6c887335ce1ba8b1f7282357..0000000000000000000000000000000000000000 --- a/autosubmit/config/log.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . - -import logging -import os -import sys -from datetime import datetime - - -class LogFormatter: - """ - Class to format log output. 
- - :param to_file: If True, creates a LogFormatter for files; if False, for console - :type to_file: bool - """ - RESULT = '\033[32m' - WARNING = '\033[33m' - ERROR = '\033[31m' - CRITICAL = '\033[1m \033[31m' - DEFAULT = '\033[0m\033[39m' - - def __init__(self, to_file=False): - """ - Initializer for LogFormatter - - - """ - self._file = to_file - if self._file: - self._formatter = logging.Formatter('%(asctime)s %(message)s') - else: - self._formatter = logging.Formatter('%(message)s') - - def format(self, record): - """ - Format log output, adding labels if needed for log level. If logging to console, also manages font color. - If logging to file adds timestamp - - :param record: log record to format - :type record: LogRecord - :return: formatted record - :rtype: str - """ - header = '' - if record.levelno == Log.RESULT: - if not self._file: - header = LogFormatter.RESULT - elif record.levelno == Log.USER_WARNING: - if not self._file: - header = LogFormatter.WARNING - elif record.levelno == Log.WARNING: - if not self._file: - header = LogFormatter.WARNING - header += "[WARNING] " - elif record.levelno == Log.ERROR: - if not self._file: - header = LogFormatter.ERROR - header += "[ERROR] " - elif record.levelno == Log.CRITICAL: - if not self._file: - header = LogFormatter.ERROR - header += "[CRITICAL] " - - msg = self._formatter.format(record) - if header != '' and not self._file: - msg += LogFormatter.DEFAULT - return header + msg - - -class Log: - """ - Static class to manage the log for the application. Messages will be sent to console and to file if it is - configured. Levels can be set for each output independently. 
These levels are (from lower to higher priority): - - - EVERYTHING : this level is just defined to show every output - - DEBUG - - INFO - - RESULT - - USER_WARNING - - WARNING - - ERROR - - CRITICAL - - NO_LOG : this level is just defined to remove every output - - """ - EVERYTHING = 0 - DEBUG = logging.DEBUG - INFO = logging.INFO - RESULT = 25 - USER_WARNING = 29 - WARNING = logging.WARNING - ERROR = logging.ERROR - CRITICAL = logging.CRITICAL - NO_LOG = CRITICAL + 1 - - logging.basicConfig() - - log = logging.Logger('Autosubmit', EVERYTHING) - - console_handler = logging.StreamHandler(sys.stdout) - console_handler.setLevel(INFO) - console_handler.setFormatter(LogFormatter(False)) - log.addHandler(console_handler) - - file_handler = None - file_level = INFO - - @staticmethod - def set_file(file_path): - """ - Configure the file to store the log. If another file was specified earlier, new messages will only go to the - new file. - - :param file_path: file to store the log - :type file_path: str - """ - (directory, filename) = os.path.split(file_path) - if not os.path.exists(directory): - os.mkdir(directory) - files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) and - f.endswith(filename)] - if len(files) >= 5: - files.sort() - os.remove(os.path.join(directory, files[0])) - file_path = os.path.join(directory, '{0:%Y%m%d_%H%M%S}_'.format(datetime.now()) + filename) - if Log.file_handler is not None: - Log.log.removeHandler(Log.file_handler) - Log.file_handler = logging.FileHandler(file_path, 'w') - Log.file_handler.setLevel(Log.file_level) - Log.file_handler.setFormatter(LogFormatter(True)) - Log.log.addHandler(Log.file_handler) - os.chmod(file_path, 0o775) - - @staticmethod - def set_console_level(level): - """ - Sets log level for logging to console. 
Every output of level equal or higher to parameter level will be - printed on console - - :param level: new level for console - :return: None - """ - if type(level) is str: - level = getattr(Log, level) - Log.console_handler.level = level - - @staticmethod - def set_file_level(level): - """ - Sets log level for logging to file. Every output of level equal or higher to parameter level will be - added to log file - - :param level: new level for log file - """ - if type(level) is str: - level = getattr(Log, level) - Log.file_level = level - if Log.file_handler is not None: - Log.file_handler.level = level - - @staticmethod - def debug(msg, *args): - """ - Sends debug information to the log - - :param msg: message to show - :param args: arguments for message formating (it will be done using format() method on str) - """ - Log.log.debug(msg.format(*args)) - - @staticmethod - def info(msg, *args): - """ - Sends information to the log - - :param msg: message to show - :param args: arguments for message formatting (it will be done using format() method on str) - """ - Log.log.info(msg.format(*args)) - - @staticmethod - def result(msg, *args): - """ - Sends results information to the log. It will be shown in green in the console. - - :param msg: message to show - :param args: arguments for message formating (it will be done using format() method on str) - """ - Log.log.log(Log.RESULT, msg.format(*args)) - - @staticmethod - def user_warning(msg, *args): - """ - Sends warnings for the user to the log. It will be shown in yellow in the console. - - :param msg: message to show - :param args: arguments for message formating (it will be done using format() method on str) - """ - Log.log.log(Log.USER_WARNING, msg.format(*args)) - - @staticmethod - def warning(msg, *args): - """ - Sends program warnings to the log. It will be shown in yellow in the console. 
- - :param msg: message to show - :param args: arguments for message formatting (it will be done using format() method on str) - """ - Log.log.warning(msg.format(*args)) - - @staticmethod - def error(msg, *args): - """ - Sends errors to the log. It will be shown in red in the console. - - :param msg: message to show - :param args: arguments for message formatting (it will be done using format() method on str) - """ - Log.log.error(msg.format(*args)) - - @staticmethod - def critical(msg, *args): - """ - Sends critical errors to the log. It will be shown in red in the console. - - :param msg: message to show - :param args: arguments for message formatting (it will be done using format() method on str) - """ - Log.log.critical(msg.format(*args)) diff --git a/autosubmit/database/db_common.py b/autosubmit/database/db_common.py index d58df1888595290e4dc37dcd43e333e8b11b640f..5d717e669ba6f7beee073c441f29dc421afdb2af 100644 --- a/autosubmit/database/db_common.py +++ b/autosubmit/database/db_common.py @@ -23,7 +23,7 @@ Module containing functions to manage autosubmit's database. import os import sqlite3 -from autosubmit.config.log import Log +from bscearth.utils.log import Log from autosubmit.config.basicConfig import BasicConfig CURRENT_DATABASE_VERSION = 1 @@ -284,7 +284,7 @@ def delete_experiment(experiment_id): """ if not check_db(): return False - if check_experiment_exists(experiment_id, False): + if not check_experiment_exists(experiment_id, False): return True try: (conn, cursor) = open_conn() diff --git a/autosubmit/date/chunk_date_lib.py b/autosubmit/date/chunk_date_lib.py deleted file mode 100755 index deae15aad78ac0befdce5c434d8554f5883f0efd..0000000000000000000000000000000000000000 --- a/autosubmit/date/chunk_date_lib.py +++ /dev/null @@ -1,330 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. 
- -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . -""" -In this python script there are tools to manipulate the dates and make mathematical -operations between them. -""" - -import datetime -import calendar -from dateutil.relativedelta import * - -from autosubmit.config.log import Log - - -def add_time(date, total_size, chunk_unit, cal): - """ - Adds given time to a date - - :param date: base date - :type date: datetime.datetime - :param total_size: time to add - :type total_size: int - :param chunk_unit: unit of time to add - :type chunk_unit: str - :param cal: calendar to use - :type cal: str - :return: result of adding time to base date - :rtype: datetime.datetime - """ - if chunk_unit == 'year': - return add_years(date, total_size) - elif chunk_unit == 'month': - return add_months(date, total_size, cal) - elif chunk_unit == 'day': - return add_days(date, total_size, cal) - elif chunk_unit == 'hour': - return add_hours(date, total_size, cal) - else: - Log.critical('Chunk unit not valid: {0}'.format(chunk_unit)) - - -def add_years(date, number_of_years): - """ - Adds years to a date - - :param date: base date - :type date: datetime.datetime - :param number_of_years: number of years to add - :type number_of_years: int - :return: base date plus added years - :rtype: date - """ - return date + relativedelta(years=number_of_years) - - -def add_months(date, number_of_months, cal): - """ - Adds months to a date - - :param 
date: base date - :type date: datetime.datetime - :param number_of_months: number of months to add - :type number_of_months: int - :param cal: calendar to use - :type cal: str - :return: base date plus added months - :rtype: date - """ - result = date + relativedelta(months=number_of_months) - if cal == 'noleap': - if result.month == 2 and result.day == 29: - result = result - relativedelta(days=1) - return result - - -def add_days(date, number_of_days, cal): - """ - Adds days to a date - - :param date: base date - :type date: datetime.datetime - :param number_of_days: number of days to add - :type number_of_days: int - :param cal: calendar to use - :type cal: str - :return: base date plus added days - :rtype: date - """ - result = date + relativedelta(days=number_of_days) - if cal == 'noleap': - year = date.year - if date.month > 2: - year += 1 - - while year <= result.year: - if calendar.isleap(year): - if result.year == year and result < datetime.datetime(year, 2, 29): - year += 1 - continue - result += relativedelta(days=1) - year += 1 - if result.month == 2 and result.day == 29: - result += relativedelta(days=1) - return result - - -def sub_days(start_date, number_of_days, cal): - """ - Substract days to a date - - :param start_date: base date - :type start_date: datetime.datetime - :param number_of_days: number of days to subtract - :type number_of_days: int - :param cal: calendar to use - :type cal: str - :return: base date minus subtracted days - :rtype: datetime.datetime - """ - result = start_date - relativedelta(days=number_of_days) - if cal == 'noleap': - # checks if crossing the day 29th - if start_date > result: - # case subtraction - while datetime.datetime(start_date.year, start_date.month, start_date.day) >= \ - datetime.datetime(result.year, result.month, result.day): - if calendar.isleap(start_date.year): - if start_date.month == 2 and start_date.day == 29: - result -= relativedelta(days=1) - start_date -= relativedelta(days=1) - else: - 
start_date -= relativedelta(months=1) - elif start_date < result: - # case addition - while datetime.datetime(start_date.year, start_date.month, start_date.day) <= \ - datetime.datetime(result.year, result.month, result.day): - if calendar.isleap(start_date.year): - if start_date.month == 2 and start_date.day == 29: - result += relativedelta(days=1) - start_date += relativedelta(days=1) - else: - start_date += relativedelta(months=1) - - return result - - -def add_hours(date, number_of_hours, cal): - """ - Adds hours to a date - - :param date: base date - :type date: datetime.datetime - :param number_of_hours: number of hours to add - :type number_of_hours: int - :param cal: calendar to use - :type cal: str - :return: base date plus added hours - :rtype: datetime - """ - result = date + relativedelta(hours=number_of_hours) - if cal == 'noleap': - year = date.year - if date.month > 2: - year += 1 - - while year <= result.year: - if calendar.isleap(year): - if result.year == year and result < datetime.datetime(year, 2, 29): - year += 1 - continue - result += relativedelta(days=1) - year += 1 - if result.month == 2 and result.day == 29: - result += relativedelta(days=1) - return result - - -def subs_dates(start_date, end_date, cal): - """ - Gets days between start_date and end_date - - :param start_date: interval's start date - :type start_date: datetime.datetime - :param end_date: interval's end date - :type end_date: datetime.datetime - :param cal: calendar to use - :type cal: str - :return: interval length in days - :rtype: int - """ - result = end_date - start_date - if cal == 'noleap': - year = start_date.year - if start_date.month > 2: - year += 1 - - while year <= end_date.year: - if calendar.isleap(year): - if end_date.year == year and end_date < datetime.datetime(year, 2, 29): - year += 1 - continue - result -= datetime.timedelta(days=1) - year += 1 - return result.days - - -def chunk_start_date(date, chunk, chunk_length, chunk_unit, cal): - """ - Gets 
chunk's interval start date - - :param date: start date for member - :type date: datetime.datetime - :param chunk: number of chunk - :type chunk: int - :param chunk_length: length of chunks - :type chunk_length: int - :param chunk_unit: chunk length unit - :type chunk_unit: str - :param cal: calendar to use - :type cal: str - :return: chunk's start date - :rtype: datetime.datetime - """ - chunk_1 = chunk - 1 - total_months = chunk_1 * chunk_length - result = add_time(date, total_months, chunk_unit, cal) - return result - - -def chunk_end_date(start_date, chunk_length, chunk_unit, cal): - """ - Gets chunk interval end date - - :param start_date: chunk's start date - :type start_date: datetime.datetime - :param chunk_length: length of the chunks - :type chunk_length: int - :param chunk_unit: chunk length unit - :type chunk_unit: str - :param cal: calendar to use - :type cal: str - :return: chunk's end date - :rtype: datetime.datetime - """ - return add_time(start_date, chunk_length, chunk_unit, cal) - - -def previous_day(date, cal): - """ - Gets previous day - - :param date: base date - :type date: datetime.datetime - :param cal: calendar to use - :type cal: str - :return: base date minus one day - :rtype: datetime.datetime - """ - return sub_days(date, 1, cal) - - -def parse_date(string_date): - """ - Parses a string into a datetime object - - :param string_date: string to parse - :type string_date: str - :rtype: datetime.datetime - """ - if string_date is None or string_date == '': - return None - length = len(string_date) - # Date and time can be given as year, year+month, year+month+day, year+month+day+hour or year+month+day+hour+minute - if length == 4: - return datetime.datetime.strptime(string_date, "%Y") - if length == 6: - return datetime.datetime.strptime(string_date, "%Y%m") - if length == 8: - return datetime.datetime.strptime(string_date, "%Y%m%d") - elif length == 10: - return datetime.datetime.strptime(string_date, "%Y%m%d%H") - elif length == 12: - 
return datetime.datetime.strptime(string_date, "%Y%m%d%H%M") - elif length == 14: - return datetime.datetime.strptime(string_date, "%Y%m%d%H%M%S") - elif length == 19: - return datetime.datetime.strptime(string_date, "%Y-%m-%d %H:%M:%S") - else: - raise ValueError("String '{0}' can not be converted to date".format(string_date)) - - -def date2str(date, date_format=''): - """ - Converts a datetime object to a str - - :param date_format: specifies format for date time convcersion. It can be H to show hours, - M to show hour and minute. Other values will return only the date. - :type date_format: str - :param date: date to convert - :type date: datetime.datetime - :rtype: str - """ - # Can not use strftime because it not works with dates prior to 1-1-1900 - if date is None: - return '' - if date_format == 'H': - return "{0:04}{1:02}{2:02}{3:02}".format(date.year, date.month, date.day, date.hour) - elif date_format == 'M': - return "{0:04}{1:02}{2:02}{3:02}{4:02}".format(date.year, date.month, date.day, date.hour, date.minute) - elif date_format == 'S': - return "{0:04}{1:02}{2:02}{3:02}{4:02}{5:02}".format(date.year, date.month, date.day, date.hour, date.minute, - date.second) - else: - return "{0:04}{1:02}{2:02}".format(date.year, date.month, date.day) diff --git a/autosubmit/experiment/experiment_common.py b/autosubmit/experiment/experiment_common.py index 38bc38a622cb56e2e1f2fd58c45875bbdac6e3d3..43e78191fd5862863ba8f81794ba471dcd257c2e 100644 --- a/autosubmit/experiment/experiment_common.py +++ b/autosubmit/experiment/experiment_common.py @@ -22,7 +22,7 @@ Module containing functions to manage autosubmit's experiments. 
""" import string import autosubmit.database.db_common as db_common -from autosubmit.config.log import Log +from bscearth.utils.log import Log def new_experiment(description, version, test=False, operational=False): diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..72bd5a4ff1cf0be5bafacfcbead047bfaf69f0f6 --- /dev/null +++ b/autosubmit/experiment/statistics.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import datetime +from autosubmit.job.job import Job +from autosubmit.monitor.utils import FixedSizeList +from bscearth.utils.log import Log + + +def timedelta2hours(deltatime): + return deltatime.days * 24 + deltatime.seconds / 3600.0 + + +class ExperimentStats(object): + + def __init__(self, jobs_list, start, end): + self._jobs_list = jobs_list + self._start = start + self._end = end + # Max variables + self._max_timedelta = 0 + self._max_time = 0 + self._max_fail = 0 + # Totals variables + self._total_jobs_submitted = 0 + self._total_jobs_run = 0 + self._total_jobs_failed = 0 + self._total_jobs_completed = 0 + self._total_queueing_time = datetime.timedelta() + self._cpu_consumption = datetime.timedelta() + self._real_consumption = datetime.timedelta() + self._expected_cpu_consumption = 0 + self._expected_real_consumption = 0 + self._threshold = 0 + # Totals arrays + self._totals = [] + self._run = [datetime.timedelta()] * len(jobs_list) + self._queued = [datetime.timedelta()] * len(jobs_list) + self._failed_jobs = [0] * len(jobs_list) + self._fail_queued = [datetime.timedelta()] * len(jobs_list) + self._fail_run = [datetime.timedelta()] * len(jobs_list) + # Do calculations + self._calculate_stats() + self._calculate_maxs() + self._calculate_totals() + self._format_stats() + + @property + def totals(self): + return self._totals + + @property + def max_time(self): + return self._max_time + + @property + def max_fail(self): + return self._max_fail + + @property + def threshold(self): + return self._threshold + + @property + def run(self): + return FixedSizeList(self._run, 0.0) + + @property + def queued(self): + return FixedSizeList(self._queued, 0.0) + + @property + def failed_jobs(self): + return FixedSizeList(self._failed_jobs, 0.0) + + @property + def fail_queued(self): + return FixedSizeList(self._fail_queued, 0.0) + + @property + def fail_run(self): + return FixedSizeList(self._fail_run, 0.0) + + def _calculate_stats(self): + for i, job in 
enumerate(self._jobs_list): + processors = job.processors + last_retrials = job.get_last_retrials() + for retrial in last_retrials: + if Job.is_a_completed_retrial(retrial): + self._queued[i] += retrial[1] - retrial[0] + self._run[i] += retrial[2] - retrial[1] + self._cpu_consumption += self.run[i] * int(processors) + self._real_consumption += self.run[i] + self._total_jobs_completed += 1 + else: + if len(retrial) > 2: + self._fail_run[i] += retrial[2] - retrial[1] + if len(retrial) > 1: + self._fail_queued[i] += retrial[1] - retrial[0] + self._cpu_consumption += self.fail_run[i] * int(processors) + self._real_consumption += self.fail_run[i] + self._failed_jobs[i] += 1 + self._total_jobs_submitted += len(last_retrials) + self._total_jobs_run += len(last_retrials) + self._total_jobs_failed += self.failed_jobs[i] + self._threshold = max(self._threshold, job.total_wallclock) + self._expected_cpu_consumption += job.total_wallclock * int(job.total_processors) + self._expected_real_consumption += job.total_wallclock + self._total_queueing_time += self._queued[i] + + def _calculate_maxs(self): + max_run = max(max(self._run), max(self._fail_run)) + max_queued = max(max(self._queued), max(self._fail_queued)) + self._max_timedelta = max(max_run, max_queued, datetime.timedelta(hours=self._threshold)) + self._max_time = max(self._max_time, self._max_timedelta.days * 24 + self._max_timedelta.seconds / 3600.0) + self._max_fail = max(self._max_fail, max(self._failed_jobs)) + + def _calculate_totals(self): + percentage_consumption = timedelta2hours(self._cpu_consumption) / self._expected_cpu_consumption * 100 + self._totals = ['Period: ' + str(self._start) + " ~ " + str(self._end), + 'Submitted (#): ' + str(self._total_jobs_submitted), + 'Run (#): ' + str(self._total_jobs_run), + 'Failed (#): ' + str(self._total_jobs_failed), + 'Completed (#): ' + str(self._total_jobs_completed), + 'Queueing time (h): ' + str(round(timedelta2hours(self._total_queueing_time), 2)), + 'Expected 
consumption real (h): ' + str(round(self._expected_real_consumption, 2)), + 'Expected consumption CPU time (h): ' + str(round(self._expected_cpu_consumption, 2)), + 'Consumption real (h): ' + str(round(timedelta2hours(self._real_consumption), 2)), + 'Consumption CPU time (h): ' + str(round(timedelta2hours(self._cpu_consumption), 2)), + 'Consumption (%): ' + str(round(percentage_consumption, 2))] + Log.result('\n'.join(self._totals)) + + def _format_stats(self): + self._queued = map(lambda y: timedelta2hours(y), self._queued) + self._run = map(lambda y: timedelta2hours(y), self._run) + self._fail_queued = map(lambda y: timedelta2hours(y), self._fail_queued) + self._fail_run = map(lambda y: timedelta2hours(y), self._fail_run) diff --git a/autosubmit/git/autosubmit_git.py b/autosubmit/git/autosubmit_git.py index 323d1cbb6c16bdd43003b51b4d6e87613469eacb..283644fc42df272c84e2716f387715e4e2d315c7 100644 --- a/autosubmit/git/autosubmit_git.py +++ b/autosubmit/git/autosubmit_git.py @@ -24,7 +24,7 @@ import subprocess import shutil from autosubmit.config.basicConfig import BasicConfig -from autosubmit.config.log import Log +from bscearth.utils.log import Log class AutosubmitGit: diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 5181d240dd409aa31a8c03a2e45814dc4b79c07e..d0d8a18420882ef94ca6f8c17dd74130be7d2682 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. 
@@ -25,9 +25,10 @@ import re import time from autosubmit.job.job_common import Status, Type -from autosubmit.job.job_common import StatisticsSnippetBash, StatisticsSnippetPython, StatisticsSnippetR +from autosubmit.job.job_common import StatisticsSnippetBash, StatisticsSnippetPython +from autosubmit.job.job_common import StatisticsSnippetR, StatisticsSnippetEmpty from autosubmit.config.basicConfig import BasicConfig -from autosubmit.date.chunk_date_lib import * +from bscearth.utils.date import * class Job(object): @@ -47,6 +48,8 @@ class Job(object): :type priority: int """ + CHECK_ON_SUBMISSION = 'on_submission' + def __str__(self): return "{0} STATUS: {1}".format(self.name, self.status) @@ -58,12 +61,12 @@ class Job(object): self.wallclock = None self.tasks = None self.threads = None - self.processors = None - self.memory = None + self.processors = '1' + self.memory = '' + self.memory_per_task = '' self.chunk = None self.member = None self.date = None - self.memory = None self.name = name self._long_name = None self.long_name = name @@ -85,7 +88,7 @@ class Job(object): self._tmp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, self.expid, BasicConfig.LOCAL_TMP_DIR) self.write_start = False self._platform = None - self.check = True + self.check = 'True' def __getstate__(self): odict = self.__dict__ @@ -132,10 +135,10 @@ class Job(object): :return HPCPlatform object for the job to use :rtype: HPCPlatform """ - if self.processors > 1: - return self._platform - else: + if str(self.processors) == '1': return self._platform.serial_platform + else: + return self._platform @platform.setter def platform(self, value): @@ -157,10 +160,10 @@ class Job(object): """ if self._queue is not None: return self._queue - if self.processors > 1: - return self._platform.queue - else: + if str(self.processors) == '1': return self._platform.serial_platform.serial_queue + else: + return self._platform.queue @queue.setter def queue(self, value): @@ -229,6 +232,19 @@ class Job(object): def 
remote_logs(self, value): self._remote_logs = value + @property + def total_processors(self): + if ':' in self.processors: + return reduce(lambda x, y: int(x) + int(y), self.processors.split(':')) + return int(self.processors) + + @property + def total_wallclock(self): + if self.wallclock: + hours, minutes = self.wallclock.split(':') + return float(minutes) / 60 + float(hours) + return 0 + def log_job(self): """ Prints job information in log @@ -423,7 +439,23 @@ class Job(object): """ return self._get_from_total_stats(1) - def update_status(self, new_status, copy_remote_logs): + def get_last_retrials(self): + log_name = os.path.join(self._tmp_path, self.name + '_TOTAL_STATS') + retrials_list = [] + if os.path.exists(log_name): + already_completed = False + for retrial in reversed(open(log_name).readlines()): + retrial_fields = retrial.split() + if Job.is_a_completed_retrial(retrial_fields): + if already_completed: + break + already_completed = True + retrial_dates = map(lambda y: parse_date(y) if y != 'COMPLETED' and y != 'FAILED' else y, + retrial_fields) + retrials_list.insert(0, retrial_dates) + return retrials_list + + def update_status(self, new_status, copy_remote_logs=False): """ Updates job status, checking COMPLETED file if needed @@ -434,7 +466,7 @@ class Job(object): previous_status = self.status if new_status == Status.COMPLETED: - Log.debug("This job seems to have completed...checking") + Log.debug("This job seems to have completed: checking...") self.platform.get_completed_files(self.name) self.check_completion() else: @@ -446,9 +478,16 @@ class Job(object): elif self.status is Status.COMPLETED: Log.result("Job {0} is COMPLETED", self.name) elif self.status is Status.FAILED: - Log.user_warning("Job {0} is FAILED", self.name) + Log.user_warning("Job {0} is FAILED. 
Checking completed files to confirm the failure...", self.name) + self.platform.get_completed_files(self.name) + self.check_completion() + if self.status is Status.COMPLETED: + Log.warning('Job {0} seems to have failed but there is a COMPLETED file', self.name) + Log.result("Job {0} is COMPLETED", self.name) + else: + self.update_children_status() elif self.status is Status.UNKNOWN: - Log.debug("Job {0} in UNKNOWN status. Checking completed files", self.name) + Log.debug("Job {0} in UNKNOWN status. Checking completed files...", self.name) self.platform.get_completed_files(self.name) self.check_completion(Status.UNKNOWN) if self.status is Status.UNKNOWN: @@ -470,6 +509,13 @@ class Job(object): self.platform.get_logs_files(self.expid, self.remote_logs) return self.status + def update_children_status(self): + children = list(self.children) + for child in children: + if child.status in [Status.SUBMITTED, Status.RUNNING, Status.QUEUING, Status.UNKNOWN]: + child.status = Status.FAILED + children += list(child.children) + def check_completion(self, default_status=Status.FAILED): """ Check the presence of *COMPLETED* file. @@ -481,7 +527,7 @@ class Job(object): if os.path.exists(log_name): self.status = Status.COMPLETED else: - Log.warning("Job {0} seemed to be completed but there is no COMPLETED file", self.name) + Log.warning("Job {0} completion check failed. 
There is no COMPLETED file", self.name) self.status = default_status def update_parameters(self, as_conf, parameters, @@ -559,6 +605,7 @@ class Job(object): if self.tasks == 0: self.tasks = job_platform.processors_per_node self.memory = as_conf.get_memory(self.section) + self.memory_per_task = as_conf.get_memory_per_task(self.section) self.wallclock = as_conf.get_wallclock(self.section) self.scratch_free_space = as_conf.get_scratch_free_space(self.section) if self.scratch_free_space == 0: @@ -566,11 +613,11 @@ class Job(object): parameters['NUMPROC'] = self.processors parameters['MEMORY'] = self.memory + parameters['MEMORY_PER_TASK'] = self.memory_per_task parameters['NUMTHREADS'] = self.threads parameters['NUMTASK'] = self.tasks parameters['WALLCLOCK'] = self.wallclock parameters['TASKTYPE'] = self.section - parameters['MEMORY'] = self.memory parameters['SCRATCH_FREE_SPACE'] = self.scratch_free_space parameters['CURRENT_ARCH'] = job_platform.name @@ -581,6 +628,7 @@ class Job(object): parameters['CURRENT_BUDG'] = job_platform.budget parameters['CURRENT_RESERVATION'] = job_platform.reservation parameters['CURRENT_EXCLUSIVITY'] = job_platform.exclusivity + parameters['CURRENT_HYPERTHREADING'] = job_platform.hyperthreading parameters['CURRENT_TYPE'] = job_platform.type parameters['CURRENT_SCRATCH_DIR'] = job_platform.scratch parameters['CURRENT_ROOTDIR'] = job_platform.root_dir @@ -630,6 +678,12 @@ class Job(object): return template_content + def get_wrapped_content(self, as_conf): + snippet = StatisticsSnippetEmpty + template = 'python $SCRATCH/{1}/LOG_{1}/{0}.cmd'.format(self.name, self.expid) + template_content = self._get_template_content(as_conf, snippet, template) + return template_content + def _get_template_content(self, as_conf, snippet, template): communications_library = as_conf.get_communications_library() if communications_library == 'saga': @@ -651,6 +705,13 @@ class Job(object): template, snippet.as_tailer()]) + @staticmethod + def 
is_a_completed_retrial(fields): + if len(fields) == 4: + if fields[3] == 'COMPLETED': + return True + return False + def create_script(self, as_conf): """ Creates script file to be run for the job @@ -666,12 +727,22 @@ class Job(object): template_content = re.sub('%(?. + +from autosubmit.job.job import Job +from bscearth.utils.date import date2str +from autosubmit.job.job_common import Status, Type + + +class DicJobs: + """ + Class to create jobs from conf file and to find jobs by start date, member and chunk + + :param jobs_list: jobs list to use + :type job_list: JobList + :param parser: jobs conf file parser + :type parser: SafeConfigParser + :param date_list: start dates + :type date_list: list + :param member_list: member + :type member_list: list + :param chunk_list: chunks + :type chunk_list: list + :param date_format: option to format dates + :type date_format: str + :param default_retrials: default retrials for ech job + :type default_retrials: int + + """ + + def __init__(self, jobs_list, parser, date_list, member_list, chunk_list, date_format, default_retrials): + self._date_list = date_list + self._jobs_list = jobs_list + self._member_list = member_list + self._chunk_list = chunk_list + self._parser = parser + self._date_format = date_format + self.default_retrials = default_retrials + self._dic = dict() + + def read_section(self, section, priority, default_job_type, jobs_data=dict()): + """ + Read a section from jobs conf and creates all jobs for it + + :param default_job_type: default type for jobs + :type default_job_type: str + :param jobs_data: dictionary containing the plain data from jobs + :type jobs_data: dict + :param section: section to read + :type section: str + :param priority: priority for the jobs + :type priority: int + """ + running = 'once' + if self._parser.has_option(section, 'RUNNING'): + running = self._parser.get(section, 'RUNNING').lower() + frequency = int(self.get_option(section, "FREQUENCY", 1)) + if running == 'once': + 
self._create_jobs_once(section, priority, default_job_type, jobs_data) + elif running == 'date': + self._create_jobs_startdate(section, priority, frequency, default_job_type, jobs_data) + elif running == 'member': + self._create_jobs_member(section, priority, frequency, default_job_type, jobs_data) + elif running == 'chunk': + synchronize = self.get_option(section, "SYNCHRONIZE", None) + self._create_jobs_chunk(section, priority, frequency, default_job_type, synchronize, jobs_data) + + def _create_jobs_once(self, section, priority, default_job_type, jobs_data=dict()): + """ + Create jobs to be run once + + :param section: section to read + :type section: str + :param priority: priority for the jobs + :type priority: int + """ + self._dic[section] = self.build_job(section, priority, None, None, None, default_job_type, jobs_data) + self._jobs_list.graph.add_node(self._dic[section].name) + + def _create_jobs_startdate(self, section, priority, frequency, default_job_type, jobs_data=dict()): + """ + Create jobs to be run once per start date + + :param section: section to read + :type section: str + :param priority: priority for the jobs + :type priority: int + :param frequency: if greater than 1, only creates one job each frequency startdates. 
Allways creates one job + for the last + :type frequency: int + """ + self._dic[section] = dict() + count = 0 + for date in self._date_list: + count += 1 + if count % frequency == 0 or count == len(self._date_list): + self._dic[section][date] = self.build_job(section, priority, date, None, None, default_job_type, + jobs_data) + self._jobs_list.graph.add_node(self._dic[section][date].name) + + def _create_jobs_member(self, section, priority, frequency, default_job_type, jobs_data=dict()): + """ + Create jobs to be run once per member + + :param section: section to read + :type section: str + :param priority: priority for the jobs + :type priority: int + :param frequency: if greater than 1, only creates one job each frequency members. Allways creates one job + for the last + :type frequency: int + """ + self._dic[section] = dict() + for date in self._date_list: + self._dic[section][date] = dict() + count = 0 + for member in self._member_list: + count += 1 + if count % frequency == 0 or count == len(self._member_list): + self._dic[section][date][member] = self.build_job(section, priority, date, member, None, + default_job_type, jobs_data) + self._jobs_list.graph.add_node(self._dic[section][date][member].name) + + ''' + Maybe a good choice could be split this function or ascend the + conditional decision to the father which makes the call + ''' + + def _create_jobs_chunk(self, section, priority, frequency, default_job_type, synchronize=None, jobs_data=dict()): + """ + Create jobs to be run once per chunk + + :param synchronize: + :param section: section to read + :type section: str + :param priority: priority for the jobs + :type priority: int + :param frequency: if greater than 1, only creates one job each frequency chunks. 
Always creates one job + for the last + :type frequency: int + """ + # Temporally creation for unified jobs in case of synchronize + if synchronize is not None: + tmp_dic = dict() + count = 0 + for chunk in self._chunk_list: + count += 1 + if count % frequency == 0 or count == len(self._chunk_list): + if synchronize == 'date': + tmp_dic[chunk] = self.build_job(section, priority, None, None, + chunk, default_job_type, jobs_data) + elif synchronize == 'member': + tmp_dic[chunk] = dict() + for date in self._date_list: + tmp_dic[chunk][date] = self.build_job(section, priority, date, None, + chunk, default_job_type, jobs_data) + # Real dic jobs assignment/creation + self._dic[section] = dict() + for date in self._date_list: + self._dic[section][date] = dict() + for member in self._member_list: + self._dic[section][date][member] = dict() + count = 0 + for chunk in self._chunk_list: + count += 1 + if count % frequency == 0 or count == len(self._chunk_list): + if synchronize == 'date': + self._dic[section][date][member][chunk] = tmp_dic[chunk] + elif synchronize == 'member': + self._dic[section][date][member][chunk] = tmp_dic[chunk][date] + else: + self._dic[section][date][member][chunk] = self.build_job(section, priority, date, member, + chunk, default_job_type, jobs_data) + self._jobs_list.graph.add_node(self._dic[section][date][member][chunk].name) + + def get_jobs(self, section, date=None, member=None, chunk=None): + """ + Return all the jobs matching section, date, member and chunk provided. If any parameter is none, returns all + the jobs without checking that parameter value. 
If a job has one parameter to None, is returned if all the + others match parameters passed + + :param section: section to return + :type section: str + :param date: stardate to return + :type date: str + :param member: member to return + :type member: str + :param chunk: chunk to return + :type chunk: int + :return: jobs matching parameters passed + :rtype: list + """ + jobs = list() + + if section not in self._dic: + return jobs + + dic = self._dic[section] + if type(dic) is not dict: + jobs.append(dic) + else: + if date is not None: + self._get_date(jobs, dic, date, member, chunk) + else: + for d in self._date_list: + self._get_date(jobs, dic, d, member, chunk) + return jobs + + def _get_date(self, jobs, dic, date, member, chunk): + if date not in dic: + return jobs + dic = dic[date] + if type(dic) is not dict: + jobs.append(dic) + else: + if member is not None: + self._get_member(jobs, dic, member, chunk) + else: + for m in self._member_list: + self._get_member(jobs, dic, m, chunk) + + return jobs + + def _get_member(self, jobs, dic, member, chunk): + if member not in dic: + return jobs + dic = dic[member] + if type(dic) is not dict: + jobs.append(dic) + else: + if chunk is not None and chunk in dic: + jobs.append(dic[chunk]) + else: + for c in self._chunk_list: + if c not in dic: + continue + jobs.append(dic[c]) + return jobs + + def build_job(self, section, priority, date, member, chunk, default_job_type, jobs_data=dict()): + name = self._jobs_list.expid + if date is not None: + name += "_" + date2str(date, self._date_format) + if member is not None: + name += "_" + member + if chunk is not None: + name += "_{0}".format(chunk) + name += "_" + section + if name in jobs_data: + job = Job(name, jobs_data[name][1], jobs_data[name][2], priority) + job.local_logs = (jobs_data[name][8], jobs_data[name][9]) + job.remote_logs = (jobs_data[name][10], jobs_data[name][11]) + else: + job = Job(name, 0, Status.WAITING, priority) + job.section = section + job.date = date + 
job.member = member + job.chunk = chunk + job.date_format = self._date_format + + job.frequency = int(self.get_option(section, "FREQUENCY", 1)) + job.wait = self.get_option(section, "WAIT", 'true').lower() == 'true' + job.rerun_only = self.get_option(section, "RERUN_ONLY", 'false').lower() == 'true' + + job_type = self.get_option(section, "TYPE", default_job_type).lower() + if job_type == 'bash': + job.type = Type.BASH + elif job_type == 'python': + job.type = Type.PYTHON + elif job_type == 'r': + job.type = Type.R + + job.platform_name = self.get_option(section, "PLATFORM", None) + if job.platform_name is not None: + job.platform_name = job.platform_name + job.file = self.get_option(section, "FILE", None) + job.queue = self.get_option(section, "QUEUE", None) + job.check = self.get_option(section, "CHECK", 'True').lower() + job.processors = str(self.get_option(section, "PROCESSORS", 1)) + job.threads = self.get_option(section, "THREADS", '') + job.tasks = self.get_option(section, "TASKS", '') + job.memory = self.get_option(section, "MEMORY", '') + job.memory_per_task = self.get_option(section, "MEMORY_PER_TASK", '') + job.wallclock = self.get_option(section, "WALLCLOCK", '') + job.retrials = int(self.get_option(section, 'RETRIALS', -1)) + if job.retrials == -1: + job.retrials = None + job.notify_on = [x.upper() for x in self.get_option(section, "NOTIFY_ON", '').split(' ')] + self._jobs_list.get_job_list().append(job) + return job + + def get_option(self, section, option, default): + """ + Returns value for a given option + + :param section: section name + :type section: str + :param option: option to return + :type option: str + :param default: value to return if not defined in configuration file + :type default: object + """ + if self._parser.has_option(section, option): + return self._parser.get(section, option) + else: + return default diff --git a/autosubmit/config/parser_factory.py b/autosubmit/job/job_exceptions.py similarity index 65% rename from 
autosubmit/config/parser_factory.py rename to autosubmit/job/job_exceptions.py index 35841cd10c9b9124a78bef99f5569c22269091f8..7bd32c8f9748a9bc29933e23fd7c4614576b9a14 100644 --- a/autosubmit/config/parser_factory.py +++ b/autosubmit/job/job_exceptions.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -17,18 +17,13 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -try: - # noinspection PyCompatibility - from configparser import SafeConfigParser -except ImportError: - # noinspection PyCompatibility - from ConfigParser import SafeConfigParser +class WrongTemplateException(Exception): + """ + Class to alert when the template checking fails for a given job + """ -class ConfigParserFactory: + def __init__(self, job_name): + super(WrongTemplateException, self).__init__() + self.job_name = job_name - def __init__(self): - pass - - def create_parser(self): - return SafeConfigParser() diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 48515eac74fb3787528866053b4cbd477679dc83..2c4022c040347d5214f3f7dd5d2fe35e33f51260 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. 
@@ -30,12 +30,13 @@ from time import localtime, strftime from sys import setrecursionlimit from shutil import move -from autosubmit.job.job_common import Status, Type from autosubmit.job.job import Job -from autosubmit.job.job_package import JobPackageSimple -from autosubmit.job.job_package import JobPackageArray -from autosubmit.config.log import Log -from autosubmit.date.chunk_date_lib import date2str, parse_date +from bscearth.utils.log import Log +from autosubmit.job.job_dict import DicJobs +from autosubmit.job.job_utils import Dependency +from autosubmit.job.job_common import Status, Type +from bscearth.utils.date import date2str, parse_date, sum_str_hours +from autosubmit.job.job_packages import JobPackageSimple, JobPackageArray, JobPackageThread from networkx import DiGraph from autosubmit.job.job_utils import transitive_reduction @@ -89,8 +90,8 @@ class JobList: def graph(self, value): self._graph = value - def generate(self, date_list, member_list, num_chunks, parameters, date_format, default_retrials, default_job_type, - new=True): + def generate(self, date_list, member_list, num_chunks, chunk_ini, parameters, date_format, default_retrials, + default_job_type, new=True): """ Creates all jobs needed for the current workflow @@ -102,6 +103,8 @@ class JobList: :type member_list: list :param num_chunks: number of chunks to run :type num_chunks: int + :param chunk_ini: the experiment will start by the given chunk + :type chunk_ini: int :param parameters: parameters for the jobs :type parameters: dict :param date_format: option to format dates @@ -115,7 +118,7 @@ class JobList: self._date_list = date_list self._member_list = member_list - chunk_list = range(1, num_chunks + 1) + chunk_list = range(chunk_ini, num_chunks + 1) self._chunk_list = chunk_list jobs_parser = self._get_jobs_parser() @@ -169,7 +172,8 @@ class JobList: return dependencies @staticmethod - def _manage_job_dependencies(dic_jobs, job, date_list, member_list, chunk_list, dependencies_keys, 
dependencies, graph): + def _manage_job_dependencies(dic_jobs, job, date_list, member_list, chunk_list, dependencies_keys, dependencies, + graph): for key in dependencies_keys: dependency = dependencies[key] skip, (chunk, member, date) = JobList._calculate_dependency_metadata(job.chunk, chunk_list, @@ -515,7 +519,7 @@ class JobList: def update_from_file(self, store_change=True): """ - Updates joblist on the fly from and update file + Updates jobs list on the fly from and update file :param store_change: if True, renames the update file to avoid reloading it at the next iteration """ if os.path.exists(os.path.join(self._persistence_path, self._update_file)): @@ -632,9 +636,12 @@ class JobList: for job in self._job_list: if job.section in sections_checked: continue - if not job.check_script(as_conf, self.parameters): - out = False - Log.warning("Invalid parameter substitution in {0} template!!!", job.section) + if job.check.lower() != 'true': + Log.warning('Template {0} will not be checked'.format(job.section)) + else: + if not job.check_script(as_conf, self.parameters): + out = False + Log.warning("Invalid parameter substitution in {0} template", job.section) sections_checked.add(job.section) if out: Log.result("Scripts OK") @@ -670,6 +677,7 @@ class JobList: jobs_parser = self._get_jobs_parser() Log.info("Adding dependencies...") + dependencies = dict() for job_section in jobs_parser.sections(): Log.debug("Reading rerun dependencies for {0} jobs".format(job_section)) @@ -745,366 +753,3 @@ class JobList: if flag: self.update_genealogy() del self._dic_jobs - - def get_ready_packages(self, platform): - # Check there are ready jobs - jobs_available = self.get_ready(platform) - if len(jobs_available) == 0: - return list() - Log.info("\nJobs ready for {1}: {0}", len(jobs_available), platform.name) - # Checking available submission slots - max_waiting_jobs = platform.max_waiting_jobs - waiting_jobs = len(self.get_submitted(platform) + self.get_queuing(platform)) - 
max_wait_jobs_to_submit = max_waiting_jobs - waiting_jobs - max_jobs_to_submit = platform.total_jobs - len(self.get_in_queue(platform)) - # Logging obtained data - Log.debug("Number of jobs ready: {0}", len(jobs_available)) - Log.debug("Number of jobs available: {0}", max_wait_jobs_to_submit) - Log.info("Jobs to submit: {0}", min(max_wait_jobs_to_submit, len(jobs_available))) - # If can submit jobs - if max_wait_jobs_to_submit > 0 and max_jobs_to_submit > 0: - available_sorted = sorted(jobs_available, key=lambda k: k.long_name.split('_')[1][:6]) - list_of_available = sorted(available_sorted, key=lambda k: k.priority, reverse=True) - num_jobs_to_submit = min(max_wait_jobs_to_submit, len(jobs_available), max_jobs_to_submit) - jobs_to_submit = list_of_available[0:num_jobs_to_submit] - jobs_to_submit_by_section = self.divide_list_by_section(jobs_to_submit) - packages_to_submit = list() - if platform.allow_arrays: - for section_list in jobs_to_submit_by_section.values(): - packages_to_submit.append(JobPackageArray(section_list)) - return packages_to_submit - for job in jobs_to_submit: - packages_to_submit.append(JobPackageSimple([job])) - return packages_to_submit - return list() # no packages to submit - - @staticmethod - def divide_list_by_section(jobs_list): - """ - Returns a dict() with as many keys as 'jobs_list' different sections. - The value for each key is a list() with all the jobs with the key section. 
- - :param jobs_list: list of jobs to be divided - :rtype: dict - """ - by_section = dict() - for job in jobs_list: - if job.section not in by_section: - by_section[job.section] = list() - by_section[job.section].append(job) - return by_section - - -class DicJobs: - """ - Class to create jobs from conf file and to find jobs by stardate, member and chunk - - :param joblist: joblist to use - :type joblist: JobList - :param parser: jobs conf file parser - :type parser: SafeConfigParser - :param date_list: startdates - :type date_list: list - :param member_list: member - :type member_list: list - :param chunk_list: chunks - :type chunk_list: list - :param date_format: option to formate dates - :type date_format: str - :param default_retrials: default retrials for ech job - :type default_retrials: int - - """ - - def __init__(self, joblist, parser, date_list, member_list, chunk_list, date_format, default_retrials): - self._date_list = date_list - self._joblist = joblist - self._member_list = member_list - self._chunk_list = chunk_list - self._parser = parser - self._date_format = date_format - self.default_retrials = default_retrials - self._dic = dict() - - def read_section(self, section, priority, default_job_type, jobs_data=dict()): - """ - Read a section from jobs conf and creates all jobs for it - - :param default_job_type: default type for jobs - :type default_job_type: str - :param jobs_data: dictionary containing the plain data from jobs - :type jobs_data: dict - :param section: section to read - :type section: str - :param priority: priority for the jobs - :type priority: int - """ - running = 'once' - if self._parser.has_option(section, 'RUNNING'): - running = self._parser.get(section, 'RUNNING').lower() - frequency = int(self.get_option(section, "FREQUENCY", 1)) - if running == 'once': - self._create_jobs_once(section, priority, default_job_type, jobs_data) - elif running == 'date': - self._create_jobs_startdate(section, priority, frequency, default_job_type, 
jobs_data) - elif running == 'member': - self._create_jobs_member(section, priority, frequency, default_job_type, jobs_data) - elif running == 'chunk': - synchronize = self.get_option(section, "SYNCHRONIZE", None) - self._create_jobs_chunk(section, priority, frequency, default_job_type, synchronize, jobs_data) - - def _create_jobs_once(self, section, priority, default_job_type, jobs_data=dict()): - """ - Create jobs to be run once - - :param section: section to read - :type section: str - :param priority: priority for the jobs - :type priority: int - """ - self._dic[section] = self.build_job(section, priority, None, None, None, default_job_type, jobs_data) - self._joblist.graph.add_node(self._dic[section].name) - - def _create_jobs_startdate(self, section, priority, frequency, default_job_type, jobs_data=dict()): - """ - Create jobs to be run once per startdate - - :param section: section to read - :type section: str - :param priority: priority for the jobs - :type priority: int - :param frequency: if greater than 1, only creates one job each frequency startdates. Allways creates one job - for the last - :type frequency: int - """ - self._dic[section] = dict() - count = 0 - for date in self._date_list: - count += 1 - if count % frequency == 0 or count == len(self._date_list): - self._dic[section][date] = self.build_job(section, priority, date, None, None, default_job_type, - jobs_data) - self._joblist.graph.add_node(self._dic[section][date].name) - - def _create_jobs_member(self, section, priority, frequency, default_job_type, jobs_data=dict()): - """ - Create jobs to be run once per member - - :param section: section to read - :type section: str - :param priority: priority for the jobs - :type priority: int - :param frequency: if greater than 1, only creates one job each frequency members. 
Allways creates one job - for the last - :type frequency: int - """ - self._dic[section] = dict() - for date in self._date_list: - self._dic[section][date] = dict() - count = 0 - for member in self._member_list: - count += 1 - if count % frequency == 0 or count == len(self._member_list): - self._dic[section][date][member] = self.build_job(section, priority, date, member, None, - default_job_type, jobs_data) - self._joblist.graph.add_node(self._dic[section][date][member].name) - - ''' - Maybe a good choice could be split this function or ascend the - conditional decision to the father which makes the call - ''' - - def _create_jobs_chunk(self, section, priority, frequency, default_job_type, synchronize=None, jobs_data=dict()): - """ - Create jobs to be run once per chunk - - :param synchronize: - :param section: section to read - :type section: str - :param priority: priority for the jobs - :type priority: int - :param frequency: if greater than 1, only creates one job each frequency chunks. 
Always creates one job - for the last - :type frequency: int - """ - # Temporally creation for unified jobs in case of synchronize - if synchronize is not None: - tmp_dic = dict() - count = 0 - for chunk in self._chunk_list: - count += 1 - if count % frequency == 0 or count == len(self._chunk_list): - if synchronize == 'date': - tmp_dic[chunk] = self.build_job(section, priority, None, None, - chunk, default_job_type, jobs_data) - elif synchronize == 'member': - tmp_dic[chunk] = dict() - for date in self._date_list: - tmp_dic[chunk][date] = self.build_job(section, priority, date, None, - chunk, default_job_type, jobs_data) - # Real dic jobs assignment/creation - self._dic[section] = dict() - for date in self._date_list: - self._dic[section][date] = dict() - for member in self._member_list: - self._dic[section][date][member] = dict() - count = 0 - for chunk in self._chunk_list: - count += 1 - if count % frequency == 0 or count == len(self._chunk_list): - if synchronize == 'date': - self._dic[section][date][member][chunk] = tmp_dic[chunk] - elif synchronize == 'member': - self._dic[section][date][member][chunk] = tmp_dic[chunk][date] - else: - self._dic[section][date][member][chunk] = self.build_job(section, priority, date, member, - chunk, default_job_type, jobs_data) - self._joblist.graph.add_node(self._dic[section][date][member][chunk].name) - - def get_jobs(self, section, date=None, member=None, chunk=None): - """ - Return all the jobs matching section, date, member and chunk provided. If any parameter is none, returns all - the jobs without checking that parameter value. 
If a job has one parameter to None, is returned if all the - others match parameters passed - - :param section: section to return - :type section: str - :param date: stardate to return - :type date: str - :param member: member to return - :type member: str - :param chunk: chunk to return - :type chunk: int - :return: jobs matching parameters passed - :rtype: list - """ - jobs = list() - - if section not in self._dic: - return jobs - - dic = self._dic[section] - if type(dic) is not dict: - jobs.append(dic) - else: - if date is not None: - self._get_date(jobs, dic, date, member, chunk) - else: - for d in self._date_list: - self._get_date(jobs, dic, d, member, chunk) - return jobs - - def _get_date(self, jobs, dic, date, member, chunk): - if date not in dic: - return jobs - dic = dic[date] - if type(dic) is not dict: - jobs.append(dic) - else: - if member is not None: - self._get_member(jobs, dic, member, chunk) - else: - for m in self._member_list: - self._get_member(jobs, dic, m, chunk) - - return jobs - - def _get_member(self, jobs, dic, member, chunk): - if member not in dic: - return jobs - dic = dic[member] - if type(dic) is not dict: - jobs.append(dic) - else: - if chunk is not None and chunk in dic: - jobs.append(dic[chunk]) - else: - for c in self._chunk_list: - if c not in dic: - continue - jobs.append(dic[c]) - return jobs - - def build_job(self, section, priority, date, member, chunk, default_job_type, jobs_data=dict()): - name = self._joblist.expid - if date is not None: - name += "_" + date2str(date, self._date_format) - if member is not None: - name += "_" + member - if chunk is not None: - name += "_{0}".format(chunk) - name += "_" + section - if name in jobs_data: - job = Job(name, jobs_data[name][1], jobs_data[name][2], priority) - job.local_logs = (jobs_data[name][8], jobs_data[name][9]) - job.remote_logs = (jobs_data[name][10], jobs_data[name][11]) - else: - job = Job(name, 0, Status.WAITING, priority) - job.section = section - job.date = date - 
job.member = member - job.chunk = chunk - job.date_format = self._date_format - - job.frequency = int(self.get_option(section, "FREQUENCY", 1)) - job.wait = self.get_option(section, "WAIT", 'true').lower() == 'true' - job.rerun_only = self.get_option(section, "RERUN_ONLY", 'false').lower() == 'true' - - type = self.get_option(section, "TYPE", default_job_type).lower() - if type == 'bash': - job.type = Type.BASH - elif type == 'python': - job.type = Type.PYTHON - elif type == 'r': - job.type = Type.R - - job.platform_name = self.get_option(section, "PLATFORM", None) - if job.platform_name is not None: - job.platform_name = job.platform_name - job.file = self.get_option(section, "FILE", None) - job.queue = self.get_option(section, "QUEUE", None) - if self.get_option(section, "CHECK", 'True').lower() == 'true': - job.check = True - else: - job.check = False - - job.processors = self.get_option(section, "PROCESSORS", 1) - job.threads = self.get_option(section, "THREADS", '') - job.tasks = self.get_option(section, "TASKS", '') - job.memory = self.get_option(section, "MEMORY", '') - job.wallclock = self.get_option(section, "WALLCLOCK", '') - job.retrials = int(self.get_option(section, 'RETRIALS', -1)) - if job.retrials == -1: - job.retrials = None - job.notify_on = [x.upper() for x in self.get_option(section, "NOTIFY_ON", '').split(' ')] - self._joblist.get_job_list().append(job) - return job - - def get_option(self, section, option, default): - """ - Returns value for a given option - - :param section: section name - :type section: str - :param option: option to return - :type option: str - :param default: value to return if not defined in configuration file - :type default: object - """ - if self._parser.has_option(section, option): - return self._parser.get(section, option) - else: - return default - - -class Dependency(object): - """ - Class to manage the metadata related with a dependency - - """ - - def __init__(self, section, distance=None, running=None, 
sign=None): - self.section = section - self.distance = distance - self.running = running - self.sign = sign diff --git a/autosubmit/job/job_list_persistence.py b/autosubmit/job/job_list_persistence.py index 23008451c67b58dd72b5a9fe563f26b1aa30319f..735f56e6bb48c4361e9935da95f00732f84a73ba 100644 --- a/autosubmit/job/job_list_persistence.py +++ b/autosubmit/job/job_list_persistence.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,12 +16,13 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . + import pickle from sys import setrecursionlimit import os -from autosubmit.config.log import Log +from bscearth.utils.log import Log from autosubmit.database.db_manager import DbManager @@ -105,7 +106,7 @@ class JobListPersistenceDb(JobListPersistence): JOB_LIST_TABLE = 'job_list' TABLE_FIELDS = ['name', 'id', 'status', 'priority', 'section', 'date', 'member', 'chunk', - 'local_out', 'local_err' + 'local_out', 'local_err', 'remote_out', 'remote_err'] def __init__(self, persistence_path, persistence_file): diff --git a/autosubmit/job/job_package.py b/autosubmit/job/job_package.py deleted file mode 100644 index 58c7d84a9e917bcc9c9d4d52cd42dccebc327ab4..0000000000000000000000000000000000000000 --- a/autosubmit/job/job_package.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2016 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
- -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . -try: - # noinspection PyCompatibility - from configparser import SafeConfigParser -except ImportError: - # noinspection PyCompatibility - from ConfigParser import SafeConfigParser - -import time -import os -from autosubmit.job.job_common import Status -from autosubmit.config.log import Log - - -class JobPackageBase(object): - """ - Class to manage the package of jobs to be submitted by autosubmit - """ - - def __init__(self, jobs): - self._jobs = jobs - try: - self._tmp_path = jobs[0]._tmp_path - self._platform = jobs[0].platform - for job in jobs: - if job.platform != self._platform or job.platform is None: - raise Exception('Only one valid platform per package') - except IndexError: - raise Exception('No jobs given') - - def __len__(self): - return self._jobs.__len__() - - @property - def jobs(self): - """ - Returns the jobs - - :return: jobs - :rtype: List[Job] - """ - return self._jobs - - @property - def platform(self): - """ - Returns the platform - - :return: platform - :rtype: Platform - """ - return self._platform - - def submit(self, configuration, parameters): - for job in self.jobs: - job.update_parameters(configuration, parameters) - self._create_scripts(configuration) - self._send_files() - self._do_submission() - - def _create_scripts(self, configuration): - raise Exception('Not implemented') - - def _send_files(self): - raise Exception('Not implemented') - - def _do_submission(self): - raise Exception('Not implemented') - - -class JobPackageSimple(JobPackageBase): - """ - Class to manage the package of jobs to be submitted by autosubmit - """ - - def __init__(self, jobs): - self._job_scripts = {} - 
super(JobPackageSimple, self).__init__(jobs) - - def _create_scripts(self, configuration): - for job in self.jobs: - self._job_scripts[job.name] = job.create_script(configuration) - - def _send_files(self): - for job in self.jobs: - self.platform.send_file(self._job_scripts[job.name]) - - def _do_submission(self): - for job in self.jobs: - self.platform.remove_stat_file(job.name) - self.platform.remove_completed_file(job.name) - job.id = self.platform.submit_job(job, self._job_scripts[job.name]) - if job.id is None: - continue - Log.info("{0} submitted", job.name) - job.status = Status.SUBMITTED - job.write_submit_time() - - -class JobPackageArray(JobPackageBase): - """ - Class to manage the package of jobs to be submitted by autosubmit - """ - - def __init__(self, jobs): - self._job_inputs = {} - self._job_scripts = {} - self._common_script = None - self._array_size_id = "[1-" + str(len(jobs)) + "]" - self._wallclock = '00:00' - self._num_processors = 1 - for job in jobs: - if job.wallclock > self._wallclock: - self._wallclock = job.wallclock - if job.processors > self._num_processors: - self._num_processors = job.processors - super(JobPackageArray, self).__init__(jobs) - - def _create_scripts(self, configuration): - timestamp = str(int(time.time())) - for i in range(1, len(self.jobs) + 1): - self._job_scripts[self.jobs[i - 1].name] = self.jobs[i - 1].create_script(configuration) - self._job_inputs[self.jobs[i - 1].name] = self._create_i_input(timestamp, i) - self.jobs[i - 1].remote_logs = (timestamp + ".{0}.out".format(i), timestamp + ".{0}.err".format(i)) - self._common_script = self._create_common_script(timestamp) - - def _create_i_input(self, filename, index): - filename += '.{0}'.format(index) - input_content = self._job_scripts[self.jobs[index - 1].name] - open(os.path.join(self._tmp_path, filename), 'w').write(input_content) - os.chmod(os.path.join(self._tmp_path, filename), 0o775) - return filename - - def _create_common_script(self, filename): - 
script_content = self.platform.header.array_header(filename, self._array_size_id, self._wallclock, - self._num_processors) - filename += '.cmd' - open(os.path.join(self._tmp_path, filename), 'w').write(script_content) - os.chmod(os.path.join(self._tmp_path, filename), 0o775) - return filename - - def _send_files(self): - for job in self.jobs: - self.platform.send_file(self._job_scripts[job.name]) - self.platform.send_file(self._job_inputs[job.name]) - self.platform.send_file(self._common_script) - - def _do_submission(self): - for job in self.jobs: - self.platform.remove_stat_file(job.name) - self.platform.remove_completed_file(job.name) - - package_id = self.platform.submit_job(None, self._common_script) - - if package_id is None: - raise Exception('Submission failed') - - for i in range(1, len(self.jobs) + 1): - Log.info("{0} submitted", self.jobs[i - 1].name) - self.jobs[i - 1].id = str(package_id) + '[{0}]'.format(i) - self.jobs[i - 1].status = Status.SUBMITTED - self.jobs[i - 1].write_submit_time() diff --git a/autosubmit/job/job_packager.py b/autosubmit/job/job_packager.py new file mode 100644 index 0000000000000000000000000000000000000000..726f509ed38ee8c96da2dff584846182ab7d468a --- /dev/null +++ b/autosubmit/job/job_packager.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +from bscearth.utils.log import Log +from autosubmit.job.job_common import Status, Type +from bscearth.utils.date import date2str, parse_date, sum_str_hours +from autosubmit.job.job_packages import JobPackageSimple, JobPackageArray, JobPackageVertical, JobPackageHorizontal, \ + JobPackageSimpleWrapped + + +class JobPackager(object): + """ + The main responsibility of this class is to manage the packages of jobs that have to be submitted. + """ + + def __init__(self, as_config, platform, jobs_list): + self._as_config = as_config + self._platform = platform + self._jobs_list = jobs_list + + waiting_jobs = len(jobs_list.get_submitted(platform) + jobs_list.get_queuing(platform)) + self._max_wait_jobs_to_submit = platform.max_waiting_jobs - waiting_jobs + self._max_jobs_to_submit = platform.total_jobs - len(jobs_list.get_in_queue(platform)) + + Log.debug("Number of jobs ready: {0}", len(jobs_list.get_ready(platform))) + Log.debug("Number of jobs available: {0}", self._max_wait_jobs_to_submit) + if len(jobs_list.get_ready(platform)) > 0: + Log.info("Jobs ready for {0}: {1}", self._platform.name, len(jobs_list.get_ready(platform))) + + def build_packages(self): + """ + Returns the list of the built packages to be submitted + + :return: list of packages + :rtype list + """ + packages_to_submit = list() + jobs_ready = self._jobs_list.get_ready(self._platform) + if jobs_ready == 0: + return packages_to_submit + if not (self._max_wait_jobs_to_submit > 0 and self._max_jobs_to_submit > 0): + return packages_to_submit + + available_sorted = sorted(jobs_ready, key=lambda k: k.long_name.split('_')[1][:6]) + list_of_available = sorted(available_sorted, key=lambda k: k.priority, reverse=True) + num_jobs_to_submit = min(self._max_wait_jobs_to_submit, len(jobs_ready), self._max_jobs_to_submit) + jobs_to_submit = list_of_available[0:num_jobs_to_submit] + jobs_to_submit_by_section = JobPackager._divide_list_by_section(jobs_to_submit) + + # If wrapper allowed / well-configured + 
wrapper_type = self._as_config.get_wrapper_type() + if self._platform.allow_wrappers and wrapper_type in ['horizontal', 'vertical']: + remote_dependencies = self._as_config.get_remote_dependencies() + max_jobs = min(self._max_wait_jobs_to_submit, self._max_jobs_to_submit) + if wrapper_type == 'vertical': + for section_list in jobs_to_submit_by_section.values(): + built_packages, max_jobs = JobPackager._build_vertical_packages(section_list, + max_jobs, + self._platform.max_wallclock, + remote_dependencies) + packages_to_submit += built_packages + return packages_to_submit + elif wrapper_type == 'horizontal': + for section_list in jobs_to_submit_by_section.values(): + built_packages, max_jobs = JobPackager._build_horizontal_packages(section_list, + max_jobs, + self._platform.max_processors, + remote_dependencies) + packages_to_submit += built_packages + return packages_to_submit + # No wrapper allowed / well-configured + for job in jobs_to_submit: + if job.type == Type.PYTHON and not self._platform.allow_python_jobs: + package = JobPackageSimpleWrapped([job]) + else: + package = JobPackageSimple([job]) + packages_to_submit.append(package) + return packages_to_submit + + @staticmethod + def _divide_list_by_section(jobs_list): + """ + Returns a dict() with as many keys as 'jobs_list' different sections. + The value for each key is a list() with all the jobs with the key section. 
+ + :param jobs_list: list of jobs to be divided + :rtype: dict + """ + by_section = dict() + for job in jobs_list: + if job.section not in by_section: + by_section[job.section] = list() + by_section[job.section].append(job) + return by_section + + @staticmethod + def _build_horizontal_packages(section_list, max_jobs, max_processors, remote_dependencies=False): + # TODO: Implement remote dependencies for horizontal wrapper + packages = [] + current_package = [] + current_processors = 0 + for job in section_list: + if max_jobs > 0: + max_jobs -= 1 + if (current_processors + job.total_processors) <= int(max_processors): + current_package.append(job) + current_processors += job.total_processors + else: + packages.append(JobPackageHorizontal(current_package)) + current_package = [job] + current_processors = job.total_processors + else: + break + if len(current_package) > 0: + packages.append(JobPackageHorizontal(current_package)) + return packages, max_jobs + + @staticmethod + def _build_vertical_packages(section_list, max_jobs, max_wallclock, remote_dependencies=False): + packages = [] + potential_dependency = None + for job in section_list: + if max_jobs > 0: + jobs_list = JobPackager._build_vertical_package(job, [job], job.wallclock, max_jobs, max_wallclock) + max_jobs -= len(jobs_list) + if job.status is Status.READY: + packages.append(JobPackageVertical(jobs_list)) + else: + packages.append(JobPackageVertical(jobs_list, potential_dependency)) + if remote_dependencies: + child = JobPackager._get_wrappable_child(jobs_list[-1], JobPackager._is_wrappable) + if child is not None: + section_list.insert(section_list.index(job) + 1, child) + potential_dependency = packages[-1].name + else: + break + return packages, max_jobs + + @staticmethod + def _build_vertical_package(job, jobs_list, total_wallclock, max_jobs, max_wallclock): + if len(jobs_list) >= max_jobs: + return jobs_list + child = JobPackager._get_wrappable_child(job, JobPackager._is_wrappable) + if child is not 
None: + total_wallclock = sum_str_hours(total_wallclock, child.wallclock) + if total_wallclock <= max_wallclock: + jobs_list.append(child) + return JobPackager._build_vertical_package(child, jobs_list, total_wallclock, max_jobs, max_wallclock) + return jobs_list + + @staticmethod + def _get_wrappable_child(job, check_function): + for child in job.children: + if check_function(job, child): + return child + continue + return None + + @staticmethod + def _is_wrappable(parent, child): + if child.section != parent.section: + return False + if len(child.parents) > 1: + return False + return True diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py new file mode 100644 index 0000000000000000000000000000000000000000..11d206d15b076797916a7be9c92f005113016d5f --- /dev/null +++ b/autosubmit/job/job_packages.py @@ -0,0 +1,405 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +try: + # noinspection PyCompatibility + from configparser import SafeConfigParser +except ImportError: + # noinspection PyCompatibility + from ConfigParser import SafeConfigParser + +import os +import time +import random +from autosubmit.job.job_common import Status +from bscearth.utils.log import Log +from autosubmit.job.job_exceptions import WrongTemplateException +from autosubmit.job.job import Job +from bscearth.utils.date import sum_str_hours + + +class JobPackageBase(object): + """ + Class to manage the package of jobs to be submitted by autosubmit + """ + + def __init__(self, jobs): + self._jobs = jobs + self._expid = jobs[0].expid + try: + self._tmp_path = jobs[0]._tmp_path + self._platform = jobs[0].platform + for job in jobs: + if job.platform != self._platform or job.platform is None: + raise Exception('Only one valid platform per package') + except IndexError: + raise Exception('No jobs given') + + def __len__(self): + return self._jobs.__len__() + + @property + def jobs(self): + """ + Returns the jobs + + :return: jobs + :rtype: List[Job] + """ + return self._jobs + + @property + def platform(self): + """ + Returns the platform + + :return: platform + :rtype: Platform + """ + return self._platform + + def submit(self, configuration, parameters): + for job in self.jobs: + if job.check.lower() == Job.CHECK_ON_SUBMISSION: + if not job.check_script(configuration, parameters): + raise WrongTemplateException(job.name) + job.update_parameters(configuration, parameters) + self._create_scripts(configuration) + self._send_files() + self._do_submission() + + def _create_scripts(self, configuration): + raise Exception('Not implemented') + + def _send_files(self): + raise Exception('Not implemented') + + def _do_submission(self): + raise Exception('Not implemented') + + +class JobPackageSimple(JobPackageBase): + """ + Class to manage a group of simple jobs, not packaged, to be submitted by autosubmit + """ + + def __init__(self, jobs): + super(JobPackageSimple, 
self).__init__(jobs) + self._job_scripts = {} + + def _create_scripts(self, configuration): + for job in self.jobs: + self._job_scripts[job.name] = job.create_script(configuration) + + def _send_files(self): + for job in self.jobs: + self.platform.send_file(self._job_scripts[job.name]) + + def _do_submission(self, job_scripts=None): + if job_scripts is None: + job_scripts = self._job_scripts + for job in self.jobs: + self.platform.remove_stat_file(job.name) + self.platform.remove_completed_file(job.name) + job.id = self.platform.submit_job(job, job_scripts[job.name]) + if job.id is None: + continue + Log.info("{0} submitted", job.name) + job.status = Status.SUBMITTED + job.write_submit_time() + + +class JobPackageSimpleWrapped(JobPackageSimple): + """ + Class to manage a group of simple wrapped jobs, not packaged, to be submitted by autosubmit + """ + + def __init__(self, jobs): + super(JobPackageSimpleWrapped, self).__init__(jobs) + self._job_wrapped_scripts = {} + + def _create_scripts(self, configuration): + super(JobPackageSimpleWrapped, self)._create_scripts(configuration) + for job in self.jobs: + self._job_wrapped_scripts[job.name] = job.create_wrapped_script(configuration) + + def _send_files(self): + super(JobPackageSimpleWrapped, self)._send_files() + for job in self.jobs: + self.platform.send_file(self._job_wrapped_scripts[job.name]) + + def _do_submission(self, job_scripts=None): + if job_scripts is None: + job_scripts = self._job_wrapped_scripts + super(JobPackageSimpleWrapped, self)._do_submission(job_scripts) + + +class JobPackageArray(JobPackageBase): + """ + Class to manage an array-based package of jobs to be submitted by autosubmit + """ + + def __init__(self, jobs): + self._job_inputs = {} + self._job_scripts = {} + self._common_script = None + self._array_size_id = "[1-" + str(len(jobs)) + "]" + self._wallclock = '00:00' + self._num_processors = '1' + for job in jobs: + if job.wallclock > self._wallclock: + self._wallclock = job.wallclock + if 
job.processors > self._num_processors: + self._num_processors = job.processors + super(JobPackageArray, self).__init__(jobs) + + def _create_scripts(self, configuration): + timestamp = str(int(time.time())) + for i in range(1, len(self.jobs) + 1): + self._job_scripts[self.jobs[i - 1].name] = self.jobs[i - 1].create_script(configuration) + self._job_inputs[self.jobs[i - 1].name] = self._create_i_input(timestamp, i) + self.jobs[i - 1].remote_logs = (timestamp + ".{0}.out".format(i), timestamp + ".{0}.err".format(i)) + self._common_script = self._create_common_script(timestamp) + + def _create_i_input(self, filename, index): + filename += '.{0}'.format(index) + input_content = self._job_scripts[self.jobs[index - 1].name] + open(os.path.join(self._tmp_path, filename), 'w').write(input_content) + os.chmod(os.path.join(self._tmp_path, filename), 0o775) + return filename + + def _create_common_script(self, filename): + script_content = self.platform.header.array_header(filename, self._array_size_id, self._wallclock, + self._num_processors) + filename += '.cmd' + open(os.path.join(self._tmp_path, filename), 'w').write(script_content) + os.chmod(os.path.join(self._tmp_path, filename), 0o775) + return filename + + def _send_files(self): + for job in self.jobs: + self.platform.send_file(self._job_scripts[job.name]) + self.platform.send_file(self._job_inputs[job.name]) + self.platform.send_file(self._common_script) + + def _do_submission(self): + for job in self.jobs: + self.platform.remove_stat_file(job.name) + self.platform.remove_completed_file(job.name) + + package_id = self.platform.submit_job(None, self._common_script) + + if package_id is None: + raise Exception('Submission failed') + + for i in range(1, len(self.jobs) + 1): + Log.info("{0} submitted", self.jobs[i - 1].name) + self.jobs[i - 1].id = str(package_id) + '[{0}]'.format(i) + self.jobs[i - 1].status = Status.SUBMITTED + self.jobs[i - 1].write_submit_time() + + +class JobPackageThread(JobPackageBase): + """ + 
Class to manage a thread-based package of jobs to be submitted by autosubmit + """ + FILE_PREFIX = 'ASThread' + + def __init__(self, jobs, dependency=None): + super(JobPackageThread, self).__init__(jobs) + self._job_scripts = {} + self._job_dependency = dependency + self._common_script = None + self._wallclock = '00:00' + self._num_processors = '0' + + @property + def name(self): + return self._name + + @property + def _jobs_scripts(self): + jobs_scripts = [] + for job in self.jobs: + jobs_scripts.append(self._job_scripts[job.name]) + return jobs_scripts + + @property + def _queue(self): + if str(self._num_processors) == '1': + return self.platform.serial_platform.serial_queue + else: + return self.platform.queue + + @property + def _project(self): + return self._platform.project + + def _create_scripts(self, configuration): + for i in range(1, len(self.jobs) + 1): + self._job_scripts[self.jobs[i - 1].name] = self.jobs[i - 1].create_script(configuration) + self.jobs[i - 1].remote_logs = ( + self._job_scripts[self.jobs[i - 1].name] + ".{0}.out".format(i - 1), + self._job_scripts[self.jobs[i - 1].name] + ".{0}.err".format(i - 1) + ) + self._common_script = self._create_common_script() + + def _create_common_script(self): + script_content = self._common_script_content() + script_file = self.name + '.cmd' + open(os.path.join(self._tmp_path, script_file), 'w').write(script_content) + os.chmod(os.path.join(self._tmp_path, script_file), 0o775) + return script_file + + def _send_files(self): + for job in self.jobs: + self.platform.send_file(self._job_scripts[job.name]) + self.platform.send_file(self._common_script) + + def _do_submission(self): + for job in self.jobs: + self.platform.remove_stat_file(job.name) + self.platform.remove_completed_file(job.name) + + package_id = self.platform.submit_job(None, self._common_script) + + if package_id is None: + raise Exception('Submission failed') + + for i in range(1, len(self.jobs) + 1): + Log.info("{0} submitted", self.jobs[i - 
1].name) + self.jobs[i - 1].id = str(package_id) + self.jobs[i - 1].status = Status.SUBMITTED + self.jobs[i - 1].write_submit_time() + + +class JobPackageThreadWrapped(JobPackageThread): + """ + Class to manage a thread-based package of jobs to be submitted by autosubmit + """ + FILE_PREFIX = 'ASThread' + + def __init__(self, jobs, dependency=None): + super(JobPackageThreadWrapped, self).__init__(jobs) + self._job_scripts = {} + self._job_dependency = dependency + self._common_script = None + self._wallclock = '00:00' + self._num_processors = '0' + + @property + def name(self): + return self._name + + @property + def _jobs_scripts(self): + jobs_scripts = [] + for job in self.jobs: + jobs_scripts.append(self._job_scripts[job.name]) + return jobs_scripts + + @property + def _queue(self): + if str(self._num_processors) == '1': + return self.platform.serial_platform.serial_queue + else: + return self.platform.queue + + @property + def _project(self): + return self._platform.project + + def _create_scripts(self, configuration): + for i in range(1, len(self.jobs) + 1): + self._job_scripts[self.jobs[i - 1].name] = self.jobs[i - 1].create_script(configuration) + self.jobs[i - 1].remote_logs = ( + self._job_scripts[self.jobs[i - 1].name] + ".{0}.out".format(i - 1), + self._job_scripts[self.jobs[i - 1].name] + ".{0}.err".format(i - 1) + ) + self._common_script = self._create_common_script() + + def _create_common_script(self): + script_content = self._common_script_content() + script_file = self.name + '.cmd' + open(os.path.join(self._tmp_path, script_file), 'w').write(script_content) + os.chmod(os.path.join(self._tmp_path, script_file), 0o775) + return script_file + + def _send_files(self): + for job in self.jobs: + self.platform.send_file(self._job_scripts[job.name]) + self.platform.send_file(self._common_script) + + def _do_submission(self): + for job in self.jobs: + self.platform.remove_stat_file(job.name) + self.platform.remove_completed_file(job.name) + + package_id = 
self.platform.submit_job(None, self._common_script) + + if package_id is None: + raise Exception('Submission failed') + + for i in range(1, len(self.jobs) + 1): + Log.info("{0} submitted", self.jobs[i - 1].name) + self.jobs[i - 1].id = str(package_id) + self.jobs[i - 1].status = Status.SUBMITTED + self.jobs[i - 1].write_submit_time() + + +class JobPackageVertical(JobPackageThread): + """ + Class to manage a vertical thread-based package of jobs to be submitted by autosubmit + """ + + def __init__(self, jobs, dependency=None): + super(JobPackageVertical, self).__init__(jobs, dependency) + for job in jobs: + if job.processors > self._num_processors: + self._num_processors = job.processors + self._wallclock = sum_str_hours(self._wallclock, job.wallclock) + self._name = self.FILE_PREFIX + "_{0}_{1}_{2}".format(str(int(time.time())) + str(random.randint(1, 10000)), + self._num_processors, + len(self._jobs)) + + def _common_script_content(self): + return self.platform.wrapper.vertical(self._name, self._queue, self._project, + self._wallclock, self._num_processors, + self._jobs_scripts, self._job_dependency, expid=self._expid) + + +class JobPackageHorizontal(JobPackageThread): + """ + Class to manage a horizontal thread-based package of jobs to be submitted by autosubmit + """ + + def __init__(self, jobs, dependency=None): + super(JobPackageHorizontal, self).__init__(jobs, dependency) + for job in jobs: + if job.wallclock > self._wallclock: + self._wallclock = job.wallclock + self._num_processors = str(int(self._num_processors) + int(job.processors)) + self._name = self.FILE_PREFIX + "_{0}_{1}_{2}".format(str(int(time.time())) + str(random.randint(1, 10000)), + self._num_processors, + len(self._jobs)) + + def _common_script_content(self): + return self.platform.wrapper.horizontal(self._name, self._queue, self._project, self._wallclock, + self._num_processors, len(self.jobs), self._jobs_scripts, + self._job_dependency, expid=self._expid) diff --git 
a/autosubmit/job/job_utils.py b/autosubmit/job/job_utils.py index f6f3cde17afb4c8905dff63c5ea6401eb6564952..25168ca0cca448dfaf3454cbd1f66d60f25731de 100644 --- a/autosubmit/job/job_utils.py +++ b/autosubmit/job/job_utils.py @@ -1,3 +1,22 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + import networkx from networkx.algorithms.dag import is_directed_acyclic_graph @@ -17,3 +36,17 @@ def transitive_reduction(graph): u_edges -= {y for x, y in dfs_edges(graph, v)} reduced_graph.add_edges_from((u, v) for v in u_edges) return reduced_graph + + +class Dependency(object): + """ + Class to manage the metadata related with a dependency + + """ + + def __init__(self, section, distance=None, running=None, sign=None): + self.section = section + self.distance = distance + self.running = running + self.sign = sign + diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py new file mode 100644 index 0000000000000000000000000000000000000000..696e87c577968ff34b18c2ccb1e3d0a2e19b960c --- /dev/null +++ b/autosubmit/monitor/diagram.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. 
+ +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import datetime +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import matplotlib.patches as mpatches +from autosubmit.experiment.statistics import ExperimentStats +from autosubmit.job.job_common import Status +from bscearth.utils.log import Log +from autosubmit.job.job import Job + +# Autosubmit stats constants +RATIO = 4 +MAX_JOBS_PER_PLOT = 12.0 + + +def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, period_ini=None, period_fi=None): + # Error prevention + plt.close('all') + # Stats variables definition + num_plots = int(np.ceil(len(jobs_list) / MAX_JOBS_PER_PLOT)) + ind = np.arange(int(MAX_JOBS_PER_PLOT)) + width = 0.16 + # Creating stats figure + fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * num_plots)) + fig.suptitle('STATS - ' + experiment_id, fontsize=24, fontweight='bold') + # Variables initialization + ax, ax2 = [], [] + rects = [None] * 6 + exp_stats = ExperimentStats(jobs_list, period_ini, period_fi) + grid_spec = gridspec.GridSpec(RATIO * num_plots + 2, 1) + for plot in range(1, num_plots + 1): + # Calculating jobs inside the given plot + l1 = int((plot - 1) * MAX_JOBS_PER_PLOT) + l2 = int(plot * MAX_JOBS_PER_PLOT) + # Building plot axis + ax.append(fig.add_subplot(grid_spec[RATIO * plot - RATIO + 2:RATIO * plot + 1])) + ax[plot - 1].set_ylabel('hours') + ax[plot - 
1].set_xticks(ind + width) + ax[plot - 1].set_xticklabels([job.name for job in jobs_list[l1:l2]], rotation='vertical') + ax[plot - 1].set_title(experiment_id, fontsize=20) + ax[plot - 1].set_ylim(0, float(1.10 * exp_stats.max_time)) + # Axis 2 + ax2.append(ax[plot - 1].twinx()) + ax2[plot - 1].set_ylabel('# failed jobs') + ax2[plot - 1].set_yticks(range(0, exp_stats.max_fail + 2)) + ax2[plot - 1].set_ylim(0, exp_stats.max_fail + 1) + # Building rects + rects[0] = ax[plot - 1].bar(ind, exp_stats.queued[l1:l2], width, color='orchid') + rects[1] = ax[plot - 1].bar(ind + width, exp_stats.run[l1:l2], width, color='limegreen') + rects[2] = ax2[plot - 1].bar(ind + width * 2, exp_stats.failed_jobs[l1:l2], width, color='red') + rects[3] = ax[plot - 1].bar(ind + width * 3, exp_stats.fail_queued[l1:l2], width, color='purple') + rects[4] = ax[plot - 1].bar(ind + width * 4, exp_stats.fail_run[l1:l2], width, color='tomato') + rects[5] = ax[plot - 1].plot([0., width * 6 * MAX_JOBS_PER_PLOT], [exp_stats.threshold, exp_stats.threshold], + "k--", label='wallclock sim') + + # Building legends subplot + legends_plot = fig.add_subplot(grid_spec[0, 0]) + legends_plot.set_frame_on(False) + legends_plot.axes.get_xaxis().set_visible(False) + legends_plot.axes.get_yaxis().set_visible(False) + + # Building legends + build_legends(legends_plot, rects, exp_stats, general_stats) + + # Saving output figure + grid_spec.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) + plt.savefig(output_file) + + +def build_legends(plot, rects, experiment_stats, general_stats): + # Main legend with colourful rectangles + legend_rects = [[rect[0] for rect in rects]] + legend_titles = [ + ['Queued (h)', 'Run (h)', 'Failed jobs (#)', 'Fail Queued (h)', 'Fail Run (h)', 'Max wallclock (h)'] + ] + legend_locs = ["upper right"] + legend_handlelengths = [None] + + # General stats legends, if exists + if len(general_stats) > 0: + legend_rects.append(get_whites_array(len(general_stats))) + legend_titles.append([str(key) + ': ' 
+ str(value) for key, value in general_stats]) + legend_locs.append("upper center") + legend_handlelengths.append(0) + + # Total stats legend + legend_rects.append(get_whites_array(len(experiment_stats.totals))) + legend_titles.append(experiment_stats.totals) + legend_locs.append("upper left") + legend_handlelengths.append(0) + + # Creating the legends + legends = create_legends(plot, legend_rects, legend_titles, legend_locs, legend_handlelengths) + for legend in legends: + plt.gca().add_artist(legend) + + +def create_legends(plot, rects, titles, locs, handlelengths): + legends = [] + for i in xrange(len(rects)): + legends.append(create_legend(plot, rects[i], titles[i], locs[i], handlelengths[i])) + return legends + + +def create_legend(plot, rects, titles, loc, handlelength=None): + return plot.legend(rects, titles, loc=loc, handlelength=handlelength) + + +def get_whites_array(length): + white = mpatches.Rectangle((0, 0), 0, 0, alpha=0.0) + return [white for _ in xrange(length)] diff --git a/autosubmit/monitor/monitor.py b/autosubmit/monitor/monitor.py index 7a3a0b663e644d08b2cff2143f437e017e73707c..bd4d31033d962e437fd26622e79134378934db7f 100644 --- a/autosubmit/monitor/monitor.py +++ b/autosubmit/monitor/monitor.py @@ -26,12 +26,7 @@ from os import listdir from os import remove import pydotplus - - -# These packages produce errors when added to setup. 
-# noinspection PyPackageRequirements import numpy as np -# noinspection PyPackageRequirements import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import matplotlib.patches as mpatches @@ -40,7 +35,11 @@ import subprocess from autosubmit.job.job_common import Status from autosubmit.config.basicConfig import BasicConfig -from autosubmit.config.log import Log +from autosubmit.config.config_common import AutosubmitConfig +from bscearth.utils.log import Log +from bscearth.utils.config_parser import ConfigParserFactory + +from diagram import create_bar_diagram class Monitor: @@ -94,22 +93,24 @@ class Monitor: Log.debug('Creating legend...') legend = pydotplus.Subgraph(graph_name='Legend', label='Legend', rank="source") + legend.add_node(pydotplus.Node(name='UNKNOWN', shape='box', style="", + fillcolor=self._table[Status.UNKNOWN])) legend.add_node(pydotplus.Node(name='WAITING', shape='box', style="filled", fillcolor=self._table[Status.WAITING])) legend.add_node(pydotplus.Node(name='READY', shape='box', style="filled", fillcolor=self._table[Status.READY])) - legend.add_node( - pydotplus.Node(name='SUBMITTED', shape='box', style="filled", fillcolor=self._table[Status.SUBMITTED])) + legend.add_node(pydotplus.Node(name='SUBMITTED', shape='box', style="filled", + fillcolor=self._table[Status.SUBMITTED])) legend.add_node(pydotplus.Node(name='QUEUING', shape='box', style="filled", fillcolor=self._table[Status.QUEUING])) legend.add_node(pydotplus.Node(name='RUNNING', shape='box', style="filled", fillcolor=self._table[Status.RUNNING])) - legend.add_node( - pydotplus.Node(name='COMPLETED', shape='box', style="filled", fillcolor=self._table[Status.COMPLETED])) + legend.add_node(pydotplus.Node(name='COMPLETED', shape='box', style="filled", + fillcolor=self._table[Status.COMPLETED])) legend.add_node(pydotplus.Node(name='FAILED', shape='box', style="filled", fillcolor=self._table[Status.FAILED])) - legend.add_node( - pydotplus.Node(name='SUSPENDED', shape='box', 
style="filled", fillcolor=self._table[Status.SUSPENDED])) + legend.add_node(pydotplus.Node(name='SUSPENDED', shape='box', style="filled", + fillcolor=self._table[Status.SUSPENDED])) graph.add_subgraph(legend) exp = pydotplus.Subgraph(graph_name='Experiment', label=expid) @@ -213,7 +214,7 @@ class Monitor: output_date = time.strftime("%Y%m%d_%H%M", now) output_file = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "plot", expid + "_statistics_" + output_date + "." + output_format) - self.create_bar_diagram(expid, joblist, output_file, period_ini, period_fi) + create_bar_diagram(expid, joblist, self.get_general_stats(expid), output_file, period_ini, period_fi) Log.result('Stats created at {0}', output_file) if show: try: @@ -221,174 +222,6 @@ class Monitor: except subprocess.CalledProcessError: Log.error('File {0} could not be opened', output_file) - @staticmethod - def create_bar_diagram(expid, joblist, output_file, period_ini=None, period_fi=None): - """ - Function to plot statistics - - :param expid: experiment's identifier - :type expid: str - :param joblist: joblist to plot - :type joblist: JobList - :param output_file: path to create file - :type output_file: str - :param period_ini: initial datetime of filtered period - :type period_ini: datetime - :param period_fi: final datetime of filtered period - :type period_fi: datetime - """ - - def timedelta2hours(deltatime): - return deltatime.days * 24 + deltatime.seconds / 3600.0 - - total_jobs_submitted = 0 - cpu_consumption = datetime.timedelta() - real_consumption = datetime.timedelta() - total_jobs_run = 0 - total_jobs_failed = 0 - total_jobs_completed = 0 - expected_cpu_consumption = 0 - expected_real_consumption = 0 - threshold = 0 - for job in joblist: - total_jobs_submitted += len(job.check_retrials_submit_time()) - if job.wallclock: - l = job.wallclock.split(':') - hours = float(l[1]) / 60 + float(l[0]) - else: - hours = 0 - threshold = max(threshold, hours) - expected_cpu_consumption += hours * 
int(job.processors) - expected_real_consumption += hours - # These are constants, so they need to be CAPS. Suppress PyCharm warning - # noinspection PyPep8Naming - MAX = 12.0 - # noinspection PyPep8Naming - N = len(joblist) - num_plots = int(np.ceil(N / MAX)) - - ind = np.arange(int(MAX)) # the x locations for the groups - width = 0.16 # the width of the bars - - plt.close('all') - - # noinspection PyPep8Naming - RATIO = 4 - fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * num_plots)) - gs = gridspec.GridSpec(RATIO * num_plots + 2, 1) - fig.suptitle('STATS - ' + expid, fontsize=24, fontweight='bold') - - ax = [] - ax2 = [] - max_time = 0 - max_fail = 0 - for plot in range(1, num_plots + 1): - ax.append(fig.add_subplot(gs[RATIO * plot - RATIO + 2:RATIO * plot + 1])) - ax2.append(ax[plot - 1].twinx()) - l1 = int((plot - 1) * MAX) - l2 = int(plot * MAX) - - run = [datetime.timedelta()] * (l2 - l1) - queued = [datetime.timedelta()] * (l2 - l1) - failed_jobs = [0] * (l2 - l1) - fail_queued = [datetime.timedelta()] * (l2 - l1) - fail_run = [datetime.timedelta()] * (l2 - l1) - - for i, job in enumerate(joblist[l1:l2]): - submit_times = job.check_retrials_submit_time() - start_times = job.check_retrials_start_time() - end_times = job.check_retrials_end_time() - - for j, t in enumerate(submit_times): - - if j >= len(end_times): - if j < len(start_times): - queued[i] += start_times[j] - submit_times[j] - elif j == (len(submit_times) - 1) and job.status == Status.COMPLETED: - queued[i] += start_times[j] - submit_times[j] - run[i] += end_times[j] - start_times[j] - cpu_consumption += run[i] * int(job.processors) - real_consumption += run[i] - else: - failed_jobs[i] += 1 - fail_queued[i] += start_times[j] - submit_times[j] - fail_run[i] += end_times[j] - start_times[j] - cpu_consumption += fail_run[i] * int(job.processors) - real_consumption += fail_run[i] - total_jobs_run += len(start_times) - total_jobs_failed += failed_jobs[i] - total_jobs_completed += len(end_times) - 
failed_jobs[i] - max_timedelta = max(max(max(run, fail_run, queued, fail_queued)), datetime.timedelta(hours=threshold)) - max_time = max(max_time, max_timedelta.days * 24 + max_timedelta.seconds / 3600.0) - max_fail = max(max_fail, max(failed_jobs)) - - for i, delta in enumerate(queued): - queued[i] = timedelta2hours(delta) - - for i, delta in enumerate(run): - run[i] = timedelta2hours(delta) - - for i, delta in enumerate(fail_queued): - fail_queued[i] = timedelta2hours(delta) - - for i, delta in enumerate(fail_run): - fail_run[i] = timedelta2hours(delta) - - rects1 = ax[plot - 1].bar(ind, queued, width, color='orchid') - rects2 = ax[plot - 1].bar(ind + width, run, width, color='limegreen') - rects3 = ax2[plot - 1].bar(ind + width * 2, failed_jobs, width, color='red') - rects4 = ax[plot - 1].bar(ind + width * 3, fail_queued, width, color='purple') - rects5 = ax[plot - 1].bar(ind + width * 4, fail_run, width, color='tomato') - ax[plot - 1].set_ylabel('hours') - ax2[plot - 1].set_ylabel('# failed jobs') - ax[plot - 1].set_xticks(ind + width) - ax[plot - 1].set_xticklabels([job.name for job in joblist[l1:l2]], rotation='vertical') - ax[plot - 1].set_title(expid, fontsize=20) - # autolabel(rects1) - # autolabel(rects2) - # autolabel(rects4) - # autolabel(rects5) - - rects6 = ax[plot - 1].plot([0., width * 6 * MAX], [threshold, threshold], "k--", label='wallclock sim') - - for plot in range(1, num_plots + 1): - ax[plot - 1].set_ylim(0, float(1.10 * max_time)) - ax2[plot - 1].set_yticks(range(0, max_fail + 2)) - ax2[plot - 1].set_ylim(0, max_fail + 1) - - percentage_consumption = timedelta2hours(cpu_consumption) / expected_cpu_consumption * 100 - white = mpatches.Rectangle((0, 0), 0, 0, alpha=0.0) - totals = ['Period: ' + str(period_ini) + " ~ " + str(period_fi), - 'Submitted (#): ' + str(total_jobs_submitted), - 'Run (#): ' + str(total_jobs_run), - 'Failed (#): ' + str(total_jobs_failed), - 'Completed (#): ' + str(total_jobs_completed), - 'Expected consumption real (h): 
' + str(round(expected_real_consumption, 2)), - 'Expected consumption CPU time (h): ' + str(round(expected_cpu_consumption, 2)), - 'Consumption real (h): ' + str(round(timedelta2hours(real_consumption), 2)), - 'Consumption CPU time (h): ' + str(round(timedelta2hours(cpu_consumption), 2)), - 'Consumption (%): ' + str(round(percentage_consumption, 2))] - Log.result('\n'.join(totals)) - - ax0 = fig.add_subplot(gs[0, 0]) - ax0.set_frame_on(False) - ax0.axes.get_xaxis().set_visible(False) - ax0.axes.get_yaxis().set_visible(False) - # noinspection PyUnboundLocalVariable - first_legend = ax0.legend((rects1[0], rects2[0], rects3[0], rects4[0], rects5[0], rects6[0]), - ('Queued (h)', 'Run (h)', 'Failed jobs (#)', 'Fail Queued (h)', 'Fail Run (h)', - 'Max wallclock (h)'), loc="upper right") - plt.gca().add_artist(first_legend) - - ax0.legend([white, white, white, white, white, white, white, white, white, white], - totals, - handlelength=0, - loc="upper left") - - gs.tight_layout(fig, rect=[0, 0.03, 1, 0.97]) # adjust rect parameter while leaving some room for suptitle. 
- # plt.show() - plt.savefig(output_file) - @staticmethod def clean_plot(expid): """ @@ -428,3 +261,13 @@ class Monitor: for f in filelist: remove(f) Log.result("Stats cleaned!\nLast stats' plot remanining there.\n") + + @staticmethod + def get_general_stats(expid): + general_stats = [] + general_stats_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "tmp", expid + "_GENERAL_STATS") + parser = AutosubmitConfig.get_parser(ConfigParserFactory(), general_stats_path) + for section in parser.sections(): + general_stats.append((section, '')) + general_stats += parser.items(section) + return general_stats diff --git a/autosubmit/monitor/utils.py b/autosubmit/monitor/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..86d0758c7cc5844a148773285fa9f3eab9097b68 --- /dev/null +++ b/autosubmit/monitor/utils.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + + +class FixedSizeList(list): + """ + Customized list to retrieve a fixed sublist / slice of its elements. + The list will be filled with as many items (default) as needed to + get a list with the expected size. 
+ """ + + def __init__(self, lst, default=None): + super(FixedSizeList, self).__init__(lst) + self._default = default + + def __getitem__(self, key): + if isinstance(key, slice): + return [list(self)[i] if 0 <= i < len(self) else self._default for i in xrange(key.start, key.stop, key.step or 1)] + return list(self)[key] + + def __getslice__(self, i, j): + return self.__getitem__(slice(i, j)) diff --git a/autosubmit/notifications/mail_notifier.py b/autosubmit/notifications/mail_notifier.py index fb4872579ce6998daa3f83b9e1a6d1ab4e11ae96..1e3791b16d36a41a24961fa3a3b1b964463d0a4e 100644 --- a/autosubmit/notifications/mail_notifier.py +++ b/autosubmit/notifications/mail_notifier.py @@ -20,7 +20,7 @@ import smtplib import email.utils from email.mime.text import MIMEText -from autosubmit.config.log import Log +from bscearth.utils.log import Log class MailNotifier: diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index e8d96d289847d5a9d60da0c7bc7129caa0d820c1..3ab8349a36ae8d722188da3661d754780cc919da 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,17 +16,21 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . 
-import textwrap + import os import subprocess from autosubmit.platforms.paramiko_platform import ParamikoPlatform, ParamikoPlatformException -from autosubmit.config.log import Log +from bscearth.utils.log import Log + +from autosubmit.platforms.headers.ec_header import EcHeader +from autosubmit.platforms.headers.ec_cca_header import EcCcaHeader +from autosubmit.platforms.wrappers.ec_wrapper import EcWrapper class EcPlatform(ParamikoPlatform): """ - Class to manage queues with ecacces + Class to manage queues with ecaccess :param expid: experiment's identifier :type expid: str @@ -42,12 +46,16 @@ class EcPlatform(ParamikoPlatform): self._header = EcHeader() else: raise ParamikoPlatformException('ecaccess scheduler {0} not supported'.format(scheduler)) + self._wrapper = EcWrapper() self.job_status = dict() self.job_status['COMPLETED'] = ['DONE'] self.job_status['RUNNING'] = ['EXEC'] self.job_status['QUEUING'] = ['INIT', 'RETR', 'STDBY', 'WAIT'] self.job_status['FAILED'] = ['STOP'] self._pathdir = "\$HOME/LOG_" + self.expid + self._allow_arrays = False + self._allow_wrappers = True + self._allow_python_jobs = False self.update_cmds() def update_cmds(self): @@ -135,11 +143,15 @@ class EcPlatform(ParamikoPlatform): return True def get_file(self, filename, must_exist=True, relative_path=''): - local_path = os.path.join(self.tmp_path, relative_path, filename) - if os.path.exists(local_path): - os.remove(local_path) + local_path = os.path.join(self.tmp_path, relative_path) + if not os.path.exists(local_path): + os.makedirs(local_path) - command = '{0} {3}:{2} {1}'.format(self.get_cmd, local_path, os.path.join(self.get_files_path(), filename), + file_path = os.path.join(local_path, filename) + if os.path.exists(file_path): + os.remove(file_path) + + command = '{0} {3}:{2} {1}'.format(self.get_cmd, file_path, os.path.join(self.get_files_path(), filename), self.host) try: process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) @@ -163,127 +175,3 @@ class 
EcPlatform(ParamikoPlatform): def get_ssh_output(self): return self._ssh_output - - -class EcHeader: - """Class to handle the ECMWF headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - # noinspection PyPep8 - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #@ shell = /usr/bin/ksh - #@ class = ns - #@ job_type = serial - #@ job_name = %JOBNAME% - #@ output = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).out - #@ error = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).err - #@ notification = error - #@ resources = ConsumableCpus(1) ConsumableMemory(1200mb) - #@ wall_clock_limit = %WALLCLOCK%:00 - #@ platforms - # - ############################################################################### - """) - - # noinspection PyPep8 - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #@ shell = /usr/bin/ksh - #@ class = np - #@ job_type = parallel - #@ job_name = %JOBNAME% - #@ output = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).out - #@ error = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).err - #@ notification = error - #@ resources = ConsumableCpus(1) ConsumableMemory(1200mb) - #@ ec_smt = no - #@ total_tasks = %NUMPROC% - #@ 
wall_clock_limit = %WALLCLOCK%:00 - #@ platforms - # - ############################################################################### - """) - - -class EcCcaHeader: - """Class to handle the ECMWF headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - # noinspection PyMethodMayBeStatic - def get_tasks_per_node(self, job): - if not isinstance(job.tasks, int): - return "" - else: - return '#PBS -l EC_tasks_per_node={0}'.format(job.tasks) - - # noinspection PyMethodMayBeStatic - def get_threads_per_task(self, job): - if not isinstance(job.threads, int): - return "" - else: - return '#PBS -l EC_threads_per_task={0}'.format(job.threads) - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% - #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% - #PBS -q ns - #PBS -l walltime=%WALLCLOCK%:00 - #PBS -l EC_billing_account=%CURRENT_BUDG% - # - ############################################################################### - - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% - #PBS -e 
%CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% - #PBS -q np - #PBS -l EC_total_tasks=%NUMPROC% - %THREADS_PER_TASK_DIRECTIVE% - %TASKS_PER_NODE_DIRECTIVE% - #PBS -l walltime=%WALLCLOCK%:00 - #PBS -l EC_billing_account=%CURRENT_BUDG% - # - ############################################################################### - """) diff --git a/autosubmit/date/__init__.py b/autosubmit/platforms/headers/__init__.py similarity index 100% rename from autosubmit/date/__init__.py rename to autosubmit/platforms/headers/__init__.py diff --git a/autosubmit/platforms/headers/ec_cca_header.py b/autosubmit/platforms/headers/ec_cca_header.py new file mode 100644 index 0000000000000000000000000000000000000000..9037c9c59d2391e9161343aa546d6413795b6245 --- /dev/null +++ b/autosubmit/platforms/headers/ec_cca_header.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class EcCcaHeader(object): + """Class to handle the ECMWF headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + # noinspection PyMethodMayBeStatic + def get_tasks_per_node(self, job): + if not isinstance(job.tasks, int): + return "" + else: + return '#PBS -l EC_tasks_per_node={0}'.format(job.tasks) + + # noinspection PyMethodMayBeStatic + def get_threads_per_task(self, job): + if not isinstance(job.threads, int): + return "" + else: + return '#PBS -l EC_threads_per_task={0}'.format(job.threads) + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_memory_per_task_directive(self, job): + """ + Returns memory per task directive for the specified job + + :param job: job to create memory per task directive for + :type job: Job + :return: memory per task directive + :rtype: str + """ + # There is no memory per task, so directive is empty + if job.parameters['MEMORY_PER_TASK'] != '': + return "#PBS -l EC_memory_per_task={0}mb".format(job.parameters['MEMORY_PER_TASK']) + return "" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_hyperthreading_directive(self, job): + """ + Returns hyperthreading directive for the specified job + + :param job: job to create hyperthreading directive for + :type job: Job + :return: hyperthreading per task directive + :rtype: str + """ + # There is no memory per task, so directive is empty + if job.parameters['CURRENT_HYPERTHREADING'] == 'true': + return "#PBS -l EC_hyperthreads=2" + return "#PBS -l EC_hyperthreads=1" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + 
############################################################################### + # + #PBS -N %JOBNAME% + #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + #PBS -q ns + #PBS -l walltime=%WALLCLOCK%:00 + #PBS -l EC_billing_account=%CURRENT_BUDG% + # + ############################################################################### + + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #PBS -N %JOBNAME% + #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + #PBS -q np + #PBS -l EC_total_tasks=%NUMPROC% + %THREADS_PER_TASK_DIRECTIVE% + %TASKS_PER_NODE_DIRECTIVE% + %MEMORY_PER_TASK_DIRECTIVE% + %HYPERTHREADING_DIRECTIVE% + #PBS -l walltime=%WALLCLOCK%:00 + #PBS -l EC_billing_account=%CURRENT_BUDG% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/ec_header.py b/autosubmit/platforms/headers/ec_header.py new file mode 100644 index 0000000000000000000000000000000000000000..ff1eaadc060e0866f44e8ccf9960d38284e70c65 --- /dev/null +++ b/autosubmit/platforms/headers/ec_header.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import textwrap + + +class EcHeader(object): + """Class to handle the ECMWF headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + # noinspection PyPep8 + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #@ shell = /usr/bin/ksh + #@ class = ns + #@ job_type = serial + #@ job_name = %JOBNAME% + #@ output = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).out + #@ error = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).err + #@ notification = error + #@ resources = ConsumableCpus(1) ConsumableMemory(1200mb) + #@ wall_clock_limit = %WALLCLOCK%:00 + #@ platforms + # + ############################################################################### + """) + + # noinspection PyPep8 + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #@ shell = /usr/bin/ksh + #@ class = np + #@ job_type = parallel + #@ job_name = %JOBNAME% + #@ output = 
%CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).out + #@ error = %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/$(job_name).$(jobid).err + #@ notification = error + #@ resources = ConsumableCpus(1) ConsumableMemory(1200mb) + #@ ec_smt = no + #@ total_tasks = %NUMPROC% + #@ wall_clock_limit = %WALLCLOCK%:00 + #@ platforms + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/local_header.py b/autosubmit/platforms/headers/local_header.py new file mode 100644 index 0000000000000000000000000000000000000000..72a3a869f5f1c826a8d63f794ec1d2cc344c3907 --- /dev/null +++ b/autosubmit/platforms/headers/local_header.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class LocalHeader(object): + """Class to handle the Ps headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/lsf_header.py b/autosubmit/platforms/headers/lsf_header.py new file mode 100644 index 0000000000000000000000000000000000000000..331ffe154c4ba5f21ef48547833787da1155f5ff --- /dev/null +++ b/autosubmit/platforms/headers/lsf_header.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class LsfHeader(object): + """Class to handle the MareNostrum3 headers of a job""" + + # noinspection PyMethodMayBeStatic + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + if job.parameters['CURRENT_QUEUE'] == '': + return "" + else: + return "BSUB -q {0}".format(job.parameters['CURRENT_QUEUE']) + + # noinspection PyMethodMayBeStatic + def get_scratch_free_space(self, job): + if not isinstance(job.scratch_free_space, int): + return "" + else: + return '#BSUB -R "select[(scratch<{0})]"'.format(job.scratch_free_space) + + # noinspection PyMethodMayBeStatic + def get_tasks_per_node(self, job): + if not isinstance(job.tasks, int): + return "" + else: + return '#BSUB -R "span[ptile={0}]"'.format(job.tasks) + + # noinspection PyMethodMayBeStatic + def get_exclusivity(self, job): + if job.platform.exclusivity == 'true': + return "#BSUB -x" + else: + return "" + + @classmethod + def array_header(cls, filename, array_id, wallclock, num_processors): + return textwrap.dedent("""\ + ############################################################################### + # {0} + ############################################################################### + # + # + #BSUB -J {0}{1} + #BSUB -oo {0}.%I.out + #BSUB -eo {0}.%I.err + #BSUB -W {2} + #BSUB -n {3} + # + ############################################################################### + + SCRIPT=$(cat {0}.$LSB_JOBINDEX | awk 'NR==1') + chmod +x $SCRIPT + ./$SCRIPT + """.format(filename, array_id, wallclock, num_processors)) + + @classmethod + def thread_header(cls, filename, wallclock, num_processors, job_scripts, dependency_directive): + return textwrap.dedent("""\ + #!/usr/bin/env python + ############################################################################### + # {0} + 
############################################################################### + # + #BSUB -J {0} + #BSUB -o {0}.out + #BSUB -e {0}.err + #BSUB -W {1} + #BSUB -n {2} + {4} + # + ############################################################################### + + import os + import sys + from threading import Thread + from commands import getstatusoutput + + class JobThread(Thread): + def __init__ (self, template, id_run): + Thread.__init__(self) + self.template = template + self.id_run = id_run + + def run(self): + out = str(self.template) + '.' + str(self.id_run) + '.out' + err = str(self.template) + '.' + str(self.id_run) + '.err' + command = str(self.template) + ' ' + str(self.id_run) + ' ' + os.getcwd() + (self.status) = getstatusoutput(command + ' > ' + out + ' 2> ' + err) + + scripts = {3} + + for i in range(len(scripts)): + current = JobThread(scripts[i], i) + current.start() + current.join() + completed_filename = scripts[i].replace('.cmd', '_COMPLETED') + completed_path = os.path.join(os.getcwd(), completed_filename) + if os.path.exists(completed_path): + print "The job ", current.template," has been COMPLETED" + else: + print "The job ", current.template," has FAILED" + os._exit(1) + """.format(filename, wallclock, num_processors, str(job_scripts), dependency_directive)) + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #%QUEUE_DIRECTIVE% + #BSUB -J %JOBNAME% + #BSUB -oo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #BSUB -eo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + #BSUB -W %WALLCLOCK% + #BSUB -n %NUMPROC% + %EXCLUSIVITY_DIRECTIVE% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + 
############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #%QUEUE_DIRECTIVE% + #BSUB -J %JOBNAME% + #BSUB -oo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #BSUB -eo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + #BSUB -W %WALLCLOCK% + #BSUB -n %NUMPROC% + %TASKS_PER_NODE_DIRECTIVE% + %SCRATCH_FREE_SPACE_DIRECTIVE% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/pbs10_header.py b/autosubmit/platforms/headers/pbs10_header.py new file mode 100644 index 0000000000000000000000000000000000000000..3197603e356b36f9b28a7f57e355ce73683c640f --- /dev/null +++ b/autosubmit/platforms/headers/pbs10_header.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class Pbs10Header(object): + """Class to handle the Hector headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #PBS -N %JOBNAME% + #PBS -q serial + #PBS -l cput=%WALLCLOCK%:00 + #PBS -A %CURRENT_BUDG% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #PBS -N %JOBNAME% + #PBS -l mppwidth=%NUMPROC% + #PBS -l mppnppn=32 + #PBS -l walltime=%WALLCLOCK%:00 + #PBS -A %CURRENT_BUDG% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/pbs11_header.py b/autosubmit/platforms/headers/pbs11_header.py new file mode 100644 index 0000000000000000000000000000000000000000..9f9919799139297098ebe7dfd7f7a2a539f537b4 --- /dev/null +++ b/autosubmit/platforms/headers/pbs11_header.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import textwrap + + +class Pbs11Header(object): + """Class to handle the Lindgren headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #!/bin/sh --login + #PBS -N %JOBNAME% + #PBS -l mppwidth=%NUMPROC% + #PBS -l mppnppn=%NUMTASK% + #PBS -l walltime=%WALLCLOCK% + #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% + #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #!/bin/sh --login + #PBS -N %JOBNAME% + #PBS -l mppwidth=%NUMPROC% + #PBS -l mppnppn=%NUMTASK% + #PBS -l walltime=%WALLCLOCK% + #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% + #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% + # + 
############################################################################### + """) diff --git a/autosubmit/platforms/headers/pbs12_header.py b/autosubmit/platforms/headers/pbs12_header.py new file mode 100644 index 0000000000000000000000000000000000000000..014ebb63a9c5da028d06ada447ba188f507a3a7d --- /dev/null +++ b/autosubmit/platforms/headers/pbs12_header.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class Pbs12Header(object): + """Class to handle the Archer headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #PBS -N %JOBNAME% + #PBS -l select=serial=true:ncpus=1 + #PBS -l walltime=%WALLCLOCK%:00 + #PBS -A %CURRENT_BUDG% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #PBS -N %JOBNAME% + #PBS -l select=%NUMPROC% + #PBS -l walltime=%WALLCLOCK%:00 + #PBS -A %CURRENT_BUDG% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/ps_header.py b/autosubmit/platforms/headers/ps_header.py new file mode 100644 index 0000000000000000000000000000000000000000..436bb08939582888368a4fe903b4daad0d067c0d --- /dev/null +++ b/autosubmit/platforms/headers/ps_header.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import textwrap + + +class PsHeader(object): + """Class to handle the Ps headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/sge_header.py b/autosubmit/platforms/headers/sge_header.py new file mode 100644 index 0000000000000000000000000000000000000000..540c5f642f4c9e042d67d17270b93c26de82bddf --- /dev/null +++ b/autosubmit/platforms/headers/sge_header.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import textwrap + + +class SgeHeader(object): + """Class to handle the Ithaca headers of a job""" + + # noinspection PyMethodMayBeStatic + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + if job.parameters['CURRENT_QUEUE'] == '': + return "" + else: + return "$ -q {0}".format(job.parameters['CURRENT_QUEUE']) + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #$ -S /bin/sh + #$ -N %JOBNAME% + #$ -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ + #$ -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ + #$ -V + #$ -l h_rt=%WALLCLOCK%:00 + #$ -l s_rt=%WALLCLOCK%:00 + #%QUEUE_DIRECTIVE% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #$ -S /bin/sh + #$ -N %JOBNAME% + #$ -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ + #$ -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ + #$ -V + #$ -l h_rt=%WALLCLOCK%:00 + #$ -l s_rt=%WALLCLOCK%:00 + #$ -pe orte 
%NUMPROC% + #%QUEUE_DIRECTIVE% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/headers/slurm_header.py b/autosubmit/platforms/headers/slurm_header.py new file mode 100644 index 0000000000000000000000000000000000000000..677a26d569fe005d34ee11981d8fd782004a0d44 --- /dev/null +++ b/autosubmit/platforms/headers/slurm_header.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +class SlurmHeader(object): + """Class to handle the SLURM headers of a job""" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_queue_directive(self, job): + """ + Returns queue directive for the specified job + + :param job: job to create queue directive for + :type job: Job + :return: queue directive + :rtype: str + """ + # There is no queue, so directive is empty + if job.parameters['CURRENT_QUEUE'] == '': + return "" + else: + return "SBATCH -p {0}".format(job.parameters['CURRENT_QUEUE']) + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_account_directive(self, job): + """ + Returns account directive for the specified job + + :param job: job to create account directive for + :type job: Job + :return: account directive + :rtype: str + """ + # There is no account, so directive is empty + if job.parameters['CURRENT_PROJ'] != '': + return "SBATCH -A {0}".format(job.parameters['CURRENT_PROJ']) + return "" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_memory_directive(self, job): + """ + Returns memory directive for the specified job + + :param job: job to create memory directive for + :type job: Job + :return: memory directive + :rtype: str + """ + # There is no memory, so directive is empty + if job.parameters['MEMORY'] != '': + return "SBATCH --mem {0}".format(job.parameters['MEMORY']) + return "" + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def get_memory_per_task_directive(self, job): + """ + Returns memory per task directive for the specified job + + :param job: job to create memory per task directive for + :type job: Job + :return: memory per task directive + :rtype: str + """ + # There is no memory per task, so directive is empty + if job.parameters['MEMORY_PER_TASK'] != '': + return "SBATCH --mem-per-cpu {0}".format(job.parameters['MEMORY_PER_TASK']) + return "" + + SERIAL = textwrap.dedent("""\ + ############################################################################### + # 
%TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #%QUEUE_DIRECTIVE% + #%ACCOUNT_DIRECTIVE% + #%MEMORY_DIRECTIVE% + #%MEMORY_PER_TASK_DIRECTIVE% + #SBATCH -n %NUMPROC% + #SBATCH -t %WALLCLOCK%:00 + #SBATCH -J %JOBNAME% + #SBATCH -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #SBATCH -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + # + ############################################################################### + """) + + PARALLEL = textwrap.dedent("""\ + ############################################################################### + # %TASKTYPE% %EXPID% EXPERIMENT + ############################################################################### + # + #%QUEUE_DIRECTIVE% + #%ACCOUNT_DIRECTIVE% + #%MEMORY_DIRECTIVE% + #%MEMORY_PER_TASK_DIRECTIVE% + #SBATCH -n %NUMPROC% + #SBATCH -t %WALLCLOCK%:00 + #SBATCH -J %JOBNAME% + #SBATCH -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% + #SBATCH -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% + # + ############################################################################### + """) diff --git a/autosubmit/platforms/locplatform.py b/autosubmit/platforms/locplatform.py index b4bca241253a376c2313bfd1f0c2e57802259a2f..c2462f94e6571af3d9705c7ba5ece12416b39716 100644 --- a/autosubmit/platforms/locplatform.py +++ b/autosubmit/platforms/locplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -18,13 +18,14 @@ # along with Autosubmit. If not, see . 
import os -import textwrap from xml.dom.minidom import parseString import subprocess from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from autosubmit.platforms.headers.local_header import LocalHeader + from autosubmit.config.basicConfig import BasicConfig -from autosubmit.config.log import Log +from bscearth.utils.log import Log class LocalPlatform(ParamikoPlatform): @@ -113,12 +114,16 @@ class LocalPlatform(ParamikoPlatform): return True def get_file(self, filename, must_exist=True, relative_path=''): - local_path = os.path.join(self.tmp_path, relative_path, filename) - if os.path.exists(local_path): - os.remove(local_path) + local_path = os.path.join(self.tmp_path, relative_path) + if not os.path.exists(local_path): + os.makedirs(local_path) + + file_path = os.path.join(local_path, filename) + if os.path.exists(file_path): + os.remove(file_path) command = '{0} {1} {2}'.format(self.get_cmd, os.path.join(self.tmp_path, 'LOG_' + self.expid, filename), - local_path) + file_path) try: subprocess.check_call(command, stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'), shell=True) except subprocess.CalledProcessError: @@ -150,32 +155,3 @@ class LocalPlatform(ParamikoPlatform): :type remote_logs: (str, str) """ return - - -class LocalHeader: - """Class to handle the Ps headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - 
############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - """) diff --git a/autosubmit/platforms/lsfplatform.py b/autosubmit/platforms/lsfplatform.py index a3751cd922ec543026700987dae7d2afea951754..e0bdd602dfa6a66a8af4a6d88ca0a1c61bc910a8 100644 --- a/autosubmit/platforms/lsfplatform.py +++ b/autosubmit/platforms/lsfplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,10 +16,12 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import textwrap + import os from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from autosubmit.platforms.headers.lsf_header import LsfHeader +from autosubmit.platforms.wrappers.lsf_wrapper import LsfWrapper class LsfPlatform(ParamikoPlatform): @@ -32,12 +34,14 @@ class LsfPlatform(ParamikoPlatform): def __init__(self, expid, name, config): ParamikoPlatform.__init__(self, expid, name, config) self._header = LsfHeader() + self._wrapper = LsfWrapper() self.job_status = dict() self.job_status['COMPLETED'] = ['DONE'] self.job_status['RUNNING'] = ['RUN'] self.job_status['QUEUING'] = ['PEND', 'FW_PEND'] self.job_status['FAILED'] = ['SSUSP', 'USUSP', 'EXIT'] self._allow_arrays = True + self._allow_wrappers = True self.update_cmds() def update_cmds(self): @@ -84,97 +88,3 @@ class LsfPlatform(ParamikoPlatform): def get_submit_cmd(self, job_script, job): return self._submit_cmd + job_script - - -class LsfHeader: - """Class to handle the MareNostrum3 headers of a job""" - - # noinspection PyMethodMayBeStatic - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - 
:rtype: str - """ - if job.parameters['CURRENT_QUEUE'] == '': - return "" - else: - return "BSUB -q {0}".format(job.parameters['CURRENT_QUEUE']) - - # noinspection PyMethodMayBeStatic - def get_scratch_free_space(self, job): - if not isinstance(job.scratch_free_space, int): - return "" - else: - return '#BSUB -R "select[(scratch<{0})]"'.format(job.scratch_free_space) - - # noinspection PyMethodMayBeStatic - def get_tasks_per_node(self, job): - if not isinstance(job.tasks, int): - return "" - else: - return '#BSUB -R "span[ptile={0}]"'.format(job.tasks) - - # noinspection PyMethodMayBeStatic - def get_exclusivity(self, job): - if job.platform.exclusivity == 'true': - return "#BSUB -x" - else: - return "" - - @classmethod - def array_header(cls, filename, array_id, wallclock, num_processors): - return textwrap.dedent("""\ - ############################################################################### - # {0} - ############################################################################### - # - # - #BSUB -J {0}{1} - #BSUB -oo {0}.%I.out - #BSUB -eo {0}.%I.err - #BSUB -W {2} - #BSUB -n {3} - # - ############################################################################### - - SCRIPT=$(cat {0}.$LSB_JOBINDEX | awk 'NR==1') - chmod +x $SCRIPT - ./$SCRIPT - """.format(filename, array_id, wallclock, num_processors)) - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #%QUEUE_DIRECTIVE% - #BSUB -J %JOBNAME% - #BSUB -oo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% - #BSUB -eo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% - #BSUB -W %WALLCLOCK% - #BSUB -n %NUMPROC% - %EXCLUSIVITY_DIRECTIVE% - # - ############################################################################### - """) - - PARALLEL = 
textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #%QUEUE_DIRECTIVE% - #BSUB -J %JOBNAME% - #BSUB -oo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%OUT_LOG_DIRECTIVE% - #BSUB -eo %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%ERR_LOG_DIRECTIVE% - #BSUB -W %WALLCLOCK% - #BSUB -n %NUMPROC% - %TASKS_PER_NODE_DIRECTIVE% - %SCRATCH_FREE_SPACE_DIRECTIVE% - # - ############################################################################### - """) \ No newline at end of file diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 6f7f86cb97ad52f2bdd1af748089cc45a6561148..0bb98c347dd27da63c8d5b72f70dbf956d7a7ddf 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -4,11 +4,11 @@ import os import paramiko import datetime -from autosubmit.config.log import Log +from bscearth.utils.log import Log from autosubmit.job.job_common import Status from autosubmit.job.job_common import Type from autosubmit.platforms.platform import Platform -from autosubmit.date.chunk_date_lib import date2str +from bscearth.utils.date import date2str class ParamikoPlatform(Platform): @@ -43,6 +43,16 @@ class ParamikoPlatform(Platform): """ return self._header + @property + def wrapper(self): + """ + Handler to manage wrappers + + :return: wrapper-handler + :rtype: object + """ + return self._wrapper + def connect(self): """ Creates ssh connection to host @@ -91,6 +101,8 @@ class ParamikoPlatform(Platform): try: ftp = self._ssh.open_sftp() ftp.put(os.path.join(self.tmp_path, filename), os.path.join(self.get_files_path(), filename)) + ftp.chmod(os.path.join(self.get_files_path(), filename), + os.stat(os.path.join(self.tmp_path, filename)).st_mode) ftp.close() return True except 
BaseException as e: @@ -112,9 +124,13 @@ class ParamikoPlatform(Platform): :rtype: bool """ - local_path = os.path.join(self.tmp_path, relative_path, filename) - if os.path.exists(local_path): - os.remove(local_path) + local_path = os.path.join(self.tmp_path, relative_path) + if not os.path.exists(local_path): + os.makedirs(local_path) + + file_path = os.path.join(local_path, filename) + if os.path.exists(file_path): + os.remove(file_path) if self._ssh is None: if not self.connect(): @@ -122,10 +138,13 @@ class ParamikoPlatform(Platform): try: ftp = self._ssh.open_sftp() - ftp.get(os.path.join(self.get_files_path(), filename), local_path) + ftp.get(os.path.join(self.get_files_path(), filename), file_path) ftp.close() return True except BaseException: + # ftp.get creates a local file anyway + if os.path.exists(file_path): + os.remove(file_path) if must_exist: raise Exception('File {0} does not exists'.format(filename)) return False @@ -354,7 +373,7 @@ class ParamikoPlatform(Platform): :return: command to check job status script :rtype: str """ - return 'nohup kill -0 {0}; echo $?'.format(job_id) + return 'nohup kill -0 {0} >& /dev/null; echo $?'.format(job_id) def get_submitted_job_id(self, output): """ @@ -375,10 +394,10 @@ class ParamikoPlatform(Platform): :return: header to use :rtype: str """ - if job.processors > 1: - header = self.header.PARALLEL - else: + if str(job.processors) == '1': header = self.header.SERIAL + else: + header = self.header.PARALLEL str_datetime = date2str(datetime.datetime.now(), 'S') out_filename = "{0}.{1}.out".format(job.name, str_datetime) @@ -397,6 +416,14 @@ class ParamikoPlatform(Platform): header = header.replace('%SCRATCH_FREE_SPACE_DIRECTIVE%', self.header.get_scratch_free_space(job)) if hasattr(self.header, 'get_exclusivity'): header = header.replace('%EXCLUSIVITY_DIRECTIVE%', self.header.get_exclusivity(job)) + if hasattr(self.header, 'get_account_directive'): + header = header.replace('%ACCOUNT_DIRECTIVE%', 
self.header.get_account_directive(job)) + if hasattr(self.header, 'get_memory_directive'): + header = header.replace('%MEMORY_DIRECTIVE%', self.header.get_memory_directive(job)) + if hasattr(self.header, 'get_memory_per_task_directive'): + header = header.replace('%MEMORY_PER_TASK_DIRECTIVE%', self.header.get_memory_per_task_directive(job)) + if hasattr(self.header, 'get_hyperthreading_directive'): + header = header.replace('%HYPERTHREADING_DIRECTIVE%', self.header.get_hyperthreading_directive(job)) return header def check_remote_log_dir(self): diff --git a/autosubmit/platforms/paramiko_submitter.py b/autosubmit/platforms/paramiko_submitter.py index 080b2a34ca4647abb081aa44afa48b162fef3ea3..cc65991182cc025460b742e3f597493599ab27bd 100644 --- a/autosubmit/platforms/paramiko_submitter.py +++ b/autosubmit/platforms/paramiko_submitter.py @@ -56,7 +56,7 @@ class ParamikoSubmitter(Submitter): job_parser = asconf.jobs_parser for job in job_parser.sections(): - hpc = AutosubmitConfig.get_option(job_parser, job, 'PLATFORM', hpcarch).lower() + hpc = job_parser.get_option(job, 'PLATFORM', hpcarch).lower() if hpc not in platforms_used: platforms_used.append(hpc) @@ -64,6 +64,8 @@ class ParamikoSubmitter(Submitter): platforms = dict() local_platform = LocalPlatform(asconf.expid, 'local', BasicConfig) + local_platform.max_wallclock = asconf.get_max_wallclock() + local_platform.max_processors = asconf.get_max_processors() local_platform.max_waiting_jobs = asconf.get_max_waiting_jobs() local_platform.total_jobs = asconf.get_total_jobs() local_platform.scratch = os.path.join(BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) @@ -77,8 +79,8 @@ class ParamikoSubmitter(Submitter): if section.lower() not in platforms_used: continue - platform_type = AutosubmitConfig.get_option(parser, section, 'TYPE', '').lower() - platform_version = AutosubmitConfig.get_option(parser, section, 'VERSION', '') + platform_type = parser.get_option(section, 'TYPE', '').lower() + 
platform_version = parser.get_option(section, 'VERSION', '') try: if platform_type == 'pbs': remote_platform = PBSPlatform(asconf.expid, section.lower(), BasicConfig, platform_version) @@ -102,29 +104,35 @@ class ParamikoSubmitter(Submitter): remote_platform.type = platform_type remote_platform._version = platform_version - if AutosubmitConfig.get_option(parser, section, 'ADD_PROJECT_TO_HOST', '').lower() == 'true': - host = '{0}-{1}'.format(AutosubmitConfig.get_option(parser, section, 'HOST', None), - AutosubmitConfig.get_option(parser, section, 'PROJECT', None)) + if parser.get_option(section, 'ADD_PROJECT_TO_HOST', '').lower() == 'true': + host = '{0}-{1}'.format(parser.get_option(section, 'HOST', None), + parser.get_option(section, 'PROJECT', None)) else: - host = AutosubmitConfig.get_option(parser, section, 'HOST', None) + host = parser.get_option(section, 'HOST', None) remote_platform.host = host - remote_platform.max_waiting_jobs = int(AutosubmitConfig.get_option(parser, section, 'MAX_WAITING_JOBS', - asconf.get_max_waiting_jobs())) - remote_platform.total_jobs = int(AutosubmitConfig.get_option(parser, section, 'TOTAL_JOBS', - asconf.get_total_jobs())) - remote_platform.project = AutosubmitConfig.get_option(parser, section, 'PROJECT', None) - remote_platform.budget = AutosubmitConfig.get_option(parser, section, 'BUDGET', remote_platform.project) - remote_platform.reservation = AutosubmitConfig.get_option(parser, section, 'RESERVATION', '') - remote_platform.exclusivity = AutosubmitConfig.get_option(parser, section, 'EXCLUSIVITY', '').lower() - remote_platform.user = AutosubmitConfig.get_option(parser, section, 'USER', None) - remote_platform.scratch = AutosubmitConfig.get_option(parser, section, 'SCRATCH_DIR', None) - remote_platform._default_queue = AutosubmitConfig.get_option(parser, section, 'QUEUE', None) - remote_platform._serial_queue = AutosubmitConfig.get_option(parser, section, 'SERIAL_QUEUE', None) - remote_platform.processors_per_node = 
AutosubmitConfig.get_option(parser, section, 'PROCESSORS_PER_NODE', - None) - remote_platform.scratch_free_space = AutosubmitConfig.get_option(parser, section, 'SCRATCH_FREE_SPACE', - None) + remote_platform.max_wallclock = parser.get_option(section, 'MAX_WALLCLOCK', + asconf.get_max_wallclock()) + remote_platform.max_processors = parser.get_option(section, 'MAX_PROCESSORS', + asconf.get_max_processors()) + remote_platform.max_waiting_jobs = int(parser.get_option(section, 'MAX_WAITING_JOBS', + asconf.get_max_waiting_jobs())) + remote_platform.total_jobs = int(parser.get_option(section, 'TOTAL_JOBS', + asconf.get_total_jobs())) + remote_platform.hyperthreading = parser.get_option(section, 'HYPERTHREADING', + 'false').lower() + remote_platform.project = parser.get_option(section, 'PROJECT', None) + remote_platform.budget = parser.get_option(section, 'BUDGET', remote_platform.project) + remote_platform.reservation = parser.get_option(section, 'RESERVATION', '') + remote_platform.exclusivity = parser.get_option(section, 'EXCLUSIVITY', '').lower() + remote_platform.user = parser.get_option(section, 'USER', None) + remote_platform.scratch = parser.get_option(section, 'SCRATCH_DIR', None) + remote_platform._default_queue = parser.get_option(section, 'QUEUE', None) + remote_platform._serial_queue = parser.get_option(section, 'SERIAL_QUEUE', None) + remote_platform.processors_per_node = parser.get_option(section, 'PROCESSORS_PER_NODE', + None) + remote_platform.scratch_free_space = parser.get_option(section, 'SCRATCH_FREE_SPACE', + None) remote_platform.root_dir = os.path.join(remote_platform.scratch, remote_platform.project, remote_platform.user, remote_platform.expid) remote_platform.update_cmds() @@ -132,8 +140,8 @@ class ParamikoSubmitter(Submitter): for section in parser.sections(): if parser.has_option(section, 'SERIAL_PLATFORM'): - platforms[section.lower()].serial_platform = platforms[AutosubmitConfig.get_option(parser, section, - 'SERIAL_PLATFORM', - None).lower()] 
+ platforms[section.lower()].serial_platform = platforms[parser.get_option(section, + 'SERIAL_PLATFORM', + None).lower()] self.platforms = platforms diff --git a/autosubmit/platforms/pbsplatform.py b/autosubmit/platforms/pbsplatform.py index d94d3d5aa68ce15f1deccccda1eafce6f03de861..6e887d8e8cbc80e3b82249ee07215748ff37e14e 100644 --- a/autosubmit/platforms/pbsplatform.py +++ b/autosubmit/platforms/pbsplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,11 +16,15 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import textwrap + import os from autosubmit.platforms.paramiko_platform import ParamikoPlatform, ParamikoPlatformException -from autosubmit.config.log import Log +from bscearth.utils.log import Log + +from autosubmit.platforms.headers.pbs10_header import Pbs10Header +from autosubmit.platforms.headers.pbs11_header import Pbs11Header +from autosubmit.platforms.headers.pbs12_header import Pbs12Header class PBSPlatform(ParamikoPlatform): @@ -98,139 +102,3 @@ class PBSPlatform(ParamikoPlatform): return self._checkjob_cmd + str(job_id) else: return "ssh " + self.host + " " + self.get_qstatjob(job_id) - - -class Pbs12Header: - """Class to handle the Archer headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -l 
select=serial=true:ncpus=1 - #PBS -l walltime=%WALLCLOCK%:00 - #PBS -A %CURRENT_BUDG% - # - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -l select=%NUMPROC% - #PBS -l walltime=%WALLCLOCK%:00 - #PBS -A %CURRENT_BUDG% - # - ############################################################################### - """) - - -class Pbs10Header: - """Class to handle the Hector headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -q serial - #PBS -l cput=%WALLCLOCK%:00 - #PBS -A %CURRENT_BUDG% - # - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #PBS -N %JOBNAME% - #PBS -l mppwidth=%NUMPROC% - #PBS -l mppnppn=32 - #PBS -l walltime=%WALLCLOCK%:00 - #PBS -A %CURRENT_BUDG% - # - ############################################################################### - """) - - -class Pbs11Header: - """Class to handle the Lindgren headers of a job""" - - # noinspection 
PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #!/bin/sh --login - #PBS -N %JOBNAME% - #PBS -l mppwidth=%NUMPROC% - #PBS -l mppnppn=%NUMTASK% - #PBS -l walltime=%WALLCLOCK% - #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% - #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% - # - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #!/bin/sh --login - #PBS -N %JOBNAME% - #PBS -l mppwidth=%NUMPROC% - #PBS -l mppnppn=%NUMTASK% - #PBS -l walltime=%WALLCLOCK% - #PBS -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% - #PBS -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID% - # - ############################################################################### - """) \ No newline at end of file diff --git a/autosubmit/platforms/platform.py b/autosubmit/platforms/platform.py index ea3290cc90d4c345a06c5ea2aff077e833025645..50334edaa4bc1bb68fc71e9d5397debb8d4b4b82 100644 --- a/autosubmit/platforms/platform.py +++ b/autosubmit/platforms/platform.py @@ -2,7 +2,7 @@ from time import sleep import os -from autosubmit.config.log import Log +from bscearth.utils.log import Log from autosubmit.job.job_common import Status @@ 
-39,7 +39,12 @@ class Platform(object): self.service = None self.scheduler = None self.directory = None + self.hyperthreading = 'false' + self.max_wallclock = '' + self.max_processors = None self._allow_arrays = False + self._allow_wrappers = False + self._allow_python_jobs = True @property def serial_platform(self): @@ -90,6 +95,14 @@ class Platform(object): def allow_arrays(self): return self._allow_arrays is True + @property + def allow_wrappers(self): + return self._allow_wrappers is True + + @property + def allow_python_jobs(self): + return self._allow_python_jobs is True + def add_parameters(self, parameters, main_hpc=False): """ Add parameters for the current platform to the given parameters list @@ -190,7 +203,7 @@ class Platform(object): (job_out_filename, job_err_filename) = remote_logs self.get_files([job_out_filename, job_err_filename], False, 'LOG_{0}'.format(exp_id)) - def get_completed_files(self, job_name, retries=5): + def get_completed_files(self, job_name, retries=0): """ Get the COMPLETED file of the given job @@ -216,7 +229,7 @@ class Platform(object): :param job_name: name of job to check :type job_name: str - :return: True if succesful, False otherwise + :return: True if successful, False otherwise :rtype: bool """ filename = job_name + '_STAT' @@ -231,7 +244,7 @@ class Platform(object): :param job_name: name of job to check :type job_name: str - :return: True if succesful, False otherwise + :return: True if successful, False otherwise :rtype: bool """ filename = job_name + '_COMPLETED' @@ -240,7 +253,7 @@ class Platform(object): return True return False - def get_stat_file(self, job_name, retries=1): + def get_stat_file(self, job_name, retries=0): """ Copies *STAT* files from remote to local diff --git a/autosubmit/platforms/psplatform.py b/autosubmit/platforms/psplatform.py index 8ba0f1b293c30528e76039cc6548ef99925bd41e..83fc6a18a4b3e665b1b11489fa2dab72df3f8596 100644 --- a/autosubmit/platforms/psplatform.py +++ 
b/autosubmit/platforms/psplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,11 +16,12 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import textwrap + import os from xml.dom.minidom import parseString from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from autosubmit.platforms.headers.ps_header import PsHeader class PsPlatform(ParamikoPlatform): @@ -78,32 +79,3 @@ class PsPlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return self.get_pscall(job_id) - - -class PsHeader: - """Class to handle the Ps headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "" - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - """) \ No newline at end of file diff --git a/autosubmit/platforms/saga_platform.py b/autosubmit/platforms/saga_platform.py index eeec8f6ec924bcef2ba3cf2871c3bbf23a478c85..3d1f1e88bbb67b0a1a1383fa70b1629e3fec4557 100644 --- a/autosubmit/platforms/saga_platform.py +++ b/autosubmit/platforms/saga_platform.py @@ -5,8 +5,8 @@ from time import sleep import os import saga -from autosubmit.config.log import Log -from 
autosubmit.date.chunk_date_lib import date2str +from bscearth.utils.log import Log +from bscearth.utils.date import date2str from autosubmit.job.job_common import Status, Type from autosubmit.platforms.platform import Platform @@ -46,7 +46,8 @@ class SagaPlatform(Platform): raise Exception("Could't send file {0} to {1}:{2}".format(os.path.join(self.tmp_path, filename), self.host, self.get_files_path())) # noinspection PyTypeChecker - out = saga.filesystem.File("file://{0}".format(os.path.join(self.tmp_path, filename))) + out = saga.filesystem.File("file://{0}".format(os.path.join(self.tmp_path, filename)), + session=self.service.session) if self.type == 'local': out.copy("file://{0}".format(os.path.join(self.tmp_path, 'LOG_' + self.expid, filename)), saga.filesystem.CREATE_PARENTS) @@ -96,23 +97,28 @@ class SagaPlatform(Platform): :type must_exist: bool :param relative_path: relative path inside tmp folder :type relative_path: str - :return: True if file is copied succesfully, false otherwise + :return: True if file is copied successfully, false otherwise :rtype: bool """ - local_path = os.path.join(self.tmp_path, filename) - if os.path.exists(local_path): - os.remove(local_path) + + local_path = os.path.join(self.tmp_path, relative_path) + if not os.path.exists(local_path): + os.makedirs(local_path) + + file_path = os.path.join(local_path, filename) + if os.path.exists(file_path): + os.remove(file_path) if self.type == 'ecaccess': try: subprocess.check_call(['ecaccess-file-get', '{0}:{1}'.format(self.host, os.path.join(self.get_files_path(), filename)), - local_path]) + file_path]) return True except subprocess.CalledProcessError: if must_exist: - raise Exception("Could't get file {0} from {1}:{2}".format(local_path, + raise Exception("Could't get file {0} from {1}:{2}".format(file_path, self.host, self.get_files_path())) return False @@ -123,7 +129,7 @@ class SagaPlatform(Platform): out = self.directory.open(os.path.join(str(self.directory.url), filename)) - 
out.copy("file://{0}".format(local_path)) + out.copy("file://{0}".format(file_path)) out.close() return True @@ -144,7 +150,8 @@ class SagaPlatform(Platform): 'LOG_' + self.expid))) else: # noinspection PyTypeChecker - self.directory = saga.filesystem.Directory("sftp://{0}{1}".format(self.host, self.get_files_path())) + self.directory = saga.filesystem.Directory("sftp://{0}{1}".format(self.host, self.get_files_path()), + session=self.service.session) except: return False @@ -184,7 +191,8 @@ class SagaPlatform(Platform): else: # noinspection PyTypeChecker out = saga.filesystem.File("sftp://{0}{1}".format(self.host, os.path.join(self.get_files_path(), - filename))) + filename)), + session=self.service.session) out.remove() out.close() return True diff --git a/autosubmit/platforms/saga_submitter.py b/autosubmit/platforms/saga_submitter.py index 8bfdd9e90b167938202037dbf35e73f14f3a2205..acb46a2828e0487d5964802c991962542bc2d947 100644 --- a/autosubmit/platforms/saga_submitter.py +++ b/autosubmit/platforms/saga_submitter.py @@ -33,6 +33,7 @@ class SagaSubmitter(Submitter): """ Class to manage the experiments platform """ + def load_platforms(self, asconf, retries=10): """ Create all the platforms object that will be used by the experiment @@ -61,7 +62,7 @@ class SagaSubmitter(Submitter): job_parser = asconf.jobs_parser for job in job_parser.sections(): - hpc = AutosubmitConfig.get_option(job_parser, job, 'PLATFORM', hpcarch).lower() + hpc = job_parser.get_option(job, 'PLATFORM', hpcarch).lower() if hpc not in platforms_used: platforms_used.append(hpc) @@ -82,6 +83,8 @@ class SagaSubmitter(Submitter): time.sleep(5) local_platform.type = 'local' local_platform.queue = '' + local_platform.max_wallclock = asconf.get_max_wallclock() + local_platform.max_processors = asconf.get_max_processors() local_platform.max_waiting_jobs = asconf.get_max_waiting_jobs() local_platform.total_jobs = asconf.get_total_jobs() local_platform.scratch = os.path.join(BasicConfig.LOCAL_ROOT_DIR, 
asconf.expid, BasicConfig.LOCAL_TMP_DIR) @@ -101,12 +104,12 @@ class SagaSubmitter(Submitter): if section.lower() not in platforms_used: continue - platform_type = AutosubmitConfig.get_option(parser, section, 'TYPE', '').lower() + platform_type = parser.get_option(section, 'TYPE', '').lower() remote_platform = SagaPlatform(asconf.expid, section.lower(), BasicConfig) remote_platform.type = platform_type - platform_version = AutosubmitConfig.get_option(parser, section, 'VERSION', '') + platform_version = parser.get_option(section, 'VERSION', '') if platform_type == 'pbs': adaptor = 'pbs+ssh' elif platform_type == 'sge': @@ -120,7 +123,7 @@ class SagaSubmitter(Submitter): adaptor = 'lsf+ssh' elif platform_type == 'ecaccess': adaptor = 'ecaccess' - remote_platform.scheduler = AutosubmitConfig.get_option(parser, section, 'SCHEDULER', 'pbs').lower() + remote_platform.scheduler = parser.get_option(section, 'SCHEDULER', 'pbs').lower() elif platform_type == 'slurm': adaptor = 'slurm+ssh' elif platform_type == '': @@ -128,15 +131,15 @@ class SagaSubmitter(Submitter): else: adaptor = platform_type - if AutosubmitConfig.get_option(parser, section, 'ADD_PROJECT_TO_HOST', '').lower() == 'true': - host = '{0}-{1}'.format(AutosubmitConfig.get_option(parser, section, 'HOST', None), - AutosubmitConfig.get_option(parser, section, 'PROJECT', None)) + if parser.get_option(section, 'ADD_PROJECT_TO_HOST', '').lower() == 'true': + host = '{0}-{1}'.format(parser.get_option(section, 'HOST', None), + parser.get_option(section, 'PROJECT', None)) else: - host = AutosubmitConfig.get_option(parser, section, 'HOST', None) + host = parser.get_option(section, 'HOST', None) if adaptor.endswith('ssh'): ctx = saga.Context('ssh') - ctx.user_id = AutosubmitConfig.get_option(parser, section, 'USER', None) + ctx.user_id = parser.get_option(section, 'USER', None) session = saga.Session(False) session.add_context(ctx) else: @@ -160,34 +163,34 @@ class SagaSubmitter(Submitter): 
remote_platform.service._adaptor.host = remote_platform.host # noinspection PyProtectedMember remote_platform.service._adaptor.scheduler = remote_platform.scheduler - - remote_platform.max_waiting_jobs = int(AutosubmitConfig.get_option(parser, section, 'MAX_WAITING_JOBS', - asconf.get_max_waiting_jobs())) - remote_platform.total_jobs = int(AutosubmitConfig.get_option(parser, section, 'TOTAL_JOBS', - asconf.get_total_jobs())) - - remote_platform.project = AutosubmitConfig.get_option(parser, section, 'PROJECT', None) - remote_platform.budget = AutosubmitConfig.get_option(parser, section, 'BUDGET', remote_platform.project) - remote_platform.reservation = AutosubmitConfig.get_option(parser, section, 'RESERVATION', '') - remote_platform.exclusivity = AutosubmitConfig.get_option(parser, section, 'EXCLUSIVITY', '').lower() - remote_platform.user = AutosubmitConfig.get_option(parser, section, 'USER', None) - remote_platform.scratch = AutosubmitConfig.get_option(parser, section, 'SCRATCH_DIR', None) - remote_platform._default_queue = AutosubmitConfig.get_option(parser, section, 'QUEUE', None) - remote_platform._serial_queue = AutosubmitConfig.get_option(parser, section, 'SERIAL_QUEUE', None) - remote_platform.processors_per_node = AutosubmitConfig.get_option(parser, section, 'PROCESSORS_PER_NODE', - None) - remote_platform.scratch_free_space = AutosubmitConfig.get_option(parser, section, 'SCRATCH_FREE_SPACE', - None) + remote_platform.max_wallclock = parser.get_option(section, 'MAX_WALLCLOCK', + asconf.get_max_wallclock()) + remote_platform.max_processors = parser.get_option(section, 'MAX_PROCESSORS', + asconf.get_max_processors()) + remote_platform.max_waiting_jobs = int(parser.get_option(section, 'MAX_WAITING_JOBS', + asconf.get_max_waiting_jobs())) + remote_platform.total_jobs = int(parser.get_option(section, 'TOTAL_JOBS', + asconf.get_total_jobs())) + remote_platform.project = parser.get_option(section, 'PROJECT', None) + remote_platform.budget = 
parser.get_option(section, 'BUDGET', remote_platform.project) + remote_platform.reservation = parser.get_option(section, 'RESERVATION', '') + remote_platform.exclusivity = parser.get_option(section, 'EXCLUSIVITY', '').lower() + remote_platform.user = parser.get_option(section, 'USER', None) + remote_platform.scratch = parser.get_option(section, 'SCRATCH_DIR', None) + remote_platform._default_queue = parser.get_option(section, 'QUEUE', None) + remote_platform._serial_queue = parser.get_option(section, 'SERIAL_QUEUE', None) + remote_platform.processors_per_node = parser.get_option(section, 'PROCESSORS_PER_NODE', + None) + remote_platform.scratch_free_space = parser.get_option(section, 'SCRATCH_FREE_SPACE', + None) remote_platform.root_dir = os.path.join(remote_platform.scratch, remote_platform.project, remote_platform.user, remote_platform.expid) platforms[section.lower()] = remote_platform for section in parser.sections(): if parser.has_option(section, 'SERIAL_PLATFORM'): - platforms[section.lower()].serial_platform = platforms[AutosubmitConfig.get_option(parser, section, - 'SERIAL_PLATFORM', - None).lower()] + platforms[section.lower()].serial_platform = platforms[parser.get_option(section, + 'SERIAL_PLATFORM', + None).lower()] self.platforms = platforms - - diff --git a/autosubmit/platforms/sgeplatform.py b/autosubmit/platforms/sgeplatform.py index 73aa130b8b8088e23f706071904e522c044d0d8d..f71c084d475f3c0ecfb41aa4844617b7c0524d69 100644 --- a/autosubmit/platforms/sgeplatform.py +++ b/autosubmit/platforms/sgeplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,13 +16,14 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . 
-import textwrap + import os import subprocess from xml.dom.minidom import parseString from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from autosubmit.platforms.headers.sge_header import SgeHeader class SgePlatform(ParamikoPlatform): @@ -83,58 +84,3 @@ class SgePlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return self.get_qstatjob(job_id) - - -class SgeHeader: - """Class to handle the Ithaca headers of a job""" - - # noinspection PyMethodMayBeStatic - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - if job.parameters['CURRENT_QUEUE'] == '': - return "" - else: - return "$ -q {0}".format(job.parameters['CURRENT_QUEUE']) - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #$ -S /bin/sh - #$ -N %JOBNAME% - #$ -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ - #$ -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ - #$ -V - #$ -l h_rt=%WALLCLOCK%:00 - #$ -l s_rt=%WALLCLOCK%:00 - #%QUEUE_DIRECTIVE% - # - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #$ -S /bin/sh - #$ -N %JOBNAME% - #$ -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ - #$ -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/ - #$ -V - #$ -l h_rt=%WALLCLOCK%:00 - #$ -l s_rt=%WALLCLOCK%:00 - #$ -pe orte 
%NUMPROC% - #%QUEUE_DIRECTIVE% - # - ############################################################################### - """) \ No newline at end of file diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 4bf4a03f84b83575b44d0cfe96f4a2a06555ae46..4234c3621c0f1a16c622c2a29e94da1649793d28 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2015 Earth Sciences Department, BSC-CNS +# Copyright 2017 Earth Sciences Department, BSC-CNS # This file is part of Autosubmit. @@ -16,12 +16,14 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . -import textwrap + import os from xml.dom.minidom import parseString from autosubmit.platforms.paramiko_platform import ParamikoPlatform +from autosubmit.platforms.headers.slurm_header import SlurmHeader +from autosubmit.platforms.wrappers.slurm_wrapper import SlurmWrapper class SlurmPlatform(ParamikoPlatform): @@ -32,29 +34,18 @@ class SlurmPlatform(ParamikoPlatform): :type expid: str """ - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - return "#" - def __init__(self, expid, name, config): ParamikoPlatform.__init__(self, expid, name, config) self._header = SlurmHeader() + self._wrapper = SlurmWrapper() self.job_status = dict() - self.job_status['COMPLETED'] = ['COMPLETED'] self.job_status['RUNNING'] = ['RUNNING'] self.job_status['QUEUING'] = ['PENDING', 'CONFIGURING', 'RESIZING'] self.job_status['FAILED'] = ['FAILED', 'CANCELLED', 'NODE_FAIL', 'PREEMPTED', 'SUSPENDED', 'TIMEOUT'] self._pathdir = "\$HOME/LOG_" + self.expid + self._allow_arrays = False + self._allow_wrappers = True 
self.update_cmds() def update_cmds(self): @@ -95,53 +86,3 @@ class SlurmPlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return 'sacct -n -j {1} -o "State"'.format(self.host, job_id) - - -class SlurmHeader: - """Class to handle the SLURM headers of a job""" - - # noinspection PyMethodMayBeStatic,PyUnusedLocal - def get_queue_directive(self, job): - """ - Returns queue directive for the specified job - - :param job: job to create queue directibve for - :type job: Job - :return: queue directive - :rtype: str - """ - # There is no queue, so directive is empty - if job.parameters['CURRENT_QUEUE'] == '': - return "" - else: - return "SBATCH --qos {0}".format(job.parameters['CURRENT_QUEUE']) - - SERIAL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #%QUEUE_DIRECTIVE% - #SBATCH -n %NUMPROC% - #SBATCH -t %WALLCLOCK%:00 - #SBATCH -J %JOBNAME% - #SBATCH -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%JOBNAME%-%j.out - #SBATCH -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%JOBNAME%-%j.err - # - ############################################################################### - """) - - PARALLEL = textwrap.dedent("""\ - ############################################################################### - # %TASKTYPE% %EXPID% EXPERIMENT - ############################################################################### - # - #%QUEUE_DIRECTIVE% - #SBATCH -n %NUMPROC% - #SBATCH -t %WALLCLOCK%:00 - #SBATCH -J %JOBNAME% - #SBATCH -o %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%JOBNAME%-%j.out - #SBATCH -e %CURRENT_SCRATCH_DIR%/%CURRENT_PROJ%/%CURRENT_USER%/%EXPID%/LOG_%EXPID%/%JOBNAME%-%j.err - # - ############################################################################### - """) \ No newline at end of file 
diff --git a/autosubmit/platforms/wrappers/__init__.py b/autosubmit/platforms/wrappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/autosubmit/platforms/wrappers/ec_wrapper.py b/autosubmit/platforms/wrappers/ec_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..71ffdf57c3a93cbac834b6af06dcc5bf2afe385a --- /dev/null +++ b/autosubmit/platforms/wrappers/ec_wrapper.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+
+import textwrap
+
+
+# TODO: Refactor with kwargs
+class EcWrapper(object):
+    """Class to handle wrappers on ECMWF platform"""
+
+    @classmethod
+    def vertical(cls, filename, queue, project, wallclock, num_procs, job_scripts, dependency, **kwargs):
+        return textwrap.dedent("""\
+            #!/bin/bash
+            ###############################################################################
+            # {0}
+            ###############################################################################
+            #
+            #PBS -N {0}
+            #PBS -q {1}
+            #PBS -l EC_billing_account={2}
+            #PBS -o $SCRATCH/{6}/LOG_{6}/{0}.out
+            #PBS -e $SCRATCH/{6}/LOG_{6}/{0}.err
+            #PBS -l walltime={3}:00
+            #PBS -l EC_total_tasks={4}
+            #PBS -l EC_hyperthreads=1
+            {7}
+            #
+            ###############################################################################
+
+            # Function to execute each thread
+            execute_script()
+            {{
+                out="$1.$2.out"
+                err="$1.$2.err"
+                bash $1 > $out 2> $err &
+                pid=$!
+            }}
+
+            # Initializing variables
+            scripts="{5}"
+            i=0
+            pids=""
+
+            # Initializing the scripts
+            for script in $scripts; do
+                execute_script "$SCRATCH/{6}/LOG_{6}/$script" $i
+                wait $pid
+                if [ $?
-eq 0 ]; then + echo "The job $script has been COMPLETED" + else + echo "The job $script has FAILED" + fi + i=$((i+1)) + done + """.format(filename, queue, project, wallclock, num_procs, + ' '.join(str(s) for s in job_scripts), kwargs['expid'], + cls.dependency_directive(dependency))) + + @classmethod + def horizontal(cls, filename, queue, project, wallclock, num_procs, _, job_scripts, dependency, **kwargs): + return textwrap.dedent("""\ + #!/bin/bash + ############################################################################### + # {0} + ############################################################################### + # + #PBS -N {0} + #PBS -q {1} + #PBS -l EC_billing_account={2} + #PBS -o $SCRATCH/{6}/LOG_{6}/{0}.out + #PBS -e $SCRATCH/{6}/LOG_{6}/{0}.err + #PBS -l walltime={3}:00 + #PBS -l EC_total_tasks={4} + #PBS -l EC_hyperthreads=1 + {7} + # + ############################################################################### + + # Function to execute each thread + execute_script() + {{ + out="$1.$2.out" + err="$1.$2.err" + bash $1 > $out 2> $err & + pid=$! + }} + + # Initializing variables + scripts="{5}" + i=0 + pids="" + + # Initializing the scripts + for script in $scripts; do + execute_script "$SCRATCH/{6}/LOG_{6}/$script" $i + pids+="$pid " + i=$((i+1)) + done + + # Waiting until all scripts finish + for pid in $pids; do + wait $pid + if [ $? 
-eq 0 ]; then + echo "The job $pid has been COMPLETED" + else + echo "The job $pid has FAILED" + fi + done + """.format(filename, queue, project, wallclock, num_procs, + ' '.join(str(s) for s in job_scripts), kwargs['expid'], + cls.dependency_directive(dependency))) + + @classmethod + def dependency_directive(cls, dependency): + return '#' if dependency is None else '#PBS -W depend=afterok:{0}'.format(dependency) diff --git a/autosubmit/platforms/wrappers/lsf_wrapper.py b/autosubmit/platforms/wrappers/lsf_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..a2d2205956b6868c3da89026d03a92c0726a0f10 --- /dev/null +++ b/autosubmit/platforms/wrappers/lsf_wrapper.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python + +# Copyright 2017 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import textwrap + + +# TODO: Refactor with kwargs +class LsfWrapper(object): + """Class to handle wrappers on LSF platforms""" + + @classmethod + def array(cls, filename, array_id, wallclock, num_procs): + return textwrap.dedent("""\ + ############################################################################### + # {0} + ############################################################################### + # + # + #BSUB -J {0}{1} + #BSUB -oo {0}.%I.out + #BSUB -eo {0}.%I.err + #BSUB -W {2} + #BSUB -n {3} + # + ############################################################################### + + SCRIPT=$(cat {0}.$LSB_JOBINDEX | awk 'NR==1') + chmod +x $SCRIPT + ./$SCRIPT + """.format(filename, array_id, wallclock, num_procs)) + + @classmethod + def vertical(cls, filename, queue, project, wallclock, num_procs, job_scripts, dependency, **kwargs): + return textwrap.dedent("""\ + #!/usr/bin/env python + ############################################################################### + # {0} + ############################################################################### + # + #BSUB -J {0} + #BSUB -q {1} + #BSUB -P {2} + #BSUB -oo {0}.out + #BSUB -eo {0}.err + #BSUB -W {3} + #BSUB -n {4} + {6} + # + ############################################################################### + + import os + import sys + from threading import Thread + from commands import getstatusoutput + + class JobThread(Thread): + def __init__ (self, template, id_run): + Thread.__init__(self) + self.template = template + self.id_run = id_run + + def run(self): + out = str(self.template) + '.' + str(self.id_run) + '.out' + err = str(self.template) + '.' 
+ str(self.id_run) + '.err' + command = str(self.template) + ' ' + str(self.id_run) + ' ' + os.getcwd() + (self.status) = getstatusoutput(command + ' > ' + out + ' 2> ' + err) + + scripts = {5} + + for i in range(len(scripts)): + current = JobThread(scripts[i], i) + current.start() + current.join() + completed_filename = scripts[i].replace('.cmd', '_COMPLETED') + completed_path = os.path.join(os.getcwd(), completed_filename) + if os.path.exists(completed_path): + print "The job ", current.template," has been COMPLETED" + else: + print "The job ", current.template," has FAILED" + os._exit(1) + """.format(filename, queue, project, wallclock, num_procs, str(job_scripts), + cls.dependency_directive(dependency))) + + @classmethod + def horizontal(cls, filename, queue, project, wallclock, num_procs, num_jobs, job_scripts, dependency, **kwargs): + return textwrap.dedent("""\ + #!/usr/bin/env python + ############################################################################### + # {0} + ############################################################################### + # + #BSUB -J {0} + #BSUB -q {1} + #BSUB -P {2} + #BSUB -oo {0}.out + #BSUB -eo {0}.err + #BSUB -W {3} + #BSUB -n {4} + {7} + # + ############################################################################### + + import os + import sys + from threading import Thread + from commands import getstatusoutput + + class JobThread(Thread): + def __init__ (self, template, id_run): + Thread.__init__(self) + self.template = template + self.id_run = id_run + + def run(self): + out = str(self.template) + "." + str(self.id_run) + ".out" + err = str(self.template) + "." 
+ str(self.id_run) + ".err"
+                command = str(self.template) + " " + str(self.id_run) + " " + os.getcwd()
+                (self.status) = getstatusoutput(command + " > " + out + " 2> " + err)
+
+        # Splitting the original hosts file
+        os.system("cat {8} | split -a 2 -d -l {5} - mlist-{9}-")
+
+        # Defining scripts to be run
+        scripts = {6}
+
+        # Initializing PIDs container
+        pid_list = []
+
+        # Initializing the scripts
+        for i in range(len(scripts)):
+            current = JobThread(scripts[i], i)
+            pid_list.append(current)
+            current.start()
+
+        # Waiting until all scripts finish
+        for pid in pid_list:
+            pid.join()
+            completed_filename = pid.template.replace('.cmd', '_COMPLETED')
+            completed_path = os.path.join(os.getcwd(), completed_filename)
+            if os.path.exists(completed_path):
+                print "The job ", pid.template," has been COMPLETED"
+            else:
+                print "The job ", pid.template," has FAILED"
+        """.format(filename, queue, project, wallclock, num_procs, (int(num_procs) / num_jobs),
+                   str(job_scripts), cls.dependency_directive(dependency), "${LSB_DJOB_HOSTFILE}", "${LSB_JOBID}"))
+
+    @classmethod
+    def dependency_directive(cls, dependency):
+        return '#' if dependency is None else '#BSUB -w \'done("{0}")\' [-ti]'.format(dependency)
diff --git a/autosubmit/platforms/wrappers/slurm_wrapper.py b/autosubmit/platforms/wrappers/slurm_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f11cfec4fe4b4a9814e489a0ac8ea30a2b25d87
--- /dev/null
+++ b/autosubmit/platforms/wrappers/slurm_wrapper.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Earth Sciences Department, BSC-CNS
+
+# This file is part of Autosubmit.
+
+# Autosubmit is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+ +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . + +import textwrap + + +# TODO: Refactor with kwargs +class SlurmWrapper(object): + """Class to handle wrappers on SLURM platforms""" + + @classmethod + def vertical(cls, filename, queue, project, wallclock, num_procs, job_scripts, dependency, **kwargs): + return textwrap.dedent("""\ + #!/usr/bin/env python + ############################################################################### + # {0} + ############################################################################### + # + #SBATCH -J {0} + #SBATCH -p {1} + #SBATCH -A {2} + #SBATCH -o {0}.out + #SBATCH -e {0}.err + #SBATCH -t {3}:00 + #SBATCH -n {4} + {6} + # + ############################################################################### + + import os + import sys + from threading import Thread + from commands import getstatusoutput + + class JobThread(Thread): + def __init__ (self, template, id_run): + Thread.__init__(self) + self.template = template + self.id_run = id_run + + def run(self): + out = str(self.template) + '.' + str(self.id_run) + '.out' + err = str(self.template) + '.' 
+ str(self.id_run) + '.err' + command = "bash " + str(self.template) + ' ' + str(self.id_run) + ' ' + os.getcwd() + (self.status) = getstatusoutput(command + ' > ' + out + ' 2> ' + err) + + scripts = {5} + + for i in range(len(scripts)): + current = JobThread(scripts[i], i) + current.start() + current.join() + completed_filename = scripts[i].replace('.cmd', '_COMPLETED') + completed_path = os.path.join(os.getcwd(), completed_filename) + if os.path.exists(completed_path): + print "The job ", current.template," has been COMPLETED" + else: + print "The job ", current.template," has FAILED" + os._exit(1) + """.format(filename, queue, project, wallclock, num_procs, str(job_scripts), + cls.dependency_directive(dependency))) + + @classmethod + def horizontal(cls, filename, queue, project, wallclock, num_procs, _, job_scripts, dependency, **kwargs): + return textwrap.dedent("""\ + #!/usr/bin/env python + ############################################################################### + # {0} + ############################################################################### + # + #SBATCH -J {0} + #SBATCH -p {1} + #SBATCH -A {2} + #SBATCH -o {0}.out + #SBATCH -e {0}.err + #SBATCH -t {3}:00 + #SBATCH -n {4} + {6} + # + ############################################################################### + + import os + import sys + from threading import Thread + from commands import getstatusoutput + + class JobThread(Thread): + def __init__ (self, template, id_run): + Thread.__init__(self) + self.template = template + self.id_run = id_run + + def run(self): + out = str(self.template) + "." + str(self.id_run) + ".out" + err = str(self.template) + "." 
+ str(self.id_run) + ".err"
+                command = "bash " + str(self.template) + " " + str(self.id_run) + " " + os.getcwd()
+                (self.status) = getstatusoutput(command + " > " + out + " 2> " + err)
+
+        # Defining scripts to be run
+        scripts = {5}
+
+        # Initializing PIDs container
+        pid_list = []
+
+        # Initializing the scripts
+        for i in range(len(scripts)):
+            current = JobThread(scripts[i], i)
+            pid_list.append(current)
+            current.start()
+
+        # Waiting until all scripts finish
+        for pid in pid_list:
+            pid.join()
+            completed_filename = pid.template.replace('.cmd', '_COMPLETED')
+            completed_path = os.path.join(os.getcwd(), completed_filename)
+            if os.path.exists(completed_path):
+                print "The job ", pid.template," has been COMPLETED"
+            else:
+                print "The job ", pid.template," has FAILED"
+        """.format(filename, queue, project, wallclock, num_procs, str(job_scripts),
+                   cls.dependency_directive(dependency)))
+
+    @classmethod
+    def dependency_directive(cls, dependency):
+        return '#' if dependency is None else '#SBATCH --dependency=afterok:{0}'.format(dependency)
diff --git a/docs/source/codedoc/date.rst b/docs/source/codedoc/date.rst
deleted file mode 100644
index 90738c2bea412f826fcceb166ca1811b65847f74..0000000000000000000000000000000000000000
--- a/docs/source/codedoc/date.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-autosubmit.date
-===============
-.. automodule:: autosubmit.date.chunk_date_lib
-   :members:
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4d08fc1e4d3937f7e4000988268490524f760529..3d0f1b624bee6cb49884e210d95915177030c7f0
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -64,7 +64,7 @@ author = u'Earth Science Department, Barcelona Supercomputing Center, BSC'
 # The short X.Y version.
 version = '3.8'
 # The full version, including alpha/beta/rc tags.
-release = '3.8.0'
+release = '3.8.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index 6f5b4669a9d6ccd085331671cd59b2e340879ae1..a90c07aeb8d7a65b030d74d5689c3a1d57a873b1
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -204,6 +204,25 @@ More info on password-less ssh can be found at: http://www.linuxproblem.org/art_
 .. caution:: After launching Autosubmit, one must be aware of login expiry limit and policy (if applicable
     for any HPC) and renew the login access accordingly (by using token/key etc) before expiry.
 
+How to run an experiment that was created with another version
+==============================================================
+
+.. important:: First of all you have to stop the Autosubmit instance related to the experiment
+
+Once you have loaded / installed the Autosubmit version you want:
+::
+
+    autosubmit create EXPID
+    autosubmit recovery EXPID -s -all
+    autosubmit run EXPID
+
+*EXPID* is the experiment identifier.
+
+The most common problem when you change your Autosubmit version is the appearance of several Python errors.
+This is due to how Autosubmit saves its data internally, which can be incompatible between versions.
+The steps above re-create (1) these internal data structures and recover (2) the previous status of your experiment.
+
+
 How to test the experiment
 ==========================
 This method is to conduct a test for a given experiment. It creates a new experiment for a given experiment with a
@@ -352,6 +371,25 @@ The location where user can find the generated plots with date and timestamp can
 /cxxx/plot/cxxx_statistics__