diff --git a/CHANGELOG b/CHANGELOG
index 10ed47d6b434ffe4b25440ab0325101afd4f31b5..5046adf65c3f7b7fc29d7b1e69ad68a4ee34881e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -89,7 +89,6 @@
 	- CHUNKINI option
 	- Paramiko permissions
 	- Paramiko non-existing remote copy
-	- Saga sessions
 
 3.7.7
 	Some improvements for Slurm platforms
diff --git a/README.md b/README.md
index 22be0c1b23fd04c2a344b39ab4dba7f036f32495..831e65716d92399d1eadf9052fcc82de04fa66a8 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ HOW TO DEPLOY/SETUP AUTOSUBMIT FRAMEWORK
 
 * ARCHER (EPCC machine)
 
 - Pre-requisites: These packages (bash, python2, sqlite3, git-scm > 1.8.2, subversion, dialog*) must be available at local
-  machine. These packages (argparse, dateutil, pyparsing, numpy, pydotplus, matplotlib, paramiko, saga-python,
+  machine. These packages (argparse, dateutil, pyparsing, numpy, pydotplus, matplotlib, paramiko,
   python2-pythondialog*, mock, portalocker) must be available for python runtime. And the machine is also able to
   access HPC platforms via password-less ssh.
diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py
index 940e3e9336b2df35d033ecdec50e39c522a47d44..bae5ce07feec8e4e529682810ba9844cb7cefe55 100644
--- a/autosubmit/config/config_common.py
+++ b/autosubmit/config/config_common.py
@@ -1195,7 +1195,7 @@ class AutosubmitConfig(object):
 
     def is_valid_communications_library(self):
         library = self.get_communications_library()
-        return library in ['paramiko', 'saga']
+        return library in ['paramiko']
 
     def is_valid_storage_type(self):
         storage_type = self.get_storage_type()
diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py
index 2857fbb9be78c4514f3fb168ccbd233585bb7873..dadc92d7c720340cafdc097e7a46dd548b2abd76 100644
--- a/autosubmit/job/job.py
+++ b/autosubmit/job/job.py
@@ -800,19 +800,12 @@ class Job(object):
 
     def _get_template_content(self, as_conf, snippet, template):
         communications_library = as_conf.get_communications_library()
-        if communications_library == 'saga':
-            return self._get_saga_template(snippet, template)
-        elif communications_library == 'paramiko':
+        if communications_library == 'paramiko':
             return self._get_paramiko_template(snippet, template)
         else:
             Log.error('You have to define a template on Job class')
             raise Exception('Job template content not found')
 
-    def _get_saga_template(self, snippet, template):
-        return ''.join([snippet.as_header(''),
-                        template,
-                        snippet.as_tailer()])
-
     def _get_paramiko_template(self, snippet, template):
         current_platform = self.platform
         return ''.join([snippet.as_header(current_platform.get_header(self)),
diff --git a/autosubmit/platforms/ecmwf_adaptor.py b/autosubmit/platforms/ecmwf_adaptor.py
deleted file mode 100644
index 52fa2b902750bb1438b5e5a5b4f40c2d7603c08e..0000000000000000000000000000000000000000
--- a/autosubmit/platforms/ecmwf_adaptor.py
+++ /dev/null
@@ -1,968 +0,0 @@
-""" LSF mn adaptor implementation
-"""
-
-import re
-import os
-import time
-import threading
-import saga
-
-# noinspection PyPackageRequirements
-import radical.utils.threads as sut
-import saga.url as surl
-import saga.utils.pty_shell
-import saga.adaptors.base
-import saga.adaptors.cpi.job
-import saga.adaptors.loadl.loadljob
-import saga.adaptors.pbs.pbsjob
-import saga.adaptors.cpi.decorators
-from autosubmit.config.basicConfig import BasicConfig
-
-SYNC_CALL = saga.adaptors.cpi.decorators.SYNC_CALL
-ASYNC_CALL = saga.adaptors.cpi.decorators.ASYNC_CALL
-
-SYNC_WAIT_UPDATE_INTERVAL = 1  # seconds
-MONITOR_UPDATE_INTERVAL = 3  # seconds
-
-
-# --------------------------------------------------------------------
-#
-# noinspection PyProtectedMember,PyPep8Naming,PyMissingOrEmptyDocstring
-class _job_state_monitor(threading.Thread):
-    """ thread that periodically monitors job states
-    """
-
-    def __init__(self, job_service):
-
-        self.logger = job_service._logger
-        self.js = job_service
-        self._stop = sut.Event()
-
-        super(_job_state_monitor, self).__init__()
-        self.setDaemon(True)
-
-    def stop(self):
-        self._stop.set()
-
-    def stopped(self):
-        return self._stop.isSet()
-
-    def run(self):
-        while self.stopped() is False:
-            try:
-                # do bulk updates here! we don't want to pull information
-                # job by job. that would be too inefficient!
-                jobs = self.js.jobs
-                job_keys = jobs.keys()
-
-                for job in job_keys:
-                    # if the job hasn't been started, we can't update its
-                    # state. we can tell if a job has been started if it
-                    # has a job id
-                    if jobs[job].get('job_id', None) is not None:
-                        # we only need to monitor jobs that are not in a
-                        # terminal state, so we can skip the ones that are
-                        # either done, failed or canceled
-                        state = jobs[job]['state']
-                        if (state != saga.job.DONE) and (state != saga.job.FAILED) and (state != saga.job.CANCELED):
-
-                            job_info = self.js._job_get_info(job)
-                            self.logger.info(
-                                "Job monitoring thread updating Job %s (state: %s)" % (job, job_info['state']))
-
-                            if job_info['state'] != jobs[job]['state']:
-                                # fire job state callback if 'state' has changed
-                                job._api()._attributes_i_set('state', job_info['state'], job._api()._UP, True)
-
-                            # update job info
-                            self.js.jobs[job] = job_info
-
-                time.sleep(MONITOR_UPDATE_INTERVAL)
-            except Exception as e:
-                self.logger.warning("Exception caught in job monitoring thread: %s" % e)
-
-
-# --------------------------------------------------------------------
-#
-def log_error_and_raise(message, exception, logger):
-    """
-    Logs an 'error' message and subsequently throws an exception
-
-    :param message: message to show
-    :type message: str
-    :param exception: exception to raise
-    :param logger: logger to use
-    """
-    logger.error(message)
-    raise exception(message)
-
-
-# --------------------------------------------------------------------
-#
-def _ecaccess_to_saga_jobstate(ecaccess_state):
-    """ translates an ecaccess state to a saga state
-    """
-    if ecaccess_state in ['EXEC']:
-        return saga.job.RUNNING
-    elif ecaccess_state in ['INIT', 'RETR', 'STDBY', 'WAIT']:
-        return saga.job.PENDING
-    elif ecaccess_state in ['DONE']:
-        return saga.job.DONE
-    elif ecaccess_state in ['STOP']:
-        return saga.job.FAILED
-    elif ecaccess_state in ['USUSP', 'SSUSP', 'PSUSP']:
-        return saga.job.SUSPENDED
-    else:
-        return saga.job.UNKNOWN
-
-_PTY_TIMEOUT = 2.0
-
-# --------------------------------------------------------------------
-# the adaptor name
-#
-_ADAPTOR_NAME = "autosubmit.platforms.ecmwf_adaptor"
-_ADAPTOR_SCHEMAS = ["ecaccess"]
-_ADAPTOR_OPTIONS = []
-
-
-# --------------------------------------------------------------------
-# the adaptor capabilities & supported attributes
-#
-_ADAPTOR_CAPABILITIES = {
-    "jdes_attributes": [saga.job.NAME,
-                        saga.job.EXECUTABLE,
-                        saga.job.ARGUMENTS,
-                        saga.job.ENVIRONMENT,
-                        saga.job.INPUT,
-                        saga.job.OUTPUT,
-                        saga.job.ERROR,
-                        saga.job.QUEUE,
-                        saga.job.PROJECT,
-                        saga.job.WALL_TIME_LIMIT,
-                        saga.job.WORKING_DIRECTORY,
-                        saga.job.SPMD_VARIATION,  # TODO: 'hot'-fix for BigJob
-                        saga.job.TOTAL_CPU_COUNT,
-                        saga.job.THREADS_PER_PROCESS,
-                        saga.job.PROCESSES_PER_HOST],
-    "job_attributes": [saga.job.EXIT_CODE,
-                       saga.job.EXECUTION_HOSTS,
-                       saga.job.CREATED,
-                       saga.job.STARTED,
-                       saga.job.FINISHED],
-    "metrics": [saga.job.STATE],
-    "callbacks": [saga.job.STATE],
-    "contexts": {"ssh": "SSH public/private keypair",
-                 "x509": "GSISSH X509 proxy context",
-                 "userpass": "username/password pair (ssh)"}
-}
-
-# --------------------------------------------------------------------
-# the adaptor documentation
-#
-_ADAPTOR_DOC = {
-    "name": _ADAPTOR_NAME,
-    "cfg_options": _ADAPTOR_OPTIONS,
-    "capabilities": _ADAPTOR_CAPABILITIES,
-    "description": """
-The ecaccess adaptor allows running and managing jobs on ECMWF machines
-""",
-    "schemas": {"ecaccess": "connect using ecaccess tools"}
-}
-
-# --------------------------------------------------------------------
-# the adaptor info is used to register the adaptor with SAGA
-#
-_ADAPTOR_INFO = {
-    "name": _ADAPTOR_NAME,
-    "version": "v0.1",
-    "schemas": _ADAPTOR_SCHEMAS,
-    "capabilities": _ADAPTOR_CAPABILITIES,
-    "cpis": [
-        {
-            "type": "saga.job.Service",
-            "class": "ECMWFJobService"
-        },
-        {
-            "type": "saga.job.Job",
-            "class": "ECMWFJob"
-        }
-    ]
-}
-
-
-###############################################################################
-# The adaptor class
-# noinspection PyMissingOrEmptyDocstring
-class Adaptor(saga.adaptors.base.Base):
-    """ this is the actual adaptor class, which gets loaded by SAGA (i.e. by
-        the SAGA engine), and which registers the CPI implementation classes
-        which provide the adaptor's functionality.
-    """
-
-    # ----------------------------------------------------------------
-    #
-    def __init__(self):
-        # noinspection PyCallByClass,PyTypeChecker
-        saga.adaptors.base.Base.__init__(self, _ADAPTOR_INFO, _ADAPTOR_OPTIONS)
-
-        self.id_re = re.compile('^\[(.*)\]-\[(.*?)\]$')
-
-    # ----------------------------------------------------------------
-    #
-    def sanity_check(self):
-        # FIXME: also check for gsissh
-        pass
-
-    # ----------------------------------------------------------------
-    #
-    def parse_id(self, job_id):
-        # split the id '[rm]-[pid]' in its parts, and return them.
-
-        match = self.id_re.match(job_id)
-
-        if not match or len(match.groups()) != 2:
-            raise saga.BadParameter("Cannot parse job id '%s'" % job_id)
-
-        return match.group(1), match.group(2)
-
-
-###############################################################################
-#
-# noinspection PyMethodOverriding,PyMethodOverriding,PyProtectedMember,PyMissingOrEmptyDocstring
-class ECMWFJobService(saga.adaptors.cpi.job.Service):
-    """ implements saga.adaptors.cpi.job.Service
-    """
-
-    # ----------------------------------------------------------------
-    #
-    # noinspection PyMissingConstructor
-    def __init__(self, api, adaptor):
-
-        self._mt = None
-        _cpi_base = super(ECMWFJobService, self)
-        _cpi_base.__init__(api, adaptor)
-
-        self._adaptor = adaptor
-        self.host = None
-        self.scheduler = None
-
-    # ----------------------------------------------------------------
-    #
-    def __del__(self):
-
-        self.close()
-
-    # ----------------------------------------------------------------
-    #
-    def close(self):
-
-        if self.mt:
-            self.mt.stop()
-            self.mt.join(10)  # don't block forever on join()
-
-        self._logger.info("Job monitoring thread stopped.")
-
-        self.finalize(True)
-
-    # ----------------------------------------------------------------
-    #
-    def finalize(self, kill_shell=False):
-
-        if kill_shell:
-            if self.shell:
-                self.shell.finalize(True)
-
-    # ----------------------------------------------------------------
-    #
-    @SYNC_CALL
-    def init_instance(self, adaptor_state, rm_url, session):
-        """ service instance constructor
-        :param session:
-        :type session: saga.Session
-        :param adaptor_state:
-        :param rm_url:
-        """
-        self.rm = rm_url
-        self.session = session
-        self.ppn = 1
-        self.queue = None
-        self.shell = None
-        self.jobs = dict()
-
-        # the monitoring thread - one per service instance
-        # noinspection PyTypeChecker
-        self.mt = _job_state_monitor(job_service=self)
-        self.mt.start()
-
-        rm_scheme = rm_url.scheme
-        pty_url = surl.Url(rm_url)
-
-        # we need to extract the scheme for PTYShell. That's basically the
-        # job.Service URL without the mn+ part. We use the PTYShell to execute
-        # ecaccess commands either locally or via gsissh or ssh.
-        if rm_scheme == "ecaccess":
-            pty_url.scheme = "fork"
-
-        self.shell = saga.utils.pty_shell.PTYShell(pty_url, self.session)
-
-        self.initialize()
-        return self.get_api()
-
-    # ----------------------------------------------------------------
-    #
-    def initialize(self):
-        ret, out, _ = self.shell.run_sync("which ecaccess -version")
-        if ret == 0:
-            self._logger.info("Found ECMWF tools. Version: {0}".format(out))
Version: {0}".format(out)) - - def _job_run(self, job_obj): - """ - runs a job via ecaccess-job-submit - """ - # get the job description - jd = job_obj.jd - - # normalize working directory path - if jd.working_directory: - jd.working_directory = os.path.normpath(jd.working_directory) - - try: - # create a loadleveler or PBS job script from SAGA job description - if self.scheduler == 'loadleveler': - header = self._generate_ll_header(jd) - else: - header = self._generate_pbs_header(jd) - self._logger.info("Generated ECMWF header: %s" % header) - except Exception as ex: - header = '' - log_error_and_raise(str(ex), saga.BadParameter, self._logger) - - local_file = os.path.join(BasicConfig.LOCAL_ROOT_DIR, jd.name.split('_')[0], BasicConfig.LOCAL_TMP_DIR, - "{0}.cmd".format(str(jd.name))) - f = open(local_file, 'r+') - script = f.read() - script = header + script - f.seek(0) - f.write(script) - f.truncate() - f.close() - - cmdline = "ecaccess-file-put {0} {1}:{2}".format(local_file, self.host, jd.executable) - ret, out, _ = self.shell.run_sync(cmdline) - if ret != 0: - # something went wrong - message = "Error sending file job via 'ecaccess-file-put': %s. Commandline was: %s" \ - % (out, cmdline) - log_error_and_raise(message, saga.NoSuccess, self._logger) - - cmdline = "ecaccess-job-submit -queueName {0} -jobName {1} -distant {0}:{2}".format(self.host, - jd.name, jd.executable) - ret, out, _ = self.shell.run_sync(cmdline) - if ret != 0: - # something went wrong - message = "Error submitting job via 'ecaccess-job-submit': %s. Commandline was: %s" \ - % (out, cmdline) - log_error_and_raise(message, saga.NoSuccess, self._logger) - else: - lines = out.split("\n") - lines = filter(lambda l: l != '', lines) # remove empty - - self._logger.info('ecaccess-job-submit: %s' % ''.join(lines)) - - mn_job_id = lines[0] - - if not mn_job_id: - raise Exception("Failed to detect job id after submission.") - - job_id = "[%s]-[%s]" % (self.rm, mn_job_id) - - self._logger.info("Submitted ECMWF job with id: %s" % job_id) - - # update job dictionary - self.jobs[job_obj]['job_id'] = job_id - self.jobs[job_obj]['submitted'] = job_id - - # set status to 'pending' and manually trigger callback - # self.jobs[job_obj]['state'] = saga.job.PENDING - # job_obj._api()._attributes_i_set('state', self.jobs[job_obj]['state'], job_obj._api()._UP, True) - - # return the job id - return job_id - - @staticmethod - def _generate_ll_header(jd): - """ - generates a IMB LoadLeveler script from a SAGA job description - :param jd: job descriptor - :return: the llsubmit script - """ - loadl_params = '' - - if jd.name is not None: - loadl_params += "#@ job_name = %s \n" % jd.name - - if jd.environment is not None: - variable_list = '' - for key in jd.environment.keys(): - variable_list += "%s=%s;" % (key, jd.environment[key]) - loadl_params += "#@ environment = %s \n" % variable_list - - if jd.working_directory is not None: - loadl_params += "#@ initialdir = %s\n" % jd.working_directory - if jd.output is not None: - loadl_params += "#@ output = %s\n" % os.path.join(jd.working_directory, jd.output) - if jd.error is not None: - loadl_params += "#@ error = %s\n" % os.path.join(jd.working_directory, jd.error) - if jd.wall_time_limit is not None: - hours = jd.wall_time_limit / 60 - minutes = jd.wall_time_limit % 60 - loadl_params += "#@ wall_clock_limit = {0:02}:{1:02}:00\n".format(hours, minutes) - - if jd.total_cpu_count is None: - # try to come up with a sensible (?) 
default value - jd.total_cpu_count = 1 - else: - if jd.total_cpu_count > 1: - loadl_params += "#@ total_tasks = %s\n" % jd.total_cpu_count - - if jd.job_contact is not None: - if len(jd.job_contact) > 1: - raise Exception("Only one notify user supported.") - loadl_params += "#@ notify_user = %s\n" % jd.job_contact[0] - loadl_params += "#@ notification = always\n" - - # some default (?) parameter that seem to work fine everywhere... - if jd.queue is not None: - loadl_params += "#@ class = %s\n" % jd.queue - - # finally, we 'queue' the job - loadl_params += "#@ queue\n" - - loadlscript = "\n%s" % loadl_params - - return loadlscript.replace('"', '\\"') - - @staticmethod - def _generate_pbs_header(jd): - """ generates a PBS script from a SAGA job description - """ - pbs_params = str() - - if jd.name: - pbs_params += "#PBS -N %s \n" % jd.name - - # if jd.working_directory: - # pbs_params += "#PBS -d %s \n" % jd.working_directory - # - if jd.output: - pbs_params += "#PBS -o %s \n" % os.path.join(jd.working_directory, jd.output) - - if jd.error: - pbs_params += "#PBS -e %s \n" % os.path.join(jd.working_directory, jd.error) - - if jd.wall_time_limit: - hours = jd.wall_time_limit / 60 - minutes = jd.wall_time_limit % 60 - pbs_params += "#PBS -l walltime={0:02}:{1:02}:00 \n".format(hours, minutes) - - if jd.queue: - pbs_params += "#PBS -q %s \n" % jd.queue - - if jd.project: - pbs_params += "#PBS -l EC_billing_account=%s \n" % str(jd.project) - - if jd.total_cpu_count: - pbs_params += "#PBS -l EC_total_tasks=%s \n" % str(jd.total_cpu_count) - if jd.threads_per_process: - pbs_params += "#PBS -l EC_threads_per_task=%s \n" % str(jd.threads_per_process) - if jd.processes_per_host: - pbs_params += "#PBS -l EC_tasks_per_node=%s \n" % str(jd.processes_per_host) - - pbscript = pbs_params - - pbscript = pbscript.replace('"', '\\"') - return pbscript - - # ---------------------------------------------------------------- - # - def _retrieve_job(self, job_id): - """ see if we can get some info about a job that we don't - know anything about - """ - rm, pid = self._adaptor.parse_id(job_id) - - ret, out, _ = self.shell.run_sync("ecaccess-job-list {0}".format(pid)) - - if ret != 0: - message = "Couldn't reconnect to job '%s': %s" % (job_id, out) - log_error_and_raise(message, saga.NoSuccess, self._logger) - - else: - # the job seems to exist on the backend. let's gather some data. Output will look like - # Job-Id: 7100070 - # Job Name: SAGA-Python-LSFJobScript.j5u51g - # Queue: ecgate - # Host: ecgb.ecmwf.int - # Schedule: Aug 21 09:59 - # Expiration: Aug 28 09:59 - # Try Count: 1/1 - # Status: STOP - # Comment: Status STOP received from Slurm (exitCode: 127) - - job_info = { - 'state': saga.job.UNKNOWN, - 'exec_hosts': None, - 'returncode': None, - 'create_time': None, - 'start_time': None, - 'end_time': None, - 'gone': False - } - - results = out.split('\n') - job_info['state'] = _ecaccess_to_saga_jobstate(results[7].split(":")[1].strip()) - - return job_info - - # ---------------------------------------------------------------- - # - def _job_get_info(self, job_obj): - """ get job attributes via bjob - """ - - # if we don't have the job in our dictionary, we don't want it - if job_obj not in self.jobs: - message = "Unknown job object: %s. Can't update state." % job_obj._id - log_error_and_raise(message, saga.NoSuccess, self._logger) - - # prev. 
info contains the info collect when _job_get_info - # was called the last time - prev_info = self.jobs[job_obj] - - # if the 'gone' flag is set, there's no need to query the job - # state again. it's gone forever - if prev_info['gone'] is True: - return prev_info - - # curr. info will contain the new job info collect. it starts off - # as a copy of prev_info (don't use deepcopy because there is an API - # object in the dict -> recursion) - curr_info = dict() - curr_info['job_id'] = prev_info.get('job_id') - curr_info['state'] = prev_info.get('state') - curr_info['exec_hosts'] = prev_info.get('exec_hosts') - curr_info['returncode'] = prev_info.get('returncode') - curr_info['create_time'] = prev_info.get('create_time') - curr_info['start_time'] = prev_info.get('start_time') - curr_info['end_time'] = prev_info.get('end_time') - curr_info['gone'] = prev_info.get('gone') - - rm, pid = self._adaptor.parse_id(job_obj._id) - - # run the 'ecaccess-job-list' command to get some infos about our job - # the result of ecaccess-job-list looks like this: - # - # JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME - # 901545 oweidne DONE regular yslogin5-ib ys3833-ib *FILENAME Nov 11 12:06 - # - ret, out, _ = self.shell.run_sync('ecaccess-job-list {0}'.format(pid)) - - if ret != 0: - if "Illegal job ID" in out: - # Let's see if the previous job state was running or pending. in - # that case, the job is gone now, which can either mean DONE, - # or FAILED. the only thing we can do is set it to 'DONE' - curr_info['gone'] = True - # we can also set the end time - self._logger.warning("Previously running job has disappeared. This probably means that the backend " + - "doesn't store informations about finished jobs. Setting state to 'DONE'.") - - if prev_info['state'] in [saga.job.RUNNING, saga.job.PENDING]: - curr_info['state'] = saga.job.DONE - else: - curr_info['state'] = saga.job.FAILED - else: - # something went wrong - message = "Error retrieving job info via 'ecaccess-job-list ': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - else: - # parse the result - results = out.split() - curr_info['state'] = _ecaccess_to_saga_jobstate(results[2]) - curr_info['exec_hosts'] = results[5] - - # return the new job info dict - return curr_info - - # ---------------------------------------------------------------- - # - def _job_get_state(self, job_obj): - """ get the job's state - """ - return self.jobs[job_obj]['state'] - - # ---------------------------------------------------------------- - # - def _job_get_exit_code(self, job_obj): - """ get the job's exit code - """ - ret = self.jobs[job_obj]['returncode'] - - # FIXME: 'None' should cause an exception - if ret is None: - return None - else: - return int(ret) - - # ---------------------------------------------------------------- - # - def _job_get_execution_hosts(self, job_obj): - """ get the job's exit code - """ - return self.jobs[job_obj]['exec_hosts'] - - # ---------------------------------------------------------------- - # - def _job_get_create_time(self, job_obj): - """ get the job's creation time - """ - return self.jobs[job_obj]['create_time'] - - # ---------------------------------------------------------------- - # - def _job_get_start_time(self, job_obj): - """ get the job's start time - """ - return self.jobs[job_obj]['start_time'] - - # ---------------------------------------------------------------- - # - def _job_get_end_time(self, job_obj): - """ get the job's end time - """ - return self.jobs[job_obj]['end_time'] 
- - # ---------------------------------------------------------------- - # - def _job_cancel(self, job_obj): - """ cancel the job via 'qdel' - """ - rm, pid = self._adaptor.parse_id(job_obj._id) - - ret, out, _ = self.shell.run_sync('ecaccess-job-delete {0}'.format(pid)) - - if ret != 0: - message = "Error canceling job via 'ecaccess-job-delete': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - - # assume the job was succesfully canceled - self.jobs[job_obj]['state'] = saga.job.CANCELED - - # ---------------------------------------------------------------- - # - def _job_wait(self, job_obj, timeout): - """ wait for the job to finish or fail - """ - time_start = time.time() - self._adaptor.parse_id(job_obj._id) - - while True: - state = self.jobs[job_obj]['state'] # this gets updated in the bg. - - if state == saga.job.DONE or state == saga.job.FAILED or state == saga.job.CANCELED: - return True - - # avoid busy poll - time.sleep(SYNC_WAIT_UPDATE_INTERVAL) - - # check if we hit timeout - if timeout >= 0: - time_now = time.time() - if time_now - time_start > timeout: - return False - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def create_job(self, jd): - """ - implements saga.adaptors.cpi.job.Service.get_url() - - :param jd: job description - """ - # this dict is passed on to the job adaptor class -- use it to pass any - # state information you need there. - adaptor_state = {"job_service": self, - "job_description": jd, - "job_schema": self.rm.schema, - "reconnect": False - } - - # create a new job object - job_obj = saga.job.Job(_adaptor=self._adaptor, - _adaptor_state=adaptor_state) - - # add job to internal list of known jobs. - self.jobs[job_obj._adaptor] = { - 'state': saga.job.NEW, - 'job_id': None, - 'exec_hosts': None, - 'returncode': None, - 'create_time': None, - 'start_time': None, - 'end_time': None, - 'gone': False, - 'submitted': False - } - - return job_obj - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_job(self, jobid): - """ - Implements saga.adaptors.cpi.job.Service.get_job() - - :param jobid: job identifier - :type jobid: str - """ - - # try to get some information about this job - job_info = self._retrieve_job(jobid) - - # this dict is passed on to the job adaptor class -- use it to pass any - # state information you need there. - adaptor_state = {"job_service": self, - # TODO: fill job description - "job_description": saga.job.Description(), - "job_schema": self.rm.schema, - "reconnect": True, - "reconnect_jobid": jobid - } - - job = saga.job.Job(_adaptor=self._adaptor, - _adaptor_state=adaptor_state) - - # throw it into our job dictionary. 
- self.jobs[job._adaptor] = job_info - return job - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_url(self): - """ implements saga.adaptors.cpi.job.Service.get_url() - """ - return self.rm - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def list(self): - """ implements saga.adaptors.cpi.job.Service.list() - """ - ids = [] - - ret, out, _ = self.shell.run_sync("ecaccess-job-list") - - if ret != 0 and len(out) > 0: - message = "failed to list jobs via 'ecaccess-job-list ': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - elif ret != 0 and len(out) == 0: - - pass - else: - for line in out.split("\n"): - # output looks like this: - # 112059.svc.uc.futuregrid testjob oweidner 0 Q batch - # 112061.svc.uc.futuregrid testjob oweidner 0 Q batch - if len(line.split()) > 1: - jobid = "[%s]-[%s]" % (self.rm, line.split()[0].split('.')[0]) - ids.append(str(jobid)) - - return ids - - # # ---------------------------------------------------------------- - # # - # def container_run (self, jobs) : - # self._logger.debug ("container run: %s" % str(jobs)) - # # TODO: this is not optimized yet - # for job in jobs: - # job.run () - # - # - # # ---------------------------------------------------------------- - # # - # def container_wait (self, jobs, mode, timeout) : - # self._logger.debug ("container wait: %s" % str(jobs)) - # # TODO: this is not optimized yet - # for job in jobs: - # job.wait () - # - # - # # ---------------------------------------------------------------- - # # - # def container_cancel (self, jobs) : - # self._logger.debug ("container cancel: %s" % str(jobs)) - # raise saga.NoSuccess ("Not Implemented"); - - -############################################################################### -# -# noinspection PyMethodOverriding,PyProtectedMember,PyMissingOrEmptyDocstring -class ECMWFJob(saga.adaptors.cpi.job.Job): - """ implements saga.adaptors.cpi.job.Job - """ - - # noinspection PyMissingConstructor - def __init__(self, api, adaptor): - - # initialize parent class - _cpi_base = super(ECMWFJob, self) - _cpi_base.__init__(api, adaptor) - - def _get_impl(self): - return self - - @SYNC_CALL - def init_instance(self, job_info): - """ - implements saga.adaptors.cpi.job.Job.init_instance() - - :param job_info: job descriptiom - :type job_info: dict - """ - # init_instance is called for every new saga.job.Job object - # that is created - self.jd = job_info["job_description"] - self.js = job_info["job_service"] - - if job_info['reconnect'] is True: - self._id = job_info['reconnect_jobid'] - self._started = True - else: - self._id = None - self._started = False - - return self.get_api() - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_state(self): - """ implements saga.adaptors.cpi.job.Job.get_state() - """ - return self.js._job_get_state(job_obj=self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def wait(self, timeout): - """ - implements saga.adaptors.cpi.job.Job.wait() - - :param timeout: time to wait - :type timeout: int - """ - if self._started is False: - log_error_and_raise("Can't wait for job that hasn't been started", - saga.IncorrectState, self._logger) - else: - self.js._job_wait(job_obj=self, timeout=timeout) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def cancel(self, timeout): - """ - implements saga.adaptors.cpi.job.Job.cancel() - - 
:param timeout: time to wait - :type timeout: int - """ - if self._started is False: - log_error_and_raise("Can't wait for job that hasn't been started", - saga.IncorrectState, self._logger) - else: - self.js._job_cancel(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def run(self): - """ implements saga.adaptors.cpi.job.Job.run() - """ - self._id = self.js._job_run(self) - self._started = True - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_service_url(self): - """ implements saga.adaptors.cpi.job.Job.get_service_url() - """ - return self.js.rm - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_id(self): - """ implements saga.adaptors.cpi.job.Job.get_id() - """ - return self._id - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_exit_code(self): - """ implements saga.adaptors.cpi.job.Job.get_exit_code() - """ - if self._started is False: - return None - else: - return self.js._job_get_exit_code(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_created(self): - """ implements saga.adaptors.cpi.job.Job.get_created() - """ - if self._started is False: - return None - else: - return self.js._job_get_create_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_started(self): - """ implements saga.adaptors.cpi.job.Job.get_started() - """ - if self._started is False: - return None - else: - return self.js._job_get_start_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_finished(self): - """ implements saga.adaptors.cpi.job.Job.get_finished() - """ - if self._started is False: - return None - else: - return self.js._job_get_end_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_execution_hosts(self): - """ implements saga.adaptors.cpi.job.Job.get_execution_hosts() - """ - if self._started is False: - return None - else: - return self.js._job_get_execution_hosts(self) diff --git a/autosubmit/platforms/mn_adaptor.py b/autosubmit/platforms/mn_adaptor.py deleted file mode 100644 index 3fadf72e193d271a0710bab710e7e8dd16643596..0000000000000000000000000000000000000000 --- a/autosubmit/platforms/mn_adaptor.py +++ /dev/null @@ -1,1071 +0,0 @@ -""" LSF mn adaptor implementation -""" - -import re -import os -import time -import threading -# noinspection PyDeprecation -from cgi import parse_qs - -# noinspection PyPackageRequirements -import radical.utils.threads as sut -import saga.url as surl -import saga.utils.pty_shell -import saga.adaptors.base -import saga.adaptors.cpi.job -import saga -import saga.adaptors.cpi.decorators - -SYNC_CALL = saga.adaptors.cpi.decorators.SYNC_CALL -ASYNC_CALL = saga.adaptors.cpi.decorators.ASYNC_CALL - -SYNC_WAIT_UPDATE_INTERVAL = 1 # seconds -MONITOR_UPDATE_INTERVAL = 3 # seconds - - -# -------------------------------------------------------------------- -# -# noinspection PyProtectedMember,PyPep8Naming,PyMissingOrEmptyDocstring -class _job_state_monitor(threading.Thread): - """ thread that periodically monitors job states - """ - - def __init__(self, job_service): - - self.logger = job_service._logger - self.js = job_service - self._stop = sut.Event() - - super(_job_state_monitor, self).__init__() - self.setDaemon(True) - - def stop(self): - self._stop.set() - - 
def stopped(self): - return self._stop.isSet() - - def run(self): - while self.stopped() is False: - try: - # do bulk updates here! we don't want to pull information - # job by job. that would be too inefficient! - jobs = self.js.jobs - job_keys = jobs.keys() - - for job in job_keys: - # if the job hasn't been started, we can't update its - # state. we can tell if a job has been started if it - # has a job id - if jobs[job].get('job_id', None) is not None: - # we only need to monitor jobs that are not in a - # terminal state, so we can skip the ones that are - # either done, failed or canceled - state = jobs[job]['state'] - if state not in [saga.job.DONE, saga.job.FAILED, saga.job.CANCELED]: - - job_info = self.js._job_get_info(job) - self.logger.info( - "Job monitoring thread updating Job %s (state: %s)" % (job, job_info['state'])) - - if job_info['state'] != state: - # fire job state callback if 'state' has changed - job._api()._attributes_i_set('state', job_info['state'], job._api()._UP, True) - - # update job info - self.js.jobs[job] = job_info - - time.sleep(MONITOR_UPDATE_INTERVAL) - except Exception as e: - self.logger.warning("Exception caught in job monitoring thread: %s" % e) - - -# -------------------------------------------------------------------- -# -def log_error_and_raise(message, exception, logger): - """ - logs an 'error' message and subsequently throws an exception - - :param message: message to log - :param exception: exception to rise - :param logger: logger to use - """ - logger.error(message) - raise exception(message) - - -# -------------------------------------------------------------------- -# -def _mn_to_saga_jobstate(mnjs): - """ translates a mn one-letter state to saga - """ - if mnjs in ['RUN']: - return saga.job.RUNNING - elif mnjs in ['WAIT', 'PEND']: - return saga.job.PENDING - elif mnjs in ['DONE']: - return saga.job.DONE - elif mnjs in ['ZOMBI', 'EXIT']: - return saga.job.FAILED - elif mnjs in ['USUSP', 'SSUSP', 'PSUSP']: - return saga.job.SUSPENDED - else: - return saga.job.UNKNOWN - - -# -------------------------------------------------------------------- -# -def _mnscript_generator(jd, queue=None): - """ generates an LSF script from a SAGA job description - """ - mn_params = str() - exec_n_args = str() - - if jd.executable is not None: - exec_n_args += "%s " % jd.executable - if jd.arguments is not None: - for arg in jd.arguments: - exec_n_args += "%s " % arg - - if jd.name is not None: - mn_params += "#BSUB -J %s \n" % jd.name - - if jd.environment is not None: - env_variable_list = "export " - for key in jd.environment.keys(): - env_variable_list += " %s=%s " % (key, jd.environment[key]) - else: - env_variable_list = "" - - # a workaround is to do an explicit 'cd' - if jd.working_directory is not None: - mn_params += "#BSUB -cwd %s \n" % jd.working_directory - - if jd.output is not None: - # if working directory is set, we want stdout to end up in - # the working directory as well, unless it containes a specific - # path name. - if jd.working_directory is not None: - if os.path.isabs(jd.output): - mn_params += "#BSUB -o %s \n" % jd.output - else: - # user provided a relative path for STDOUT. in this case - # we prepend the working directory path before passing - # it on to LSF. 
- mn_params += "#BSUB -o %s/%s \n" % (jd.working_directory, jd.output) - else: - mn_params += "#BSUB -o %s \n" % jd.output - - if jd.error is not None: - # if working directory is set, we want stderr to end up in - # the working directory as well, unless it contains a specific - # path name. - if jd.working_directory is not None: - if os.path.isabs(jd.error): - mn_params += "#BSUB -e %s \n" % jd.error - else: - # user provided a relative path for STDERR. in this case - # we prepend the working directory path before passing - # it on to LSF. - mn_params += "#BSUB -e %s/%s \n" % (jd.working_directory, jd.error) - else: - mn_params += "#BSUB -e %s \n" % jd.error - - if jd.wall_time_limit is not None: - hours = jd.wall_time_limit / 60 - minutes = jd.wall_time_limit % 60 - mn_params += "#BSUB -W %s:%s \n" \ - % (str(hours), str(minutes)) - - if (jd.queue is not None) and (queue is not None): - mn_params += "#BSUB -q %s \n" % queue - elif (jd.queue is not None) and (queue is None): - mn_params += "#BSUB -q %s \n" % jd.queue - elif (jd.queue is None) and (queue is not None): - mn_params += "#BSUB -q %s \n" % queue - - if jd.project is not None: - if ':' not in jd.project: - account = jd.project - else: - account, reservation, exclusivity = jd.project.split(':') - if reservation != '': - mn_params += "#BSUB -U %s \n" % str(reservation) - if exclusivity == 'true': - mn_params += "#BSUB -x \n" - - mn_params += "#BSUB -P %s \n" % str(account) - if jd.job_contact is not None: - mn_params += "#BSUB -u %s \n" % str(jd.job_contact) - - # if total_cpu_count is not defined, we assume 1 - if jd.total_cpu_count is None: - jd.total_cpu_count = 1 - - mn_params += "#BSUB -n %s \n" % str(jd.total_cpu_count) - - if jd.processes_per_host: - mn_params += '#BSUB -R "span[ptile=%s]"\n' % str(jd.processes_per_host) - - # escape all double quotes and dollarsigns, otherwise 'echo |' - # further down won't work - # only escape '$' in args and exe. 
not in the params - # exec_n_args = workdir_directives exec_n_args - exec_n_args = exec_n_args.replace('$', '\\$') - - mnscript = "\n#!/bin/bash \n%s\n%s\n%s" % (mn_params, env_variable_list, exec_n_args) - - mnscript = mnscript.replace('"', '\\"') - return mnscript - -# -------------------------------------------------------------------- -# some private defs -# -_PTY_TIMEOUT = 2.0 - -# -------------------------------------------------------------------- -# the adaptor name -# -_ADAPTOR_NAME = "autosubmit.platforms.mn_adaptor" -_ADAPTOR_SCHEMAS = ["mn", "mn+ssh", "mn+gsissh"] -_ADAPTOR_OPTIONS = [] - -# -------------------------------------------------------------------- -# the adaptor capabilities & supported attributes -# -_ADAPTOR_CAPABILITIES = { - "jdes_attributes": [saga.job.NAME, - saga.job.EXECUTABLE, - saga.job.ARGUMENTS, - saga.job.ENVIRONMENT, - saga.job.INPUT, - saga.job.OUTPUT, - saga.job.ERROR, - saga.job.QUEUE, - saga.job.PROJECT, - saga.job.WALL_TIME_LIMIT, - saga.job.WORKING_DIRECTORY, - saga.job.SPMD_VARIATION, # TODO: 'hot'-fix for BigJob - saga.job.PROCESSES_PER_HOST, - saga.job.TOTAL_CPU_COUNT], - "job_attributes": [saga.job.EXIT_CODE, - saga.job.EXECUTION_HOSTS, - saga.job.CREATED, - saga.job.STARTED, - saga.job.FINISHED], - "metrics": [saga.job.STATE], - "callbacks": [saga.job.STATE], - "contexts": {"ssh": "SSH public/private keypair", - "x509": "GSISSH X509 proxy context", - "userpass": "username/password pair (ssh)"} -} - -# -------------------------------------------------------------------- -# the adaptor documentation -# -_ADAPTOR_DOC = { - "name": _ADAPTOR_NAME, - "cfg_options": _ADAPTOR_OPTIONS, - "capabilities": _ADAPTOR_CAPABILITIES, - "description": """ -The MN adaptor allows to run and manage jobs on MareNostrum 3 -""", - "schemas": {"mn": "connect to a local cluster", - "mn+ssh": "connect to a remote cluster via SSH", - "mn+gsissh": "connect to a remote cluster via GSISSH"} -} - -# -------------------------------------------------------------------- -# the adaptor info is used to register the adaptor with SAGA -# -_ADAPTOR_INFO = { - "name": _ADAPTOR_NAME, - "version": "v0.2", - "schemas": _ADAPTOR_SCHEMAS, - "capabilities": _ADAPTOR_CAPABILITIES, - "cpis": [ - { - "type": "saga.job.Service", - "class": "MNJobService" - }, - { - "type": "saga.job.Job", - "class": "MNJob" - } - ] -} - - -############################################################################### -# The adaptor class -# noinspection PyMissingOrEmptyDocstring -class Adaptor(saga.adaptors.base.Base): - """ this is the actual adaptor class, which gets loaded by SAGA (i.e. by - the SAGA engine), and which registers the CPI implementation classes - which provide the adaptor's functionality. - """ - - # ---------------------------------------------------------------- - # - def __init__(self): - # noinspection PyCallByClass,PyTypeChecker - saga.adaptors.base.Base.__init__(self, _ADAPTOR_INFO, _ADAPTOR_OPTIONS) - - self.id_re = re.compile('^\[(.*)\]-\[(.*?)\]$') - self.opts = self.get_config(_ADAPTOR_NAME) - - # ---------------------------------------------------------------- - # - def sanity_check(self): - # FIXME: also check for gsissh - pass - - # ---------------------------------------------------------------- - # - def parse_id(self, job_id): - # split the id '[rm]-[pid]' in its parts, and return them. 
- - match = self.id_re.match(job_id) - - if not match or len(match.groups()) != 2: - raise saga.BadParameter("Cannot parse job id '%s'" % job_id) - - return match.group(1), match.group(2) - - -############################################################################### -# -# noinspection PyMethodOverriding,PyMethodOverriding,PyProtectedMember, PyMissingOrEmptyDocstring -class MNJobService(saga.adaptors.cpi.job.Service): - """ implements saga.adaptors.cpi.job.Service - """ - - # ---------------------------------------------------------------- - # - # noinspection PyMissingConstructor - def __init__(self, api, adaptor): - - self._mt = None - _cpi_base = super(MNJobService, self) - _cpi_base.__init__(api, adaptor) - - self._adaptor = adaptor - - # ---------------------------------------------------------------- - # - def __del__(self): - - self.close() - - # ---------------------------------------------------------------- - # - def close(self): - - if self.mt: - self.mt.stop() - self.mt.join(10) # don't block forever on join() - - self._logger.info("Job monitoring thread stopped.") - - self.finalize(True) - - # ---------------------------------------------------------------- - # - def finalize(self, kill_shell=False): - - if kill_shell: - if self.shell: - self.shell.finalize(True) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def init_instance(self, adaptor_state, rm_url, session): - """ - service instance constructor - - :param adaptor_state: state of the adaptor - :param rm_url: machine url - :param session: SAGA session to use - """ - self.rm = rm_url - self.session = session - self.ppn = 1 - self.queue = None - self.shell = None - self.jobs = dict() - - # the monitoring thread - one per service instance - # noinspection PyTypeChecker - self.mt = _job_state_monitor(job_service=self) - self.mt.start() - - rm_scheme = rm_url.scheme - pty_url = surl.Url(rm_url) - - # this adaptor supports options that can be passed via the - # 'query' component of the job service URL. - if rm_url.query is not None: - # noinspection PyDeprecation - for key, val in parse_qs(rm_url.query).iteritems(): - if key == 'queue': - self.queue = val[0] - elif key == 'span': - self.span = val[0] - - # we need to extrac the scheme for PTYShell. That's basically the - # job.Serivce Url withou the mn+ part. We use the PTYShell to execute - # mn commands either locally or via gsissh or ssh. - if rm_scheme == "mn": - pty_url.scheme = "fork" - elif rm_scheme == "mn+ssh": - pty_url.scheme = "ssh" - elif rm_scheme == "mn+gsissh": - pty_url.scheme = "gsissh" - - # these are the commands that we need in order to interact with LSF. - # the adaptor will try to find them during initialize(self) and bail - # out in case they are note avaialbe. 
- self._commands = {'bqueues': '9.1.2.0', - 'bjobs': '9.1.2.0', - 'bsub': '9.1.2.0', - 'bkill': '9.1.2.0'} - - self.shell = saga.utils.pty_shell.PTYShell(pty_url, self.session) - - # self.shell.set_initialize_hook(self.initialize) - # self.shell.set_finalize_hook(self.finalize) - - self.initialize() - return self.get_api() - - # ---------------------------------------------------------------- - # - def initialize(self): - # check if all required mn tools are available - # for cmd in self._commands.keys(): - # ret, out, _ = self.shell.run_sync("which %s " % cmd) - # if ret != 0: - # message = "Couldn't find LSF tools: %s" % out - # log_error_and_raise(message, saga.NoSuccess, self._logger) - # else: - # path = out.strip() # strip removes newline - # ret, out, _ = self.shell.run_sync("%s -V" % cmd) - # if ret != 0: - # message = "Couldn't find LSF tools: %s" % out - # log_error_and_raise(message, saga.NoSuccess, self._logger) - # else: - # # version is reported as: "version: x.y.z" - # version = out.split("\n")[0] - # - # # add path and version to the command dictionary - # self._commands[cmd] = {"path": cmd, - # "version": version} - - self._logger.info("Found LSF tools: %s" % self._commands) - - # see if we can get some information about the cluster, e.g., - # different queues, number of processes per node, etc. - # TODO: this is quite a hack. however, it *seems* to work quite - # well in practice. - # ret, out, _ = self.shell.run_sync('unset GREP_OPTIONS; %s -a | grep -E "(np|pcpu)"' % \ - # self._commands['pbsnodes']['path']) - # if ret != 0: - # - # message = "Error running pbsnodes: %s" % out - # log_error_and_raise(message, saga.NoSuccess, self._logger) - # else: - # this is black magic. we just assume that the highest occurence - # of a specific np is the number of processors (cores) per compute - # node. this equals max "PPN" for job scripts - # ppn_list = dict() - # for line in out.split('\n'): - # np = line.split(' = ') - # if len(np) == 2: - # np = np[1].strip() - # if np in ppn_list: - # ppn_list[np] += 1 - # else: - # ppn_list[np] = 1 - # self.ppn = max(ppn_list, key=ppn_list.get) - # self._logger.debug("Found the following 'ppn' configurations: %s. \ - - # Using %s as default ppn." - # % (ppn_list, self.ppn)) - - # ---------------------------------------------------------------- - # - def _job_run(self, job_obj): - """ runs a job via qsub - """ - # get the job description - jd = job_obj.jd - - # normalize working directory path - if jd.working_directory: - jd.working_directory = os.path.normpath(jd.working_directory) - - if (self.queue is not None) and (jd.queue is not None): - self._logger.warning("Job service was instantiated explicitly with \ -'queue=%s', but job description tries to a differnt queue: '%s'. Using '%s'." % - (self.queue, jd.queue, self.queue)) - - try: - # create an LSF job script from SAGA job description - script = _mnscript_generator(jd=jd, queue=self.queue) - - self._logger.info("Generated LSF script: %s" % script) - except Exception as ex: - script = '' - log_error_and_raise(str(ex), saga.BadParameter, self._logger) - - # try to create the working directory (if defined) - # WARNING: this assumes a shared filesystem between login node and - # compute nodes. 
- if jd.working_directory is not None: - self._logger.info("Creating working directory %s" % jd.working_directory) - ret, out, _ = self.shell.run_sync("mkdir -p %s" % jd.working_directory) - if ret != 0: - # something went wrong - message = "Couldn't create working directory - %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - - # Now we want to execute the script. This process consists of two steps: - # (1) we create a temporary file with 'mktemp' and write the contents of - # the generated PBS script into it - # (2) we call 'qsub ' to submit the script to the queueing system - # noinspection PyPep8 - cmdline = """SCRIPTFILE=`mktemp -t SAGA-Python-LSFJobScript.XXXXXX` && echo "%s" > $SCRIPTFILE && %s < $SCRIPTFILE && rm -f $SCRIPTFILE""" % (script, 'bsub') - ret, out, _ = self.shell.run_sync(cmdline) - - if ret != 0: - # something went wrong - message = "Error running job via 'bsub': %s. Commandline was: %s" \ - % (out, cmdline) - log_error_and_raise(message, saga.NoSuccess, self._logger) - else: - # parse the job id. bsub's output looks like this: - # Job <901545> is submitted to queue - lines = out.split("\n") - lines = filter(lambda l: l != '', lines) # remove empty - - self._logger.info('bsub: %s' % ''.join(lines)) - - mn_job_id = None - for line in lines: - if re.search('Job <.+> is submitted to', line): - mn_job_id = re.findall(r'<(.*?)>', line)[0] - break - - if not mn_job_id: - raise Exception("Failed to detect job id after submission.") - - job_id = "[%s]-[%s]" % (self.rm, mn_job_id) - - self._logger.info("Submitted LSF job with id: %s" % job_id) - - # update job dictionary - self.jobs[job_obj]['job_id'] = job_id - self.jobs[job_obj]['submitted'] = job_id - - # set status to 'pending' and manually trigger callback - # self.jobs[job_obj]['state'] = saga.job.PENDING - # job_obj._api()._attributes_i_set('state', self.jobs[job_obj]['state'], job_obj._api()._UP, True) - - # return the job id - return job_id - - # ---------------------------------------------------------------- - # - def _retrieve_job(self, job_id): - """ see if we can get some info about a job that we don't - know anything about - """ - rm, pid = self._adaptor.parse_id(job_id) - - ret, out, _ = self.shell.run_sync( - "%s -noheader %s" % ('bjobs', pid)) - - if ret != 0: - message = "Couldn't reconnect to job '%s': %s" % (job_id, out) - log_error_and_raise(message, saga.NoSuccess, self._logger) - - else: - # the job seems to exist on the backend. let's gather some data - job_info = { - 'state': saga.job.UNKNOWN, - 'exec_hosts': None, - 'returncode': None, - 'create_time': None, - 'start_time': None, - 'end_time': None, - 'gone': False - } - - results = out.split() - job_info['state'] = _mn_to_saga_jobstate(results[2]) - job_info['exec_hosts'] = results[5] - job_info['create_time'] = results[7] - - return job_info - - # ---------------------------------------------------------------- - # - def _job_get_info(self, job_obj): - """ get job attributes via bjob - """ - - # if we don't have the job in our dictionary, we don't want it - if job_obj not in self.jobs: - message = "Unknown job object: %s. Can't update state." % job_obj._id - log_error_and_raise(message, saga.NoSuccess, self._logger) - - # prev. info contains the info collect when _job_get_info - # was called the last time - prev_info = self.jobs[job_obj] - - # if the 'gone' flag is set, there's no need to query the job - # state again. it's gone forever - if prev_info['gone'] is True: - return prev_info - - # curr. 
info will contain the new job info collect. it starts off - # as a copy of prev_info (don't use deepcopy because there is an API - # object in the dict -> recursion) - curr_info = dict() - curr_info['job_id'] = prev_info.get('job_id') - curr_info['state'] = prev_info.get('state') - curr_info['exec_hosts'] = prev_info.get('exec_hosts') - curr_info['returncode'] = prev_info.get('returncode') - curr_info['create_time'] = prev_info.get('create_time') - curr_info['start_time'] = prev_info.get('start_time') - curr_info['end_time'] = prev_info.get('end_time') - curr_info['gone'] = prev_info.get('gone') - - rm, pid = self._adaptor.parse_id(job_obj._id) - - # run the LSF 'bjobs' command to get some infos about our job - # the result of bjobs looks like this: - # - # JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME - # 901545 oweidne DONE regular yslogin5-ib ys3833-ib *FILENAME Nov 11 12:06 - # - # If we add the -nodeader flag, the first row is ommited - - ret, out, _ = self.shell.run_sync("%s -noheader %s" % ('bjobs', pid)) - - if ret != 0: - if "Illegal job ID" in out or "is not found" in out: - # Let's see if the previous job state was running or pending. in - # that case, the job is gone now, which can either mean DONE, - # or FAILED. the only thing we can do is set it to 'DONE' - curr_info['gone'] = True - # we can also set the end time - self._logger.warning("Previously running job has disappeared." + - "This probably means that the backend doesn't " + - "store informations about finished jobs." + - "Setting state to 'DONE'.") - - if prev_info['state'] in [saga.job.RUNNING, saga.job.PENDING]: - curr_info['state'] = saga.job.DONE - else: - curr_info['state'] = saga.job.FAILED - else: - # something went wrong - message = "Error retrieving job info via 'bjobs': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - else: - # parse the result - results = out.split() - curr_info['state'] = _mn_to_saga_jobstate(results[2]) - curr_info['exec_hosts'] = results[5] - - # return the new job info dict - return curr_info - - # ---------------------------------------------------------------- - # - def _job_get_state(self, job_obj): - """ get the job's state - """ - return self.jobs[job_obj]['state'] - - # ---------------------------------------------------------------- - # - def _job_get_exit_code(self, job_obj): - """ get the job's exit code - """ - ret = self.jobs[job_obj]['returncode'] - - # FIXME: 'None' should cause an exception - if ret is None: - return None - else: - return int(ret) - - # ---------------------------------------------------------------- - # - def _job_get_execution_hosts(self, job_obj): - """ get the job's exit code - """ - return self.jobs[job_obj]['exec_hosts'] - - # ---------------------------------------------------------------- - # - def _job_get_create_time(self, job_obj): - """ get the job's creation time - """ - return self.jobs[job_obj]['create_time'] - - # ---------------------------------------------------------------- - # - def _job_get_start_time(self, job_obj): - """ get the job's start time - """ - return self.jobs[job_obj]['start_time'] - - # ---------------------------------------------------------------- - # - def _job_get_end_time(self, job_obj): - """ get the job's end time - """ - return self.jobs[job_obj]['end_time'] - - # ---------------------------------------------------------------- - # - def _job_cancel(self, job_obj): - """ cancel the job via 'qdel' - """ - rm, pid = self._adaptor.parse_id(job_obj._id) - - ret, out, _ = 
self.shell.run_sync("%s %s\n" % ('qdel', pid)) - - if ret != 0: - message = "Error canceling job via 'qdel': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - - # assume the job was succesfully canceled - self.jobs[job_obj]['state'] = saga.job.CANCELED - - # ---------------------------------------------------------------- - # - def _job_wait(self, job_obj, timeout): - """ wait for the job to finish or fail - """ - time_start = time.time() - time_now = time_start - self._adaptor.parse_id(job_obj._id) - - while True: - # state = self._job_get_state(job_id=job_id, job_obj=job_obj) - state = self.jobs[job_obj]['state'] # this gets updated in the bg. - - if state == saga.job.DONE or \ - state == saga.job.FAILED or \ - state == saga.job.CANCELED: - return True - - # avoid busy poll - time.sleep(SYNC_WAIT_UPDATE_INTERVAL) - - # check if we hit timeout - if timeout >= 0: - time_now = time.time() - if time_now - time_start > timeout: - return False - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def create_job(self, jd): - """ - implements saga.adaptors.cpi.job.Service.get_url() - - :param jd: job description - """ - # this dict is passed on to the job adaptor class -- use it to pass any - # state information you need there. - adaptor_state = {"job_service": self, - "job_description": jd, - "job_schema": self.rm.schema, - "reconnect": False - } - - # create a new job object - job_obj = saga.job.Job(_adaptor=self._adaptor, - _adaptor_state=adaptor_state) - - # add job to internal list of known jobs. - self.jobs[job_obj._adaptor] = { - 'state': saga.job.NEW, - 'job_id': None, - 'exec_hosts': None, - 'returncode': None, - 'create_time': None, - 'start_time': None, - 'end_time': None, - 'gone': False, - 'submitted': False - } - - return job_obj - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_job(self, jobid): - """ - Implements saga.adaptors.cpi.job.Service.get_job() - - :param jobid: job identifier - :type jobid: str - """ - - # try to get some information about this job - job_info = self._retrieve_job(jobid) - - # this dict is passed on to the job adaptor class -- use it to pass any - # state information you need there. - adaptor_state = {"job_service": self, - # TODO: fill job description - "job_description": saga.job.Description(), - "job_schema": self.rm.schema, - "reconnect": True, - "reconnect_jobid": jobid - } - - job = saga.job.Job(_adaptor=self._adaptor, - _adaptor_state=adaptor_state) - - # throw it into our job dictionary. 
- self.jobs[job._adaptor] = job_info - return job - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_url(self): - """ implements saga.adaptors.cpi.job.Service.get_url() - """ - return self.rm - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def list(self): - """ implements saga.adaptors.cpi.job.Service.list() - """ - ids = [] - - ret, out, _ = self.shell.run_sync("%s -a -noheader" % 'bjobs') - - if ret != 0 and len(out) > 0: - message = "failed to list jobs via 'bjobs': %s" % out - log_error_and_raise(message, saga.NoSuccess, self._logger) - elif ret != 0 and len(out) == 0: - - pass - else: - for line in out.split("\n"): - # output looks like this: - # 112059.svc.uc.futuregrid testjob oweidner 0 Q batch - # 112061.svc.uc.futuregrid testjob oweidner 0 Q batch - if len(line.split()) > 1: - jobid = "[%s]-[%s]" % (self.rm, line.split()[0]) - ids.append(str(jobid)) - - return ids - - # # ---------------------------------------------------------------- - # # - # def container_run (self, jobs) : - # self._logger.debug ("container run: %s" % str(jobs)) - # # TODO: this is not optimized yet - # for job in jobs: - # job.run () - # - # - # # ---------------------------------------------------------------- - # # - # def container_wait (self, jobs, mode, timeout) : - # self._logger.debug ("container wait: %s" % str(jobs)) - # # TODO: this is not optimized yet - # for job in jobs: - # job.wait () - # - # - # # ---------------------------------------------------------------- - # # - # def container_cancel (self, jobs) : - # self._logger.debug ("container cancel: %s" % str(jobs)) - # raise saga.NoSuccess ("Not Implemented"); - - -############################################################################### -# -# noinspection PyMethodOverriding,PyProtectedMember,PyMissingOrEmptyDocstring -class MNJob(saga.adaptors.cpi.job.Job): - """ implements saga.adaptors.cpi.job.Job - """ - - # noinspection PyMissingConstructor - def __init__(self, api, adaptor): - - # initialize parent class - _cpi_base = super(MNJob, self) - _cpi_base.__init__(api, adaptor) - - def _get_impl(self): - return self - - @SYNC_CALL - def init_instance(self, job_info): - """ - implements saga.adaptors.cpi.job.Job.init_instance() - - :param job_info: job descriptiom - :type job_info: dict - """ - # init_instance is called for every new saga.job.Job object - # that is created - self.jd = job_info["job_description"] - self.js = job_info["job_service"] - - if job_info['reconnect'] is True: - self._id = job_info['reconnect_jobid'] - self._name = self.jd.get(saga.job.NAME) - self._started = True - else: - self._id = None - self._name = self.jd.get(saga.job.NAME) - self._started = False - - return self.get_api() - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_state(self): - """ implements saga.adaptors.cpi.job.Job.get_state() - """ - return self.js._job_get_state(job_obj=self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def wait(self, timeout): - """ - implements saga.adaptors.cpi.job.Job.wait() - - :param timeout: time to wait - :type timeout: int - """ - if self._started is False: - log_error_and_raise("Can't wait for job that hasn't been started", - saga.IncorrectState, self._logger) - else: - self.js._job_wait(job_obj=self, timeout=timeout) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def cancel(self, timeout): - 
""" - implements saga.adaptors.cpi.job.Job.cancel() - - :param timeout: time to wait - :type timeout: int - """ - if self._started is False: - log_error_and_raise("Can't wait for job that hasn't been started", - saga.IncorrectState, self._logger) - else: - self.js._job_cancel(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def run(self): - """ implements saga.adaptors.cpi.job.Job.run() - """ - self._id = self.js._job_run(self) - self._started = True - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_service_url(self): - """ implements saga.adaptors.cpi.job.Job.get_service_url() - """ - return self.js.rm - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_id(self): - """ implements saga.adaptors.cpi.job.Job.get_id() - """ - return self._id - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_name (self): - """ Implements saga.adaptors.cpi.job.Job.get_name() """ - return self._name - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_exit_code(self): - """ implements saga.adaptors.cpi.job.Job.get_exit_code() - """ - if self._started is False: - return None - else: - return self.js._job_get_exit_code(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_created(self): - """ implements saga.adaptors.cpi.job.Job.get_created() - """ - if self._started is False: - return None - else: - return self.js._job_get_create_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_started(self): - """ implements saga.adaptors.cpi.job.Job.get_started() - """ - if self._started is False: - return None - else: - return self.js._job_get_start_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_finished(self): - """ implements saga.adaptors.cpi.job.Job.get_finished() - """ - if self._started is False: - return None - else: - return self.js._job_get_end_time(self) - - # ---------------------------------------------------------------- - # - @SYNC_CALL - def get_execution_hosts(self): - """ implements saga.adaptors.cpi.job.Job.get_execution_hosts() - """ - if self._started is False: - return None - else: - return self.js._job_get_execution_hosts(self) diff --git a/autosubmit/platforms/saga_platform.py b/autosubmit/platforms/saga_platform.py deleted file mode 100644 index fd240956eaface29021a98e0b8f7d1fc578d9bf2..0000000000000000000000000000000000000000 --- a/autosubmit/platforms/saga_platform.py +++ /dev/null @@ -1,331 +0,0 @@ -import datetime -import subprocess -from time import sleep - -import os -import saga - -from bscearth.utils.log import Log -from bscearth.utils.date import date2str -from autosubmit.job.job_common import Status, Type -from autosubmit.platforms.platform import Platform - - -class SagaPlatform(Platform): - """ - Class to manage the connections to the different platforms with the SAGA library. 
- """ - - def __init__(self, expid, name, config): - """ - - :param config: - :param expid: - :param name: - """ - Platform.__init__(self, expid, name, config) - self._attributes = None - - def send_file(self, filename): - """ - Sends a local file to the platform - :param filename: name of the file to send - :type filename: str - """ - self.delete_file(filename) - if self.type == 'ecaccess': - try: - subprocess.check_call(['ecaccess-file-mkdir', '{0}:{1}'.format(self.host, self.root_dir)]) - subprocess.check_call(['ecaccess-file-mkdir', '{0}:{1}'.format(self.host, self.get_files_path())]) - destiny_path = os.path.join(self.get_files_path(), filename) - subprocess.check_call(['ecaccess-file-put', os.path.join(self.tmp_path, filename), - '{0}:{1}'.format(self.host, destiny_path)]) - subprocess.check_call(['ecaccess-file-chmod', '750', '{0}:{1}'.format(self.host, destiny_path)]) - return - except subprocess.CalledProcessError: - raise Exception("Could't send file {0} to {1}:{2}".format(os.path.join(self.tmp_path, filename), - self.host, self.get_files_path())) - # noinspection PyTypeChecker - out = saga.filesystem.File("file://{0}".format(os.path.join(self.tmp_path, filename)), - session=self.service.session) - if self.type == 'local': - out.copy("file://{0}".format(os.path.join(self.tmp_path, 'LOG_' + self.expid, filename)), - saga.filesystem.CREATE_PARENTS) - else: - workdir = self.get_workdir(self.get_files_path()) - out.copy(workdir.get_url()) - workdir.close() - out.close() - - def get_workdir(self, path): - """ - Creates and returns a DIrectory object for the current workdir - - :param path: path to the workdir - :type path: str - :return: working directory object - :rtype: saga.file.Directory - """ - if not path: - raise Exception("Workdir invalid") - - sftp_directory = 'sftp://{0}{1}'.format(self.host, path) - try: - # noinspection PyTypeChecker - return saga.filesystem.Directory(sftp_directory, session=self.service.session) - except saga.BadParameter: - try: - # noinspection PyTypeChecker - return saga.filesystem.Directory(sftp_directory, - saga.filesystem.CREATE, - session=self.service.session) - except saga.BadParameter: - new_directory = os.path.split(path)[1] - parent = self.get_workdir(os.path.dirname(path)) - parent.make_dir(new_directory) - parent.close() - # noinspection PyTypeChecker - return saga.filesystem.Directory(sftp_directory, session=self.service.session) - - def get_file(self, filename, must_exist=True, relative_path=''): - """ - Copies a file from the current platform to experiment's tmp folder - - :param filename: file name - :type filename: str - :param must_exist: If True, raises an exception if file can not be copied - :type must_exist: bool - :param relative_path: relative path inside tmp folder - :type relative_path: str - :return: True if file is copied successfully, false otherwise - :rtype: bool - """ - - local_path = os.path.join(self.tmp_path, relative_path) - if not os.path.exists(local_path): - os.makedirs(local_path) - - file_path = os.path.join(local_path, filename) - if os.path.exists(file_path): - os.remove(file_path) - - if self.type == 'ecaccess': - try: - subprocess.check_call(['ecaccess-file-get', '{0}:{1}'.format(self.host, - os.path.join(self.get_files_path(), - filename)), - file_path]) - return True - except subprocess.CalledProcessError: - if must_exist: - raise Exception("Could't get file {0} from {1}:{2}".format(file_path, - self.host, self.get_files_path())) - return False - - if not self.exists_file(filename): - if must_exist: - raise 
Exception('File {0} does not exists'.format(filename)) - return False - - out = self.directory.open(os.path.join(str(self.directory.url), filename)) - - out.copy("file://{0}".format(file_path)) - out.close() - return True - - def exists_file(self, filename): - """ - Checks if a file exists on this platform - - :param filename: file name - :type filename: str - :return: True if it exists, False otherwise - """ - # noinspection PyBroadException - if not self.directory: - try: - if self.type == 'local': - # noinspection PyTypeChecker - self.directory = saga.filesystem.Directory("file://{0}".format(os.path.join(self.tmp_path, - 'LOG_' + self.expid))) - else: - # noinspection PyTypeChecker - self.directory = saga.filesystem.Directory("sftp://{0}{1}".format(self.host, self.get_files_path()), - session=self.service.session) - except: - return False - - # noinspection PyBroadException - try: - self.directory.list(filename) - except: - return False - - return True - - def delete_file(self, filename): - """ - Deletes a file from this platform - - :param filename: file name - :type filename: str - :return: True if succesful or file does no exists - :rtype: bool - """ - if self.type == 'ecaccess': - try: - subprocess.check_call(['ecaccess-file-delete', - '{0}:{1}'.format(self.host, os.path.join(self.get_files_path(), filename))]) - return True - except subprocess.CalledProcessError: - return True - - if not self.exists_file(filename): - return True - - try: - if self.type == 'local': - # noinspection PyTypeChecker - out = saga.filesystem.File("file://{0}".format(os.path.join(self.tmp_path, 'LOG_' + self.expid, - filename))) - else: - # noinspection PyTypeChecker - out = saga.filesystem.File("sftp://{0}{1}".format(self.host, os.path.join(self.get_files_path(), - filename)), - session=self.service.session) - out.remove() - out.close() - return True - except saga.DoesNotExist: - return True - - def submit_job(self, job, script_name, hold=False): - """ - Submit a job from a given job object. - - :param job: job object - :type job: autosubmit.job.job.Job - :param scriptname: job script's name - :rtype scriptname: str - :return: job id for the submitted job - :rtype: int - """ - saga_job = self.create_saga_job(job, script_name) - saga_job.run() - return saga_job.id - - def create_saga_job(self, job, script_name): - """ - Creates a saga job from a given job object. 
- - :param job: job object - :type job: autosubmit.job.job.Job - :param script_name: job script's name - :type script_name: str - :return: saga job object for the given job - :rtype: saga.job.Job - """ - jd = saga.job.Description() - if job.type == Type.BASH: - binary = 'source' - elif job.type == Type.PYTHON: - binary = 'python ' - elif job.type == Type.R: - binary = 'Rscript' - - # jd.executable = '{0} {1}'.format(binary, os.path.join(self.get_files_path(), script_name)) - jd.executable = os.path.join(self.get_files_path(), script_name) - jd.working_directory = self.get_files_path() - - str_datetime = date2str(datetime.datetime.now(), 'S') - out_filename = "{0}.{1}.out".format(job.name, str_datetime) - err_filename = "{0}.{1}.err".format(job.name, str_datetime) - job.local_logs = (out_filename, err_filename) - jd.output = out_filename - jd.error = err_filename - - self.add_attribute(jd, 'Name', job.name) - - wall_clock = job.parameters["WALLCLOCK"] - if wall_clock == '': - wall_clock = 0 - else: - wall_clock = wall_clock.split(':') - wall_clock = int(wall_clock[0]) * 60 + int(wall_clock[1]) - self.add_attribute(jd, 'WallTimeLimit', wall_clock) - - self.add_attribute(jd, 'Queue', job.parameters["CURRENT_QUEUE"]) - - project = job.parameters["CURRENT_BUDG"] - if job.parameters["CURRENT_RESERVATION"] != '' or job.parameters["CURRENT_EXCLUSIVITY"] == 'true': - project += ':' + job.parameters["CURRENT_RESERVATION"] + ':' - if job.parameters["CURRENT_EXCLUSIVITY"] == 'true': - project += job.parameters["CURRENT_EXCLUSIVITY"] - self.add_attribute(jd, 'Project', project) - - self.add_attribute(jd, 'TotalCPUCount', job.parameters["NUMPROC"]) - if job.parameters["NUMTASK"] is not None: - self.add_attribute(jd, 'ProcessesPerHost', job.parameters["NUMTASK"]) - self.add_attribute(jd, 'ThreadsPerProcess', job.parameters["NUMTHREADS"]) - self.add_attribute(jd, 'TotalPhysicalMemory', job.parameters["MEMORY"]) - saga_job = self.service.create_job(jd) - return saga_job - - def add_attribute(self, jd, name, value): - """ - Adds an attribute to a given job descriptor, only if it is supported by the adaptor. 
- - :param jd: job descriptor to use: - :type jd: saga.job.Descriptor - :param name: attribute's name - :type name: str - :param value: attribute's value - """ - if self._attributes is None: - # noinspection PyProtectedMember - self._attributes = self.service._adaptor._adaptor._info['capabilities']['jdes_attributes'] - if name not in self._attributes or not value: - return - jd.set_attribute(name, value) - - def check_job(self, job_id, default_status=Status.COMPLETED, retries=10): - """ - Checks job running status - - :param retries: retries - :param job_id: job id - :type job_id: str - :param default_status: status to assign if it can be retrieved from the platform - :type default_status: autosubmit.job.job_common.Status - :return: current job status - :rtype: autosubmit.job.job_common.Status - """ - saga_status = None - while saga_status is None and retries >= 0: - try: - if job_id not in self.service.jobs: - return Status.COMPLETED - saga_status = self.service.get_job(job_id).state - except Exception as e: - # If SAGA can not get the job state, we change it to completed - # It will change to FAILED if not COMPLETED file is present - Log.debug('Can not get job state: {0}', e) - retries -= 1 - sleep(5) - - if saga_status is None: - return default_status - elif saga_status == saga.job.UNKNOWN: - return Status.UNKNOWN - elif saga_status == saga.job.PENDING: - return Status.QUEUING - elif saga_status == saga.job.FAILED: - return Status.FAILED - elif saga_status == saga.job.CANCELED: - return Status.FAILED - elif saga_status == saga.job.DONE: - return Status.COMPLETED - elif saga_status == saga.job.RUNNING: - return Status.RUNNING - elif saga_status == saga.job.SUSPENDED: - return Status.SUSPENDED diff --git a/autosubmit/platforms/saga_submitter.py b/autosubmit/platforms/saga_submitter.py deleted file mode 100644 index acb46a2828e0487d5964802c991962542bc2d947..0000000000000000000000000000000000000000 --- a/autosubmit/platforms/saga_submitter.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2014 Climate Forecasting Unit, IC3 - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . 
- - -import time - -import os -import saga - -from autosubmit.config.basicConfig import BasicConfig -from autosubmit.config.config_common import AutosubmitConfig -from saga_platform import SagaPlatform -from submitter import Submitter - - -class SagaSubmitter(Submitter): - """ - Class to manage the experiments platform - """ - - def load_platforms(self, asconf, retries=10): - """ - Create all the platforms object that will be used by the experiment - - :param retries: retries in case creation of service fails - :param asconf: autosubmit config to use - :type asconf: AutosubmitConfig - :return: platforms used by the experiment - :rtype: dict - """ - adaptors_variable = os.environ.get('SAGA_ADAPTOR_PATH') - if adaptors_variable is None: - adaptors_variable = '' - if 'autosubmit.platforms.ecmwf_adaptor' not in adaptors_variable: - if len(adaptors_variable) > 0 and not adaptors_variable.endswith(':'): - adaptors_variable += ':' - adaptors_variable += 'autosubmit.platforms.ecmwf_adaptor' - - if 'autosubmit.platforms.mn_adaptor' not in adaptors_variable: - if len(adaptors_variable) > 0 and not adaptors_variable.endswith(':'): - adaptors_variable += ':' - adaptors_variable += 'autosubmit.platforms.mn_adaptor' - - platforms_used = list() - hpcarch = asconf.get_platform() - - job_parser = asconf.jobs_parser - for job in job_parser.sections(): - hpc = job_parser.get_option(job, 'PLATFORM', hpcarch).lower() - if hpc not in platforms_used: - platforms_used.append(hpc) - - os.environ['SAGA_ADAPTOR_PATH'] = adaptors_variable - parser = asconf.platforms_parser - - session = None - - platforms = dict() - local_platform = SagaPlatform(asconf.expid, 'local', BasicConfig) - local_platform.service = None - retry = retries - while local_platform.service is None and retry >= 0: - try: - local_platform.service = saga.job.Service("fork://localhost", session=session) - except saga.SagaException: - retry -= 1 - time.sleep(5) - local_platform.type = 'local' - local_platform.queue = '' - local_platform.max_wallclock = asconf.get_max_wallclock() - local_platform.max_processors = asconf.get_max_processors() - local_platform.max_waiting_jobs = asconf.get_max_waiting_jobs() - local_platform.total_jobs = asconf.get_total_jobs() - local_platform.scratch = os.path.join(BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) - local_platform.project = '' - local_platform.budget = '' - local_platform.reservation = '' - local_platform.exclusivity = '' - local_platform.user = '' - local_platform.root_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, local_platform.expid) - local_platform.transfer = "file" - local_platform.host = 'localhost' - platforms['local'] = local_platform - platforms['LOCAL'] = local_platform - - for section in parser.sections(): - - if section.lower() not in platforms_used: - continue - - platform_type = parser.get_option(section, 'TYPE', '').lower() - - remote_platform = SagaPlatform(asconf.expid, section.lower(), BasicConfig) - remote_platform.type = platform_type - - platform_version = parser.get_option(section, 'VERSION', '') - if platform_type == 'pbs': - adaptor = 'pbs+ssh' - elif platform_type == 'sge': - adaptor = 'sge+ssh' - elif platform_type == 'ps': - adaptor = 'ssh' - elif platform_type == 'lsf': - if platform_version == 'mn': - adaptor = 'mn+ssh' - else: - adaptor = 'lsf+ssh' - elif platform_type == 'ecaccess': - adaptor = 'ecaccess' - remote_platform.scheduler = parser.get_option(section, 'SCHEDULER', 'pbs').lower() - elif platform_type == 'slurm': - adaptor = 'slurm+ssh' - elif 
platform_type == '': - raise Exception("Queue type not specified on platform {0}".format(section)) - else: - adaptor = platform_type - - if parser.get_option(section, 'ADD_PROJECT_TO_HOST', '').lower() == 'true': - host = '{0}-{1}'.format(parser.get_option(section, 'HOST', None), - parser.get_option(section, 'PROJECT', None)) - else: - host = parser.get_option(section, 'HOST', None) - - if adaptor.endswith('ssh'): - ctx = saga.Context('ssh') - ctx.user_id = parser.get_option(section, 'USER', None) - session = saga.Session(False) - session.add_context(ctx) - else: - session = None - - remote_platform.host = host - if remote_platform.type == 'ecaccess': - # It has to be fork because we are communicating through commands at the local machine - host = 'localhost' - - remote_platform.service = None - retry = retries - while remote_platform.service is None and retry >= 0: - try: - # noinspection PyTypeChecker - remote_platform.service = saga.job.Service("{0}://{1}".format(adaptor, host), session=session) - except saga.SagaException: - retry -= 1 - time.sleep(5) - # noinspection PyProtectedMember - remote_platform.service._adaptor.host = remote_platform.host - # noinspection PyProtectedMember - remote_platform.service._adaptor.scheduler = remote_platform.scheduler - remote_platform.max_wallclock = parser.get_option(section, 'MAX_WALLCLOCK', - asconf.get_max_wallclock()) - remote_platform.max_processors = parser.get_option(section, 'MAX_PROCESSORS', - asconf.get_max_processors()) - remote_platform.max_waiting_jobs = int(parser.get_option(section, 'MAX_WAITING_JOBS', - asconf.get_max_waiting_jobs())) - remote_platform.total_jobs = int(parser.get_option(section, 'TOTAL_JOBS', - asconf.get_total_jobs())) - remote_platform.project = parser.get_option(section, 'PROJECT', None) - remote_platform.budget = parser.get_option(section, 'BUDGET', remote_platform.project) - remote_platform.reservation = parser.get_option(section, 'RESERVATION', '') - remote_platform.exclusivity = parser.get_option(section, 'EXCLUSIVITY', '').lower() - remote_platform.user = parser.get_option(section, 'USER', None) - remote_platform.scratch = parser.get_option(section, 'SCRATCH_DIR', None) - remote_platform._default_queue = parser.get_option(section, 'QUEUE', None) - remote_platform._serial_queue = parser.get_option(section, 'SERIAL_QUEUE', None) - remote_platform.processors_per_node = parser.get_option(section, 'PROCESSORS_PER_NODE', - None) - remote_platform.scratch_free_space = parser.get_option(section, 'SCRATCH_FREE_SPACE', - None) - remote_platform.root_dir = os.path.join(remote_platform.scratch, remote_platform.project, - remote_platform.user, remote_platform.expid) - platforms[section.lower()] = remote_platform - - for section in parser.sections(): - if parser.has_option(section, 'SERIAL_PLATFORM'): - platforms[section.lower()].serial_platform = platforms[parser.get_option(section, - 'SERIAL_PLATFORM', - None).lower()] - - self.platforms = platforms diff --git a/docs/source/conf.py b/docs/source/conf.py index 22f4c3dac16ff7ae89fcce4d2766dcfb725a36ce..bcfda0c201edf4dc2c6e6e37b5391bb46f93143e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -111,7 +111,7 @@ pygments_style = 'sphinx' todo_include_todos = False autodoc_mock_imports = ["portalocker", "argparse", "python-dateutil", "pydotplus", "pyparsing", - 'numpy', 'matplotlib', 'matplotlib.pyplot', 'matplotlib.gridspec', 'matplotlib.patches', 'saga', 'saga-python', 'paramiko', + 'numpy', 'matplotlib', 'matplotlib.pyplot', 'matplotlib.gridspec', 
'matplotlib.patches', 'paramiko', - 'mock', "networkx", 'networkx.algorithms.dag', 'bscearth.utils', 'bscearth.utils.log', - 'bscearth.utils.config_parser', 'bscearth.utils.date']
diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 63fe609299d74554443966eccd986acf60230a79..90a931b69b40b9dedd74ddd898c1a03e41a76c46 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -9,7 +9,7 @@ The Autosubmit code is maintained in *PyPi*, the main source for python packages - Pre-requisties: These packages (bash, python2, sqlite3, git-scm > 1.8.2, subversion, dialog and GraphViz) must be available at local host machine. -These packages (argparse, python-dateutil, pyparsing, numpy, pydotplus, matplotlib, paramiko, saga-python, python2-pythondialog and portalocker) must be available for python runtime. +These packages (argparse, python-dateutil, pyparsing, numpy, pydotplus, matplotlib, paramiko, python2-pythondialog and portalocker) must be available for python runtime. .. important:: The host machine has to be able to access HPC's/Clusters via password-less ssh.
diff --git a/docs/source/usage/communication_library.rst b/docs/source/usage/communication_library.rst index ea142846a0a3b3054c2efadcdc58509e4845505a..bc2aa4a036e68f4d96d6650e04d7fbe2fc81a757 100644 --- a/docs/source/usage/communication_library.rst +++ b/docs/source/usage/communication_library.rst @@ -5,7 +5,7 @@ In order to handle the remote communications with the different platforms, Autos of a communications library. There are multiple implementations, so you can choose any of them. .. hint:: - At this moment there are two available communications libraries which are ``saga`` and ``paramiko``. + At this moment there is one available communications library, which is ``paramiko``. To change the communications library, open the /cxxx/conf/autosubmit_cxxx.conf file where cxxx is the experiment identifier and change the value of the API configuration variable in the communications @@ -14,6 +14,6 @@ section: .. code-block:: ini [communications] - # Communications library used to connect with platforms: paramiko or saga. + # Communications library used to connect with platforms: paramiko. # Default = paramiko - API = saga \ No newline at end of file + API = paramiko \ No newline at end of file
diff --git a/docs/source/usage/new_platform.rst b/docs/source/usage/new_platform.rst index d25467fdfd29769767e6dac44991e89ba5cb4589..173dafae45e937f818845ae19a69bfa879f841c3 100644 --- a/docs/source/usage/new_platform.rst +++ b/docs/source/usage/new_platform.rst @@ -19,8 +19,7 @@ identifier and add this text: This will create a platform named "new_platform". The options specified are all mandatory: -* TYPE: queue type for the platform. Options supported are PBS, SGE, PS, LSF, ecaccess and SLURM and -also the options supported by saga-python library. +* TYPE: queue type for the platform. Options supported are PBS, SGE, PS, LSF, ecaccess and SLURM. * HOST: hostname of the platform @@ -35,8 +34,6 @@ also the options supported by saga-python library. .. warning:: With some platform types, Autosubmit may also need the version, forcing you to add the parameter VERSION. These platforms are PBS (options: 10, 11, 12) and ecaccess (options: pbs, loadleveler). -.. hint:: If you want to run on ``marenostrum3`` with ``saga`` communications library, you have to set the -platform type as ``lsf`` and the platform version as ``mn``. Some platforms may require to run serial jobs in a different queue or platform.
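For illustration, a minimal sketch of a platform section using these serial-job options. The option names (TYPE, HOST, PROJECT, USER, SCRATCH_DIR, SERIAL_PLATFORM, SERIAL_QUEUE) are the ones read from the platforms configuration by the submitter code in this changeset; every value below is a placeholder, not a real host, project or queue:

.. code-block:: ini

    [new_platform]
    TYPE = slurm
    HOST = hpc.example.org
    PROJECT = my_project
    USER = my_user
    SCRATCH_DIR = /scratch
    # Placeholder values: route serial jobs to a different platform and/or queue
    SERIAL_PLATFORM = my_serial_platform
    SERIAL_QUEUE = serial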
To avoid changing the job configuration, you can specify what platform or queue to use to run serial jobs assigned to this platform: diff --git a/environment.yml b/environment.yml index bb6a91d0adc9fe467d7a2d264f98c1191e778225..d8a515c53d94da12a75f12b0b5f79dc52709c63a 100644 --- a/environment.yml +++ b/environment.yml @@ -18,5 +18,4 @@ dependencies: - pip: - bscearth.utils - - saga-python diff --git a/requeriments.txt b/requeriments.txt index 24e7ff924974e566825b0a2e89b1f54dcb4a89d6..19d80cd96b2fffd4db8ebf02a697f09b1cac6fd2 100644 --- a/requeriments.txt +++ b/requeriments.txt @@ -4,7 +4,6 @@ matplotlib numpy pydotplus>=2 pyparsing>=2.0.1 -saga-python>=0.40 paramiko==1.15 mock>=1.3.0 portalocker>=0.5.7 diff --git a/setup.py b/setup.py index 96527c8f33527521ed9055e6376a5e15bca2a7d4..425e34608ae0c2f9d1e9376c26b1e68684d416bf 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( download_url='https://earth.bsc.es/wiki/doku.php?id=tools:autosubmit', keywords=['climate', 'weather', 'workflow', 'HPC'], install_requires=['argparse>=1.2,<2', 'python-dateutil>2', 'pydotplus>=2', 'pyparsing>=2.0.1', - 'numpy', 'matplotlib', 'saga-python>=0.40', 'paramiko==1.15', + 'numpy', 'matplotlib', 'paramiko==1.15', 'mock>=1.3.0', 'portalocker>=0.5.7', 'networkx', 'bscearth.utils'], extras_require={ 'dialog': ["python2-pythondialog>=3.3.0"] diff --git a/test/regression/test_ecmwf_with_saga/conf/autosubmit.conf b/test/regression/test_ecmwf_with_saga/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_ecmwf_with_saga/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_ecmwf_with_saga/conf/expdef.conf b/test/regression/test_ecmwf_with_saga/conf/expdef.conf deleted file mode 100644 index 837525cbd276c050ffddce4cd5e26b9593390ffe..0000000000000000000000000000000000000000 --- a/test/regression/test_ecmwf_with_saga/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = ecmwf-cca - -[experiment] -# Supply the list of start dates. Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-1] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. 
NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 2 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = bash diff --git a/test/regression/test_ecmwf_with_saga/conf/jobs.conf b/test/regression/test_ecmwf_with_saga/conf/jobs.conf deleted file mode 100644 index b0f14e75553e8c04f8151c00e482d7ee11b78dc9..0000000000000000000000000000000000000000 --- a/test/regression/test_ecmwf_with_saga/conf/jobs.conf +++ /dev/null @@ -1,22 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.sh -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 24 -DEPENDENCIES = REMOTE_SETUP-1 - -[REMOTE_PARALLEL_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 24 -PROCESSORS = 48 -QUEUE = np -DEPENDENCIES = REMOTE_PARALLEL_SETUP-1 \ No newline at end of file diff --git a/test/regression/test_ecmwf_with_saga/conf/proj.conf b/test/regression/test_ecmwf_with_saga/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_ecmwf_with_saga/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_ecmwf_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_ecmwf_with_saga/src/TEST_NOLEAP.sh deleted file mode 100644 index b61d0f636691860f322417d41eaef023baa638a5..0000000000000000000000000000000000000000 --- a/test/regression/test_ecmwf_with_saga/src/TEST_NOLEAP.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -set -xvue -TEST=%Chunk_END_DATE% -TEST2=%CHUNK% -TEST3=%PREV% -TEST4=%NUMMEMBERS% -sleep 30s \ No newline at end of file diff --git a/test/regression/test_large_experiment_on_moore_with_saga/conf/autosubmit.conf b/test/regression/test_large_experiment_on_moore_with_saga/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- 
a/test/regression/test_large_experiment_on_moore_with_saga/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_large_experiment_on_moore_with_saga/conf/expdef.conf b/test/regression/test_large_experiment_on_moore_with_saga/conf/expdef.conf deleted file mode 100644 index 95aa1f295ef0f17d0ce6f4d1b4fe1c5d107e40a6..0000000000000000000000000000000000000000 --- a/test/regression/test_large_experiment_on_moore_with_saga/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = moore - -[experiment] -# Supply the list of start dates. Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-14] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 5 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. 
NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = bash diff --git a/test/regression/test_large_experiment_on_moore_with_saga/conf/jobs.conf b/test/regression/test_large_experiment_on_moore_with_saga/conf/jobs.conf deleted file mode 100644 index fa900f6e32a9aec7e0636b8ba411059f22cd4e3a..0000000000000000000000000000000000000000 --- a/test/regression/test_large_experiment_on_moore_with_saga/conf/jobs.conf +++ /dev/null @@ -1,13 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.sh -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 16 -DEPENDENCIES = REMOTE_SETUP-1 diff --git a/test/regression/test_large_experiment_on_moore_with_saga/conf/proj.conf b/test/regression/test_large_experiment_on_moore_with_saga/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_large_experiment_on_moore_with_saga/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_large_experiment_on_moore_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_large_experiment_on_moore_with_saga/src/TEST_NOLEAP.sh deleted file mode 100644 index e9a10ba6432dc622919357f66d2750d8a645f2e3..0000000000000000000000000000000000000000 --- a/test/regression/test_large_experiment_on_moore_with_saga/src/TEST_NOLEAP.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -set -xvue -TEST=%Chunk_END_DATE% -TEST2=%CHUNK% -TEST3=%PREV% -TEST4=%NUMMEMBERS% diff --git a/test/regression/test_mistral_with_saga/conf/autosubmit.conf b/test/regression/test_mistral_with_saga/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_mistral_with_saga/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_mistral_with_saga/conf/expdef.conf b/test/regression/test_mistral_with_saga/conf/expdef.conf deleted file mode 100644 index c8a943b25decb4b4adfb0131b7e3ef3dc5e66127..0000000000000000000000000000000000000000 --- a/test/regression/test_mistral_with_saga/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = moore - -[experiment] -# Supply the list of start dates. 
Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-1] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 2 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. 
NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = bash diff --git a/test/regression/test_mistral_with_saga/conf/jobs.conf b/test/regression/test_mistral_with_saga/conf/jobs.conf deleted file mode 100644 index e66d93ec6e4c944b74f5229c6af48c8666e594f0..0000000000000000000000000000000000000000 --- a/test/regression/test_mistral_with_saga/conf/jobs.conf +++ /dev/null @@ -1,19 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.sh -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = REMOTE_SETUP-1 - -[REMOTE_PARALLEL_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -PROCESSORS = 24 -DEPENDENCIES = REMOTE_PARALLEL_SETUP-1 \ No newline at end of file diff --git a/test/regression/test_mistral_with_saga/conf/proj.conf b/test/regression/test_mistral_with_saga/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_mistral_with_saga/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_mistral_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_mistral_with_saga/src/TEST_NOLEAP.sh deleted file mode 100644 index e9a10ba6432dc622919357f66d2750d8a645f2e3..0000000000000000000000000000000000000000 --- a/test/regression/test_mistral_with_saga/src/TEST_NOLEAP.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -set -xvue -TEST=%Chunk_END_DATE% -TEST2=%CHUNK% -TEST3=%PREV% -TEST4=%NUMMEMBERS% diff --git a/test/regression/test_mn_with_saga/conf/autosubmit.conf b/test/regression/test_mn_with_saga/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_mn_with_saga/conf/expdef.conf b/test/regression/test_mn_with_saga/conf/expdef.conf deleted file mode 100644 index e008ab94398b5abeba00a53239f042f735f19e37..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = marenostrum3 - -[experiment] -# Supply the list of start dates. 
Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-1] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 2 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. 
NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = bash diff --git a/test/regression/test_mn_with_saga/conf/jobs.conf b/test/regression/test_mn_with_saga/conf/jobs.conf deleted file mode 100644 index 807426bcd9cc63e176b0bf155b74ed800295382e..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga/conf/jobs.conf +++ /dev/null @@ -1,21 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.sh -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 16 -DEPENDENCIES = REMOTE_SETUP-1 - -[REMOTE_PARALLEL_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 16 -PROCESSORS = 32 -DEPENDENCIES = REMOTE_PARALLEL_SETUP-1 \ No newline at end of file diff --git a/test/regression/test_mn_with_saga/conf/proj.conf b/test/regression/test_mn_with_saga/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_mn_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_mn_with_saga/src/TEST_NOLEAP.sh deleted file mode 100644 index e9a10ba6432dc622919357f66d2750d8a645f2e3..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga/src/TEST_NOLEAP.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -set -xvue -TEST=%Chunk_END_DATE% -TEST2=%CHUNK% -TEST3=%PREV% -TEST4=%NUMMEMBERS% diff --git a/test/regression/test_mn_with_saga_python/conf/autosubmit.conf b/test/regression/test_mn_with_saga_python/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga_python/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_mn_with_saga_python/conf/expdef.conf b/test/regression/test_mn_with_saga_python/conf/expdef.conf deleted file mode 100644 index 136d585ec3d506d43c4d61f631dade6010623798..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga_python/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = marenostrum3 - -[experiment] -# Supply the list of start dates. 
Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-1] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 2 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. 
NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = python diff --git a/test/regression/test_mn_with_saga_python/conf/jobs.conf b/test/regression/test_mn_with_saga_python/conf/jobs.conf deleted file mode 100644 index 968731491b40e1036139d53758c7820faf4e003b..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga_python/conf/jobs.conf +++ /dev/null @@ -1,13 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.py -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.py -RUNNING = chunk -WALLCLOCK = 00:10 -TASKS = 16 -DEPENDENCIES = REMOTE_SETUP-1 diff --git a/test/regression/test_mn_with_saga_python/conf/proj.conf b/test/regression/test_mn_with_saga_python/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga_python/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_mn_with_saga_python/src/TEST_NOLEAP.py b/test/regression/test_mn_with_saga_python/src/TEST_NOLEAP.py deleted file mode 100644 index 6428017b643ea68ac58c54c0d0a34057d71838a2..0000000000000000000000000000000000000000 --- a/test/regression/test_mn_with_saga_python/src/TEST_NOLEAP.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python -print "%Chunk_END_DATE%" -print "%CHUNK%" -print "%PREV%" -print "%NUMMEMBERS%" diff --git a/test/regression/test_moore_with_saga/conf/autosubmit.conf b/test/regression/test_moore_with_saga/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_moore_with_saga/conf/expdef.conf b/test/regression/test_moore_with_saga/conf/expdef.conf deleted file mode 100644 index c8a943b25decb4b4adfb0131b7e3ef3dc5e66127..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = moore - -[experiment] -# Supply the list of start dates. 
Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm -# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115 -# 200001[01-04] <=> 20000101 20000102 20000103 20000104 -# DATELIST = 19600101 19650101 19700101 -# DATELIST = 1960[0101 0201 0301] -# DATELIST = 19[60-65] -DATELIST = 19960101 -# Supply the list of members. Format fcX -# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2 -# fc[0-2] <=> fc0 fc1 fc2 -# MEMBERS = fc0 fc1 fc2 fc3 fc4 -# MEMBERS = fc[0-4] -MEMBERS = fc[0-1] -# Chunk size unit. STRING = hour, day, month, year -CHUNKSIZEUNIT = month -# Chunk size. NUMERIC = 4, 6, 12 -CHUNKSIZE = 1 -# Total number of chunks in experiment. NUMERIC = 30, 15, 10 -NUMCHUNKS = 2 -# Calendar used. LIST: standard, noleap -CALENDAR = noleap - -[rerun] -# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE -RERUN = FALSE -# If RERUN = TRUE then supply the list of chunks to rerun -# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] -CHUNKLIST = - -[project] -# Select project type. STRING = git, svn, local, none -# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used -PROJECT_TYPE = local -# Destination folder name for project. type = STRING, default = leave empty, -PROJECT_DESTINATION = - -# If PROJECT_TYPE is not git, no need to change -[git] -# Repository URL STRING = 'https://github.com/torvalds/linux.git' -PROJECT_ORIGIN = -# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...} -PROJECT_BRANCH = -# type = STRING, default = leave empty, help = if model branch is a TAG leave empty -PROJECT_COMMIT = - -# If PROJECT_TYPE is not svn, no need to change -[svn] -# type = STRING, help = 'https://svn.ec-earth.org/ecearth3' -PROJECT_URL = -# Select revision number. 
NUMERIC = 1778 -PROJECT_REVISION = - -# If PROJECT_TYPE is not local, no need to change -[local] -# type = STRING, help = /foo/bar/ecearth -PROJECT_PATH = PROJECT-PATH-HERE - -# If PROJECT_TYPE is none, no need to change -[project_files] -# Where is PROJECT CONFIGURATION file location relative to project root path -FILE_PROJECT_CONF = -# Where is JOBS CONFIGURATION file location relative to project root path -FILE_JOBS_CONF = -JOB_SCRIPTS_TYPE = bash diff --git a/test/regression/test_moore_with_saga/conf/jobs.conf b/test/regression/test_moore_with_saga/conf/jobs.conf deleted file mode 100644 index 5c4345ef553c7aed1a70bc2a3ea7f4e96c28eebc..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga/conf/jobs.conf +++ /dev/null @@ -1,19 +0,0 @@ -[LOCAL_SETUP] -FILE = TEST_NOLEAP.sh -PLATFORM = LOCAL -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = LOCAL_SETUP-1 - -[REMOTE_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -DEPENDENCIES = REMOTE_SETUP-1 - -[REMOTE_PARALLEL_SETUP] -FILE = TEST_NOLEAP.sh -RUNNING = chunk -WALLCLOCK = 00:10 -PROCESSORS = 2 -DEPENDENCIES = REMOTE_PARALLEL_SETUP-1 \ No newline at end of file diff --git a/test/regression/test_moore_with_saga/conf/proj.conf b/test/regression/test_moore_with_saga/conf/proj.conf deleted file mode 100644 index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga/conf/proj.conf +++ /dev/null @@ -1,3 +0,0 @@ -[DEFAULT] -HOURS=24 -DAYS=30 diff --git a/test/regression/test_moore_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_moore_with_saga/src/TEST_NOLEAP.sh deleted file mode 100644 index e9a10ba6432dc622919357f66d2750d8a645f2e3..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga/src/TEST_NOLEAP.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -set -xvue -TEST=%Chunk_END_DATE% -TEST2=%CHUNK% -TEST3=%PREV% -TEST4=%NUMMEMBERS% diff --git a/test/regression/test_moore_with_saga_python/conf/autosubmit.conf b/test/regression/test_moore_with_saga_python/conf/autosubmit.conf deleted file mode 100644 index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga_python/conf/autosubmit.conf +++ /dev/null @@ -1,22 +0,0 @@ -[config] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# No need to change. -# Autosubmit version identifier -AUTOSUBMIT_VERSION = 3.3.1 -# Default maximum number of jobs to be waiting in any platform -# Default = 3 -MAXWAITINGJOBS = 3 -# Default maximum number of jobs to be running at the same time at any platform -# Default = 6 -TOTALJOBS = 6 -# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status -# Default = 10 -SAFETYSLEEPTIME = 10 -# Number of retrials if a job fails. Can ve override at job level -# Default = 0 -RETRIALS = 0 - -[communications] -API = saga \ No newline at end of file diff --git a/test/regression/test_moore_with_saga_python/conf/expdef.conf b/test/regression/test_moore_with_saga_python/conf/expdef.conf deleted file mode 100644 index 35ac9101d78e12dfbbe667f4e02a27d3c03457b3..0000000000000000000000000000000000000000 --- a/test/regression/test_moore_with_saga_python/conf/expdef.conf +++ /dev/null @@ -1,73 +0,0 @@ -[DEFAULT] -# Experiment identifier -# No need to change -EXPID = EXPID-HERE -# HPC name. -# No need to change -HPCARCH = moore - -[experiment] -# Supply the list of start dates. 
diff --git a/test/regression/test_moore_with_saga_python/conf/expdef.conf b/test/regression/test_moore_with_saga_python/conf/expdef.conf
deleted file mode 100644
index 35ac9101d78e12dfbbe667f4e02a27d3c03457b3..0000000000000000000000000000000000000000
--- a/test/regression/test_moore_with_saga_python/conf/expdef.conf
+++ /dev/null
@@ -1,73 +0,0 @@
-[DEFAULT]
-# Experiment identifier
-# No need to change
-EXPID = EXPID-HERE
-# HPC name.
-# No need to change
-HPCARCH = moore
-
-[experiment]
-# Supply the list of start dates. Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm
-# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115
-# 200001[01-04] <=> 20000101 20000102 20000103 20000104
-# DATELIST = 19600101 19650101 19700101
-# DATELIST = 1960[0101 0201 0301]
-# DATELIST = 19[60-65]
-DATELIST = 19960101
-# Supply the list of members. Format fcX
-# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2
-# fc[0-2] <=> fc0 fc1 fc2
-# MEMBERS = fc0 fc1 fc2 fc3 fc4
-# MEMBERS = fc[0-4]
-MEMBERS = fc[0-1]
-# Chunk size unit. STRING = hour, day, month, year
-CHUNKSIZEUNIT = month
-# Chunk size. NUMERIC = 4, 6, 12
-CHUNKSIZE = 1
-# Total number of chunks in experiment. NUMERIC = 30, 15, 10
-NUMCHUNKS = 2
-# Calendar used. LIST: standard, noleap
-CALENDAR = noleap
-
-[rerun]
-# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE
-RERUN = FALSE
-# If RERUN = TRUE then supply the list of chunks to rerun
-# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ]
-CHUNKLIST =
-
-[project]
-# Select project type. STRING = git, svn, local, none
-# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used
-PROJECT_TYPE = local
-# Destination folder name for project. type = STRING, default = leave empty,
-PROJECT_DESTINATION =
-
-# If PROJECT_TYPE is not git, no need to change
-[git]
-# Repository URL STRING = 'https://github.com/torvalds/linux.git'
-PROJECT_ORIGIN =
-# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...}
-PROJECT_BRANCH =
-# type = STRING, default = leave empty, help = if model branch is a TAG leave empty
-PROJECT_COMMIT =
-
-# If PROJECT_TYPE is not svn, no need to change
-[svn]
-# type = STRING, help = 'https://svn.ec-earth.org/ecearth3'
-PROJECT_URL =
-# Select revision number. NUMERIC = 1778
-PROJECT_REVISION =
-
-# If PROJECT_TYPE is not local, no need to change
-[local]
-# type = STRING, help = /foo/bar/ecearth
-PROJECT_PATH = PROJECT-PATH-HERE
-
-# If PROJECT_TYPE is none, no need to change
-[project_files]
-# Where is PROJECT CONFIGURATION file location relative to project root path
-FILE_PROJECT_CONF =
-# Where is JOBS CONFIGURATION file location relative to project root path
-FILE_JOBS_CONF =
-JOB_SCRIPTS_TYPE = python
diff --git a/test/regression/test_moore_with_saga_python/conf/jobs.conf b/test/regression/test_moore_with_saga_python/conf/jobs.conf
deleted file mode 100644
index 0a9894f174c187cccaa21ee33a542888fb643485..0000000000000000000000000000000000000000
--- a/test/regression/test_moore_with_saga_python/conf/jobs.conf
+++ /dev/null
@@ -1,12 +0,0 @@
-[LOCAL_SETUP]
-FILE = TEST_NOLEAP.py
-PLATFORM = LOCAL
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = LOCAL_SETUP-1
-
-[REMOTE_SETUP]
-FILE = TEST_NOLEAP.py
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = REMOTE_SETUP-1
diff --git a/test/regression/test_moore_with_saga_python/conf/proj.conf b/test/regression/test_moore_with_saga_python/conf/proj.conf
deleted file mode 100644
index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000
--- a/test/regression/test_moore_with_saga_python/conf/proj.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-[DEFAULT]
-HOURS=24
-DAYS=30
diff --git a/test/regression/test_moore_with_saga_python/src/TEST_NOLEAP.py b/test/regression/test_moore_with_saga_python/src/TEST_NOLEAP.py
deleted file mode 100644
index 6428017b643ea68ac58c54c0d0a34057d71838a2..0000000000000000000000000000000000000000
--- a/test/regression/test_moore_with_saga_python/src/TEST_NOLEAP.py
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env python
-print "%Chunk_END_DATE%"
-print "%CHUNK%"
-print "%PREV%"
-print "%NUMMEMBERS%"
diff --git a/test/regression/test_sedema_with_saga/conf/autosubmit.conf b/test/regression/test_sedema_with_saga/conf/autosubmit.conf
deleted file mode 100644
index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga/conf/autosubmit.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[config]
-# Experiment identifier
-# No need to change
-EXPID = EXPID-HERE
-# No need to change.
-# Autosubmit version identifier
-AUTOSUBMIT_VERSION = 3.3.1
-# Default maximum number of jobs to be waiting in any platform
-# Default = 3
-MAXWAITINGJOBS = 3
-# Default maximum number of jobs to be running at the same time at any platform
-# Default = 6
-TOTALJOBS = 6
-# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status
-# Default = 10
-SAFETYSLEEPTIME = 10
-# Number of retrials if a job fails. Can ve override at job level
-# Default = 0
-RETRIALS = 0
-
-[communications]
-API = saga
\ No newline at end of file
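Every deleted autosubmit.conf above pins the removed backend via [communications] API = saga, which is exactly the knob this patch restricts to paramiko. As a hedged sketch of how such an INI option can be read with a safe fallback — read_communications_api is an illustrative helper, not an Autosubmit function — consider:

    try:
        from configparser import ConfigParser                           # Python 3
    except ImportError:
        from ConfigParser import SafeConfigParser as ConfigParser      # Python 2

    def read_communications_api(conf_path, default='paramiko'):
        """Return the [communications] API value from an autosubmit.conf-style
        INI file, falling back to a default when the section or option is
        missing. Option lookup is case-insensitive by ConfigParser default."""
        parser = ConfigParser()
        parser.read(conf_path)
        if parser.has_option('communications', 'API'):
            return parser.get('communications', 'API').lower()
        return default

    # For the deleted fixtures this returns 'saga'; any value other than
    # 'paramiko' is now rejected by the configuration validation.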
diff --git a/test/regression/test_sedema_with_saga/conf/expdef.conf b/test/regression/test_sedema_with_saga/conf/expdef.conf
deleted file mode 100644
index 942496b06983b190c8eebeb0e262de11796e1af3..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga/conf/expdef.conf
+++ /dev/null
@@ -1,73 +0,0 @@
-[DEFAULT]
-# Experiment identifier
-# No need to change
-EXPID = EXPID-HERE
-# HPC name.
-# No need to change
-HPCARCH = SEDEMA
-
-[experiment]
-# Supply the list of start dates. Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm
-# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115
-# 200001[01-04] <=> 20000101 20000102 20000103 20000104
-# DATELIST = 19600101 19650101 19700101
-# DATELIST = 1960[0101 0201 0301]
-# DATELIST = 19[60-65]
-DATELIST = 19960101
-# Supply the list of members. Format fcX
-# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2
-# fc[0-2] <=> fc0 fc1 fc2
-# MEMBERS = fc0 fc1 fc2 fc3 fc4
-# MEMBERS = fc[0-4]
-MEMBERS = fc[0-1]
-# Chunk size unit. STRING = hour, day, month, year
-CHUNKSIZEUNIT = month
-# Chunk size. NUMERIC = 4, 6, 12
-CHUNKSIZE = 1
-# Total number of chunks in experiment. NUMERIC = 30, 15, 10
-NUMCHUNKS = 2
-# Calendar used. LIST: standard, noleap
-CALENDAR = noleap
-
-[rerun]
-# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE
-RERUN = FALSE
-# If RERUN = TRUE then supply the list of chunks to rerun
-# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ]
-CHUNKLIST =
-
-[project]
-# Select project type. STRING = git, svn, local, none
-# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used
-PROJECT_TYPE = local
-# Destination folder name for project. type = STRING, default = leave empty,
-PROJECT_DESTINATION =
-
-# If PROJECT_TYPE is not git, no need to change
-[git]
-# Repository URL STRING = 'https://github.com/torvalds/linux.git'
-PROJECT_ORIGIN =
-# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...}
-PROJECT_BRANCH =
-# type = STRING, default = leave empty, help = if model branch is a TAG leave empty
-PROJECT_COMMIT =
-
-# If PROJECT_TYPE is not svn, no need to change
-[svn]
-# type = STRING, help = 'https://svn.ec-earth.org/ecearth3'
-PROJECT_URL =
-# Select revision number. NUMERIC = 1778
-PROJECT_REVISION =
-
-# If PROJECT_TYPE is not local, no need to change
-[local]
-# type = STRING, help = /foo/bar/ecearth
-PROJECT_PATH = PROJECT-PATH-HERE
-
-# If PROJECT_TYPE is none, no need to change
-[project_files]
-# Where is PROJECT CONFIGURATION file location relative to project root path
-FILE_PROJECT_CONF =
-# Where is JOBS CONFIGURATION file location relative to project root path
-FILE_JOBS_CONF =
-JOB_SCRIPTS_TYPE = bash
diff --git a/test/regression/test_sedema_with_saga/conf/jobs.conf b/test/regression/test_sedema_with_saga/conf/jobs.conf
deleted file mode 100644
index 3ce5309e39b005c49e21a6af5d719f04260c0051..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga/conf/jobs.conf
+++ /dev/null
@@ -1,19 +0,0 @@
-[LOCAL_SETUP]
-FILE = TEST_NOLEAP.sh
-PLATFORM = LOCAL
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = LOCAL_SETUP-1
-
-[REMOTE_SETUP]
-FILE = TEST_NOLEAP.sh
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = REMOTE_SETUP-1
-
-[REMOTE_PARALLEL_SETUP]
-FILE = TEST_NOLEAP.sh
-RUNNING = chunk
-WALLCLOCK = 00:10
-PROCESSORS = 16
-DEPENDENCIES = REMOTE_PARALLEL_SETUP-1
\ No newline at end of file
diff --git a/test/regression/test_sedema_with_saga/conf/proj.conf b/test/regression/test_sedema_with_saga/conf/proj.conf
deleted file mode 100644
index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga/conf/proj.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-[DEFAULT]
-HOURS=24
-DAYS=30
diff --git a/test/regression/test_sedema_with_saga/src/TEST_NOLEAP.sh b/test/regression/test_sedema_with_saga/src/TEST_NOLEAP.sh
deleted file mode 100644
index e9a10ba6432dc622919357f66d2750d8a645f2e3..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga/src/TEST_NOLEAP.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-set -xvue
-TEST=%Chunk_END_DATE%
-TEST2=%CHUNK%
-TEST3=%PREV%
-TEST4=%NUMMEMBERS%
diff --git a/test/regression/test_sedema_with_saga_python/conf/autosubmit.conf b/test/regression/test_sedema_with_saga_python/conf/autosubmit.conf
deleted file mode 100644
index 97006d74f0b27f972ff0f119d3b27f93bf3a5296..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga_python/conf/autosubmit.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[config]
-# Experiment identifier
-# No need to change
-EXPID = EXPID-HERE
-# No need to change.
-# Autosubmit version identifier
-AUTOSUBMIT_VERSION = 3.3.1
-# Default maximum number of jobs to be waiting in any platform
-# Default = 3
-MAXWAITINGJOBS = 3
-# Default maximum number of jobs to be running at the same time at any platform
-# Default = 6
-TOTALJOBS = 6
-# Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status
-# Default = 10
-SAFETYSLEEPTIME = 10
-# Number of retrials if a job fails. Can ve override at job level
-# Default = 0
-RETRIALS = 0
-
-[communications]
-API = saga
\ No newline at end of file
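These fixtures all run two one-month chunks from DATELIST = 19960101 under a noleap calendar, and the test scripts print the resulting %Chunk_END_DATE%. A back-of-the-envelope sketch of how chunk end dates could be derived — this is not Autosubmit's date handling, and add_months/chunk_end_date are hypothetical helpers; the noleap assumption only matters for day clamping — might look like:

    from datetime import datetime, timedelta

    # Month lengths in a noleap (365-day) calendar: February is always 28.
    DAYS_IN_MONTH_NOLEAP = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    def add_months(date, months):
        """Advance a date by whole months, clamping the day to the noleap
        month length when necessary."""
        month_index = date.month - 1 + months
        year = date.year + month_index // 12
        month = month_index % 12 + 1
        day = min(date.day, DAYS_IN_MONTH_NOLEAP[month - 1])
        return date.replace(year=year, month=month, day=day)

    def chunk_end_date(start, chunk, size=1, unit='month'):
        """End date of 1-based chunk number `chunk`, each `size` units long."""
        if unit == 'month':
            return add_months(start, chunk * size)
        if unit == 'day':
            return start + timedelta(days=chunk * size)
        raise ValueError('unsupported unit: %s' % unit)

    # With DATELIST = 19960101, CHUNKSIZE = 1, NUMCHUNKS = 2:
    # chunk_end_date(datetime(1996, 1, 1), 1) -> 1996-02-01
    # chunk_end_date(datetime(1996, 1, 1), 2) -> 1996-03-01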
diff --git a/test/regression/test_sedema_with_saga_python/conf/expdef.conf b/test/regression/test_sedema_with_saga_python/conf/expdef.conf
deleted file mode 100644
index 562b0b2309cd874d3efa8e611622b798b4e9ec74..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga_python/conf/expdef.conf
+++ /dev/null
@@ -1,73 +0,0 @@
-[DEFAULT]
-# Experiment identifier
-# No need to change
-EXPID = EXPID-HERE
-# HPC name.
-# No need to change
-HPCARCH = SEDEMA
-
-[experiment]
-# Supply the list of start dates. Available formats: YYYYMMDD YYYYMMDDhh YYYYMMDDhhmm
-# You can also use an abbreviated syntax for multiple dates with common parts: 200001[01 15] <=> 20000101 20000115
-# 200001[01-04] <=> 20000101 20000102 20000103 20000104
-# DATELIST = 19600101 19650101 19700101
-# DATELIST = 1960[0101 0201 0301]
-# DATELIST = 19[60-65]
-DATELIST = 19960101
-# Supply the list of members. Format fcX
-# You can also use an abreviated syntax for multiple members: fc[0 1 2] <=> fc0 fc1 fc2
-# fc[0-2] <=> fc0 fc1 fc2
-# MEMBERS = fc0 fc1 fc2 fc3 fc4
-# MEMBERS = fc[0-4]
-MEMBERS = fc[0-1]
-# Chunk size unit. STRING = hour, day, month, year
-CHUNKSIZEUNIT = month
-# Chunk size. NUMERIC = 4, 6, 12
-CHUNKSIZE = 1
-# Total number of chunks in experiment. NUMERIC = 30, 15, 10
-NUMCHUNKS = 2
-# Calendar used. LIST: standard, noleap
-CALENDAR = noleap
-
-[rerun]
-# Is a rerun or not? [Default: Do set FALSE]. BOOLEAN = TRUE, FALSE
-RERUN = FALSE
-# If RERUN = TRUE then supply the list of chunks to rerun
-# LIST = [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ]
-CHUNKLIST =
-
-[project]
-# Select project type. STRING = git, svn, local, none
-# If PROJECT_TYPE is set to none, Autosubmit self-contained dummy templates will be used
-PROJECT_TYPE = local
-# Destination folder name for project. type = STRING, default = leave empty,
-PROJECT_DESTINATION =
-
-# If PROJECT_TYPE is not git, no need to change
-[git]
-# Repository URL STRING = 'https://github.com/torvalds/linux.git'
-PROJECT_ORIGIN =
-# Select branch or tag, STRING, default = 'master', help = {'master' (default), 'develop', 'v3.1b', ...}
-PROJECT_BRANCH =
-# type = STRING, default = leave empty, help = if model branch is a TAG leave empty
-PROJECT_COMMIT =
-
-# If PROJECT_TYPE is not svn, no need to change
-[svn]
-# type = STRING, help = 'https://svn.ec-earth.org/ecearth3'
-PROJECT_URL =
-# Select revision number. NUMERIC = 1778
-PROJECT_REVISION =
-
-# If PROJECT_TYPE is not local, no need to change
-[local]
-# type = STRING, help = /foo/bar/ecearth
-PROJECT_PATH = PROJECT-PATH-HERE
-
-# If PROJECT_TYPE is none, no need to change
-[project_files]
-# Where is PROJECT CONFIGURATION file location relative to project root path
-FILE_PROJECT_CONF =
-# Where is JOBS CONFIGURATION file location relative to project root path
-FILE_JOBS_CONF =
-JOB_SCRIPTS_TYPE = python
diff --git a/test/regression/test_sedema_with_saga_python/conf/jobs.conf b/test/regression/test_sedema_with_saga_python/conf/jobs.conf
deleted file mode 100644
index 0a9894f174c187cccaa21ee33a542888fb643485..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga_python/conf/jobs.conf
+++ /dev/null
@@ -1,12 +0,0 @@
-[LOCAL_SETUP]
-FILE = TEST_NOLEAP.py
-PLATFORM = LOCAL
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = LOCAL_SETUP-1
-
-[REMOTE_SETUP]
-FILE = TEST_NOLEAP.py
-RUNNING = chunk
-WALLCLOCK = 00:10
-DEPENDENCIES = REMOTE_SETUP-1
diff --git a/test/regression/test_sedema_with_saga_python/conf/proj.conf b/test/regression/test_sedema_with_saga_python/conf/proj.conf
deleted file mode 100644
index 6d326a0ca08bae06a8aaaf003064f8215df236ef..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga_python/conf/proj.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-[DEFAULT]
-HOURS=24
-DAYS=30
diff --git a/test/regression/test_sedema_with_saga_python/src/TEST_NOLEAP.py b/test/regression/test_sedema_with_saga_python/src/TEST_NOLEAP.py
deleted file mode 100644
index 6428017b643ea68ac58c54c0d0a34057d71838a2..0000000000000000000000000000000000000000
--- a/test/regression/test_sedema_with_saga_python/src/TEST_NOLEAP.py
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env python
-print "%Chunk_END_DATE%"
-print "%CHUNK%"
-print "%PREV%"
-print "%NUMMEMBERS%"
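The TEST_NOLEAP.sh and TEST_NOLEAP.py fixtures deleted above are templates: tokens such as %CHUNK%, %PREV%, %NUMMEMBERS% and %Chunk_END_DATE% are filled in from the job's parameter dictionary before the script is shipped to the platform. As an illustrative sketch of the substitution idea only — render_template is a hypothetical name, not the routine in autosubmit/job/job.py — the mechanism is essentially:

    import re

    def render_template(template, parameters):
        """Replace %KEY% placeholders with values from a parameter
        dictionary, leaving unknown placeholders untouched."""
        def substitute(match):
            key = match.group(1)
            return str(parameters.get(key, match.group(0)))
        return re.sub(r'%(\w+)%', substitute, template)

    # render_template('TEST2=%CHUNK%\nTEST4=%NUMMEMBERS%',
    #                 {'CHUNK': 1, 'NUMMEMBERS': 2})
    # -> 'TEST2=1\nTEST4=2'

Leaving unknown keys untouched makes missing parameters easy to spot in the generated script rather than silently replacing them with empty strings.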
diff --git a/test/regression/tests.conf b/test/regression/tests.conf
index a142654942fb77bb4b48bed69df23af044dfc510..33a3c96c8b5500fb205ae7feab9ba5ecfbc0eef4 100644
--- a/test/regression/tests.conf
+++ b/test/regression/tests.conf
@@ -6,20 +6,11 @@ HPCARCH = marenostrum3
 DESCRIPTION = "Simple experiment on MN3 with paramiko"
 SRC_PATH = test_mn3_with_paramiko
 
-[test_mn_with_saga]
-HPCARCH = marenostrum3
-DESCRIPTION = "Simple experiment on MN3 with saga"
-SRC_PATH = test_mn_with_saga
-
 [test_mn3_with_paramiko_python]
 HPCARCH = marenostrum3
 DESCRIPTION = "Simple experiment on MN3 with paramiko and python"
 SRC_PATH = test_mn3_with_paramiko_python
-[test_mn_with_saga_python]
-HPCARCH = marenostrum3
-DESCRIPTION = "Simple experiment on MN3 with saga and python"
-SRC_PATH = test_mn_with_saga_python
 
 #Tests on Marenostrum 4
 ######################
@@ -62,10 +53,6 @@ HPCARCH = ecmwf-cca
 DESCRIPTION = "Simple experiment on ECMWF with paramiko"
 SRC_PATH = test_ecmwf_with_paramiko
-[test_ecmwf_with_saga]
-HPCARCH = ecmwf-cca
-DESCRIPTION = "Simple experiment on ECMWF with saga"
-SRC_PATH = test_ecmwf_with_saga
 
 #Tests on moore
 ################
@@ -75,25 +62,14 @@ HPCARCH = moore
 DESCRIPTION = "Simple experiment on moore with paramiko"
 SRC_PATH = test_moore_with_paramiko
-[test_moore_with_saga]
-HPCARCH = moore
-DESCRIPTION = "Simple experiment on moore with saga"
-SRC_PATH = test_moore_with_saga
 
 [test_moore_with_paramiko_python]
 HPCARCH = moore
 DESCRIPTION = "Simple experiment on moore with paramiko and python"
 SRC_PATH = test_moore_with_paramiko_python
-[test_moore_with_saga_python]
-HPCARCH = moore
-DESCRIPTION = "Simple experiment on moore with saga and python"
-SRC_PATH = test_moore_with_saga_python
-[test_large_experiment_on_moore_with_saga]
-HPCARCH = moore
-DESCRIPTION = "Large experiment with 15 members and 5 chunks on moore with saga"
-SRC_PATH = test_large_experiment_on_moore_with_saga
+
 
 [test_large_experiment_on_moore_with_paramiko]
 HPCARCH = moore
@@ -110,20 +86,12 @@ SRC_PATH = test_large_experiment_on_moore_with_paramiko
 #DESCRIPTION = "Simple experiment on SEDEMA with paramiko"
 #SRC_PATH = test_sedema_with_paramiko
 
-#[test_sedema_with_saga]
-#HPCARCH = SEDEMA
-#DESCRIPTION = "Simple experiment on SEDEMA with saga"
-#SRC_PATH = test_sedema_with_saga
 
 #[test_sedema_with_paramiko_python]
 #HPCARCH = SEDEMA
 #DESCRIPTION = "Simple experiment on SEDEMA with paramiko and python"
 #SRC_PATH = test_sedema_with_paramiko_python
 
-#[test_sedema_with_saga_python]
-#HPCARCH = SEDEMA
-#DESCRIPTION = "Simple experiment on SEDEMA with saga and python"
-#SRC_PATH = test_sedema_with_saga_python
 
@@ -135,7 +103,3 @@ SRC_PATH = test_large_experiment_on_moore_with_paramiko
 #DESCRIPTION = "Simple experiment on mistral with paramiko"
 #SRC_PATH = test_mistral_with_paramiko
 
-#[test_mistral_with_saga]
-#HPCARCH = mistral
-#DESCRIPTION = "Simple experiment on mistral with saga"
-#SRC_PATH = test_mistral_with_saga
\ No newline at end of file
diff --git a/test/unit/test_saga_platform.py b/test/unit/test_saga_platform.py
deleted file mode 100644
index cf1ddddfbd8108fa713954bce38dba1a16d2fef7..0000000000000000000000000000000000000000
--- a/test/unit/test_saga_platform.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# import subprocess
-# import sys
-# from unittest import TestCase
-#
-# import os
-# import re
-#
-# from mock import Mock
-# from mock import patch
-#
-# from autosubmit.job.job_common import Status
-# from autosubmit.job.job_common import Type
-#
-# ###############################################
-# # Special SAGA import to prevent logging/atfork errors
-#
-#
-# os.environ['RADICAL_UTILS_NOATFORK'] = 'True'
-# import saga
-# from autosubmit.platforms.saga_platform import SagaPlatform
-#
-#
-# ###############################################
-#
-# class TestSagaPlatform(TestCase):
-#     def setUp(self):
-#         self.experiment_id = 'random-id'
-#         self.platform = SagaPlatform(self.experiment_id, 'test', FakeBasicConfig)
-#         self.platform.service = Mock()
-#         self.platform.service.session = Mock()
-#
-#     def test_check_status_returns_completed_if_job_id_not_exists(self):
-#         # arrange
-#         self.platform.service = FakeService([])
-#         # act
-#         status = self.platform.check_job('any-id')
-#         # assert
-#         self.assertEquals(Status.COMPLETED, status)
-#
-#     def test_check_status_returns_the_right_states(self):
-#         # arrange
-#         self.platform.service = FakeService(['any-id'])
-#         self.platform.service.get_job = Mock(side_effect=[FakeJob('any-name', saga.job.UNKNOWN),
-#                                                           FakeJob('any-name', saga.job.PENDING),
-#                                                           FakeJob('any-name', saga.job.FAILED),
-#                                                           FakeJob('any-name', saga.job.CANCELED),
-#                                                           FakeJob('any-name', saga.job.DONE),
-#                                                           FakeJob('any-name', saga.job.RUNNING),
-#                                                           FakeJob('any-name', saga.job.SUSPENDED)])
-#         # act
-#         should_be_unknown = self.platform.check_job('any-id')
-#         should_be_queuing = self.platform.check_job('any-id')
-#         should_be_failed = self.platform.check_job('any-id')
-#         should_be_failed2 = self.platform.check_job('any-id')
-#         should_be_completed = self.platform.check_job('any-id')
-#         should_be_running = self.platform.check_job('any-id')
-#         should_be_suspended = self.platform.check_job('any-id')
-#
-#         # assert
-#         self.assertEquals(Status.UNKNOWN, should_be_unknown)
-#         self.assertEquals(Status.QUEUING, should_be_queuing)
-#         self.assertEquals(Status.FAILED, should_be_failed)
-#         self.assertEquals(Status.FAILED, should_be_failed2)
-#         self.assertEquals(Status.COMPLETED, should_be_completed)
-#         self.assertEquals(Status.RUNNING, should_be_running)
-#         self.assertEquals(Status.SUSPENDED, should_be_suspended)
-#
-#     def test_creates_a_saga_job_correctly(self):
-#         parameters = {'WALLCLOCK': '',
-#                       'CURRENT_QUEUE': 'queue',
-#                       'CURRENT_BUDG': 'project',
-#                       'NUMPROC': 666,
-#                       'NUMTASK': 777,
-#                       'NUMTHREADS': 888,
-#                       'MEMORY': 999,
-#                       'CURRENT_RESERVATION': 'dummy',
-#                       'CURRENT_EXCLUSIVITY': 'true'}
-#         job = FakeJob('any-name', saga.job.RUNNING, Type.BASH, parameters)
-#         jd = FakeJobDescription()
-#         sys.modules['saga'].job.Description = Mock(return_value=jd)
-#         self.platform.add_attribute = Mock()
-#         self.platform.service = FakeService([])
-#         self.platform.service.create_job = Mock(return_value='created-job')
-#
-#         # act
-#         created_job = self.platform.create_saga_job(job, 'scriptname')
-#
-#         # assert
-#         self.assertEquals('LOG_random-id/scriptname', jd.executable)
-#         self.assertEquals('LOG_random-id', jd.working_directory)
-#         self.assertIsNotNone(re.match('any-name.[0-9]*.out', jd.output))
-#         self.assertIsNotNone(re.match('any-name.[0-9]*.err', jd.error))
-#         self.platform.add_attribute.assert_any_call(jd, 'Name', job.name)
-#         self.platform.add_attribute.assert_any_call(jd, 'WallTimeLimit', 0)
-#         self.platform.add_attribute.assert_any_call(jd, 'Queue', parameters["CURRENT_QUEUE"])
-#         self.platform.add_attribute.assert_any_call(jd, 'Project', parameters["CURRENT_BUDG"] + ':' + parameters[
-#             "CURRENT_RESERVATION"] + ':' + parameters["CURRENT_EXCLUSIVITY"])
-#         self.platform.add_attribute.assert_any_call(jd, 'TotalCPUCount', parameters["NUMPROC"])
-#         self.platform.add_attribute.assert_any_call(jd, 'ProcessesPerHost', parameters["NUMTASK"])
-#         self.platform.add_attribute.assert_any_call(jd, 'ThreadsPerProcess', parameters["NUMTHREADS"])
-#         self.platform.add_attribute.assert_any_call(jd, 'TotalPhysicalMemory', parameters["MEMORY"])
-#         self.assertEquals('created-job', created_job)
-#
-#     def test_deleting_file_returns_true_if_not_exists(self):
-#         self.platform.exists_file = Mock(return_value=False)
-#         deleted = self.platform.delete_file('filename')
-#         self.assertTrue(deleted)
-#
-#     def test_deleting_file_on_ecaccess_platform_makes_the_right_call(self):
-#         self.platform.type = 'ecaccess'
-#         sys.modules['subprocess'].check_call = Mock()
-#
-#         deleted = self.platform.delete_file('file/path')
-#
-#         self.assertTrue(deleted)
-#         sys.modules['subprocess'].check_call.assert_called_once_with(
-#             ['ecaccess-file-delete', '{0}:{1}'.format(self.platform.host, os.path.join(self.platform.get_files_path(),
-#                                                                                        'file/path'))])
-#
-#     def test_deleting_file_on_ecaccess_platform_returns_true_on_error(self):
-#         self.platform.type = 'ecaccess'
-#
-#         check_call_mock = Mock()
-#         check_call_mock.side_effect = subprocess.CalledProcessError
-#         sys.modules['subprocess'].check_call = check_call_mock
-#
-#         deleted = self.platform.delete_file('file/path')
-#         self.assertTrue(deleted)
-#
-#     def test_deleting_file_on_local_platform_makes_the_right_call(self):
-#         self.platform.type = 'local'
-#         self.platform.exists_file = Mock(return_value=True)
-#         out_mock = Mock()
-#         out_mock.remove = Mock()
-#         out_mock.close = Mock()
-#         sys.modules['saga'].filesystem.File = Mock(return_value=out_mock)
-#
-#         deleted = self.platform.delete_file('file/path')
-#
-#         self.assertTrue(deleted)
-#         sys.modules['saga'].filesystem.File.assert_called_once_with(
-#             "file://{0}".format(os.path.join(self.platform.tmp_path, 'LOG_' + self.platform.expid,
-#                                              'file/path')))
-#         out_mock.remove.assert_called_once_with()
-#         out_mock.close.assert_called_once_with()
-#
-#     def test_deleting_file_on_non_local_platform_makes_the_right_call(self):
-#         self.platform.exists_file = Mock(return_value=True)
-#         out_mock = Mock()
-#         out_mock.remove = Mock()
-#         out_mock.close = Mock()
-#         sys.modules['saga'].filesystem.File = Mock(return_value=out_mock)
-#
-#         deleted = self.platform.delete_file('file/path')
-#
-#         self.assertTrue(deleted)
-#         sys.modules['saga'].filesystem.File.assert_called_once_with(
-#             "sftp://{0}{1}".format(self.platform.host, os.path.join(self.platform.get_files_path(), 'file/path')),
-#             session=self.platform.service.session)
-#         out_mock.remove.assert_called_once_with()
-#         out_mock.close.assert_called_once_with()
-#
-#     @patch('autosubmit.platforms.platform.sleep')
-#     def test_that_get_completed_makes_the_right_number_of_retries_when_not_found(self, mock_sleep):
-#         retries = 5
-#         self.platform.get_file = Mock(return_value=False)
-#
-#         found = self.platform.get_completed_files('any-name', retries)
-#
-#         self.assertFalse(found)
-#         self.assertEquals(retries + 1, self.platform.get_file.call_count)
-#
-#
-# class FakeService:
-#     def __init__(self, jobs):
-#         self.jobs = jobs
-#
-#
-# class FakeJob:
-#     def __init__(self, name, state, type=None, parameters={}):
-#         self.name = name
-#         self.state = state
-#         self.type = type
-#         self.parameters = parameters
-#
-#
-# class FakeJobDescription:
-#     def __init__(self):
-#         self.executable = None
-#         self.working_directory = None
-#         self.output = None
-#         self.error = None
-#
-#
-# class FakeBasicConfig:
-#     def __init__(self):
-#         pass
-#
-#     DB_DIR = '/dummy/db/dir'
-#     DB_FILE = '/dummy/db/file'
-#     DB_PATH = '/dummy/db/path'
-#     LOCAL_ROOT_DIR = '/dummy/local/root/dir'
-#     LOCAL_TMP_DIR = '/dummy/local/temp/dir'
-#     LOCAL_PROJ_DIR = '/dummy/local/proj/dir'
-#     DEFAULT_PLATFORMS_CONF = ''
-#     DEFAULT_JOBS_CONF = ''
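The deleted test_check_status_returns_the_right_states above pins down how saga job states were translated into Autosubmit statuses. For reference, that mapping can be summarized in a small sketch — the string constants here are illustrative stand-ins; the real values lived in saga.job and autosubmit.job.job_common.Status:

    # State names mirror the assertions in the deleted test:
    # PENDING -> QUEUING, DONE -> COMPLETED, CANCELED and FAILED -> FAILED,
    # RUNNING -> RUNNING, SUSPENDED -> SUSPENDED, anything else -> UNKNOWN.
    SAGA_TO_STATUS = {
        'PENDING':   'QUEUING',
        'RUNNING':   'RUNNING',
        'DONE':      'COMPLETED',
        'FAILED':    'FAILED',
        'CANCELED':  'FAILED',
        'SUSPENDED': 'SUSPENDED',
    }

    def map_state(saga_state):
        """Translate a saga job state name to an Autosubmit status name,
        defaulting to UNKNOWN for unrecognized states."""
        return SAGA_TO_STATUS.get(saga_state, 'UNKNOWN')

The paramiko platforms keep equivalent per-scheduler mappings, which is why this saga-specific test can be dropped along with the backend it exercised.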