From 845146a559be15322fe880f5fb61e4ed852f6641 Mon Sep 17 00:00:00 2001 From: Larissa Batista Leite Date: Tue, 24 Oct 2017 11:28:54 +0200 Subject: [PATCH 1/2] Added delay feature so that a job only starts after a given number of chunks --- autosubmit/job/job_dict.py | 48 +++++++++++++++++++++---------------- autosubmit/job/job_list.py | 22 +++++++++++------ autosubmit/job/job_utils.py | 3 ++- test/unit/test_dic_jobs.py | 11 +++++---- 4 files changed, 51 insertions(+), 33 deletions(-) diff --git a/autosubmit/job/job_dict.py b/autosubmit/job/job_dict.py index 177576819..53bcdca4e 100644 --- a/autosubmit/job/job_dict.py +++ b/autosubmit/job/job_dict.py @@ -78,7 +78,8 @@ class DicJobs: self._create_jobs_member(section, priority, frequency, default_job_type, jobs_data) elif running == 'chunk': synchronize = self.get_option(section, "SYNCHRONIZE", None) - self._create_jobs_chunk(section, priority, frequency, default_job_type, synchronize, jobs_data) + delay = int(self.get_option(section, "DELAY", -1)) + self._create_jobs_chunk(section, priority, frequency, default_job_type, synchronize, delay, jobs_data) def _create_jobs_once(self, section, priority, default_job_type, jobs_data=dict()): """ @@ -100,7 +101,7 @@ class DicJobs: :type section: str :param priority: priority for the jobs :type priority: int - :param frequency: if greater than 1, only creates one job each frequency startdates. Allways creates one job + :param frequency: if greater than 1, only creates one job each frequency startdates. Always creates one job for the last :type frequency: int """ @@ -121,7 +122,7 @@ class DicJobs: :type section: str :param priority: priority for the jobs :type priority: int - :param frequency: if greater than 1, only creates one job each frequency members. Allways creates one job + :param frequency: if greater than 1, only creates one job each frequency members. Always creates one job for the last :type frequency: int """ @@ -141,7 +142,7 @@ class DicJobs: conditional decision to the father which makes the call ''' - def _create_jobs_chunk(self, section, priority, frequency, default_job_type, synchronize=None, jobs_data=dict()): + def _create_jobs_chunk(self, section, priority, frequency, default_job_type, synchronize=None, delay=0, jobs_data=dict()): """ Create jobs to be run once per chunk @@ -153,6 +154,8 @@ class DicJobs: :param frequency: if greater than 1, only creates one job each frequency chunks. Always creates one job for the last :type frequency: int + :param delay: if this parameter is set, the job is only created for the chunks greater than the delay + :type delay: int """ # Temporally creation for unified jobs in case of synchronize if synchronize is not None: @@ -160,14 +163,15 @@ class DicJobs: count = 0 for chunk in self._chunk_list: count += 1 - if count % frequency == 0 or count == len(self._chunk_list): - if synchronize == 'date': - tmp_dic[chunk] = self.build_job(section, priority, None, None, - chunk, default_job_type, jobs_data) - elif synchronize == 'member': - tmp_dic[chunk] = dict() - for date in self._date_list: - tmp_dic[chunk][date] = self.build_job(section, priority, date, None, + if delay == -1 or delay < chunk: + if count % frequency == 0 or count == len(self._chunk_list): + if synchronize == 'date': + tmp_dic[chunk] = self.build_job(section, priority, None, None, + chunk, default_job_type, jobs_data) + elif synchronize == 'member': + tmp_dic[chunk] = dict() + for date in self._date_list: + tmp_dic[chunk][date] = self.build_job(section, priority, date, None, chunk, default_job_type, jobs_data) # Real dic jobs assignment/creation self._dic[section] = dict() @@ -178,15 +182,16 @@ class DicJobs: count = 0 for chunk in self._chunk_list: count += 1 - if count % frequency == 0 or count == len(self._chunk_list): - if synchronize == 'date': - self._dic[section][date][member][chunk] = tmp_dic[chunk] - elif synchronize == 'member': - self._dic[section][date][member][chunk] = tmp_dic[chunk][date] - else: - self._dic[section][date][member][chunk] = self.build_job(section, priority, date, member, - chunk, default_job_type, jobs_data) - self._jobs_list.graph.add_node(self._dic[section][date][member][chunk].name) + if delay == -1 or delay < chunk: + if count % frequency == 0 or count == len(self._chunk_list): + if synchronize == 'date': + self._dic[section][date][member][chunk] = tmp_dic[chunk] + elif synchronize == 'member': + self._dic[section][date][member][chunk] = tmp_dic[chunk][date] + else: + self._dic[section][date][member][chunk] = self.build_job(section, priority, date, member, + chunk, default_job_type, jobs_data) + self._jobs_list.graph.add_node(self._dic[section][date][member][chunk].name) def get_jobs(self, section, date=None, member=None, chunk=None): """ @@ -274,6 +279,7 @@ class DicJobs: job.date_format = self._date_format job.frequency = int(self.get_option(section, "FREQUENCY", 1)) + job.delay = int(self.get_option(section, "DELAY", -1)) job.wait = self.get_option(section, "WAIT", 'true').lower() == 'true' job.rerun_only = self.get_option(section, "RERUN_ONLY", 'false').lower() == 'true' diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 2c4022c04..c139fdb8c 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -146,7 +146,7 @@ class JobList: for job_section in jobs_parser.sections(): Log.debug("Adding dependencies for {0} jobs".format(job_section)) - # If does not has dependencies, do nothing + # If does not have dependencies, do nothing if not jobs_parser.has_option(job_section, option): continue @@ -162,13 +162,19 @@ class JobList: dependencies = dict() for key in dependencies_keys: if '-' not in key and '+' not in key: - dependencies[key] = Dependency(key) + dependency = Dependency(key) + section = key else: sign = '-' if '-' in key else '+' key_split = key.split(sign) - dependency_running_type = dic_jobs.get_option(key_split[0], 'RUNNING', 'once').lower() - dependency = Dependency(key_split[0], int(key_split[1]), dependency_running_type, sign) - dependencies[key] = dependency + section = key_split[0] + distance = key_split[1] + dependency_running_type = dic_jobs.get_option(section, 'RUNNING', 'once').lower() + dependency = Dependency(section, int(distance), dependency_running_type, sign) + + delay = int(dic_jobs.get_option(section, 'DELAY', -1)) + dependency.delay = delay + dependencies[key] = dependency return dependencies @staticmethod @@ -184,8 +190,10 @@ class JobList: continue for parent in dic_jobs.get_jobs(dependency.section, date, member, chunk): - job.add_parent(parent) - graph.add_edge(parent.name, job.name) + # only creates the dependency in the graph if the delay is not defined or if the chunk is greater than it + if dependency.delay == -1 or chunk > dependency.delay: + job.add_parent(parent) + graph.add_edge(parent.name, job.name) JobList.handle_frequency_interval_dependencies(chunk, chunk_list, date, date_list, dic_jobs, job, member, member_list, dependency.section, graph) diff --git a/autosubmit/job/job_utils.py b/autosubmit/job/job_utils.py index 25168ca0c..9b9c5efe6 100644 --- a/autosubmit/job/job_utils.py +++ b/autosubmit/job/job_utils.py @@ -44,9 +44,10 @@ class Dependency(object): """ - def __init__(self, section, distance=None, running=None, sign=None): + def __init__(self, section, distance=None, running=None, sign=None, delay=-1): self.section = section self.distance = distance self.running = running self.sign = sign + self.delay = delay diff --git a/test/unit/test_dic_jobs.py b/test/unit/test_dic_jobs.py index 3234d79e2..7448c02ff 100644 --- a/test/unit/test_dic_jobs.py +++ b/test/unit/test_dic_jobs.py @@ -97,9 +97,10 @@ class TestDicJobs(TestCase): priority = 999 frequency = 123 synchronize = 'date' + delay = -1 self.parser_mock.has_option = Mock(return_value=True) self.parser_mock.get = Mock(return_value='chunk') - self.dictionary.get_option = Mock(side_effect=[frequency, synchronize]) + self.dictionary.get_option = Mock(side_effect=[frequency, synchronize, delay]) self.dictionary._create_jobs_once = Mock() self.dictionary._create_jobs_startdate = Mock() self.dictionary._create_jobs_member = Mock() @@ -112,7 +113,7 @@ class TestDicJobs(TestCase): self.dictionary._create_jobs_once.assert_not_called() self.dictionary._create_jobs_startdate.assert_not_called() self.dictionary._create_jobs_member.assert_not_called() - self.dictionary._create_jobs_chunk.assert_called_once_with(section, priority, frequency, Type.BASH, synchronize, {}) + self.dictionary._create_jobs_chunk.assert_called_once_with(section, priority, frequency, Type.BASH, synchronize, delay, {}) def test_dic_creates_right_jobs_by_startdate(self): # arrange @@ -283,6 +284,7 @@ class TestDicJobs(TestCase): member = 'fc0' chunk = 'ch0' frequency = 123 + delay = -1 platform_name = 'fake-platform' filename = 'fake-fike' queue = 'fake-queue' @@ -292,9 +294,9 @@ class TestDicJobs(TestCase): memory = memory_per_task = 444 wallclock = 555 notify_on = 'COMPLETED FAILED' - self.parser_mock.has_option = Mock(side_effect=[True, True, True, True, True, True, True, True, True, True, + self.parser_mock.has_option = Mock(side_effect=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, True]) - self.parser_mock.get = Mock(side_effect=[frequency, 'True', 'True', 'bash', platform_name, filename, queue, + self.parser_mock.get = Mock(side_effect=[frequency, delay, 'True', 'True', 'bash', platform_name, filename, queue, 'True', processors, threads, tasks, memory, memory_per_task, wallclock, notify_on]) job_list_mock = Mock() @@ -314,6 +316,7 @@ class TestDicJobs(TestCase): self.assertEquals(chunk, created_job.chunk) self.assertEquals(self.date_format, created_job.date_format) self.assertEquals(frequency, created_job.frequency) + self.assertEquals(delay, created_job.delay) self.assertTrue(created_job.wait) self.assertTrue(created_job.rerun_only) self.assertEquals(Type.BASH, created_job.type) -- GitLab From f86fea0f4b110ac07bf6391eaf9c8dc9c45bcb39 Mon Sep 17 00:00:00 2001 From: Larissa Batista Leite Date: Tue, 24 Oct 2017 13:34:26 +0200 Subject: [PATCH 2/2] Update on the documentation adding delay feature and fixing small mistakes --- docs/source/codedoc/config.rst | 4 -- docs/source/codedoc/git.rst | 2 +- docs/source/codedoc/main.rst | 1 - docs/source/codedoc/platforms.rst | 15 +----- docs/source/introduction.rst | 6 ++- docs/source/workflows.rst | 78 +++++++++++++++++++++++++------ 6 files changed, 70 insertions(+), 36 deletions(-) diff --git a/docs/source/codedoc/config.rst b/docs/source/codedoc/config.rst index 6cc0d6c2b..5fb79db24 100644 --- a/docs/source/codedoc/config.rst +++ b/docs/source/codedoc/config.rst @@ -16,7 +16,3 @@ autosubmit.config.config_common :inherited-members: :members: -autosubmit.config.log ---------------------- -.. automodule:: autosubmit.config.log - :members: diff --git a/docs/source/codedoc/git.rst b/docs/source/codedoc/git.rst index 0722bb086..ba32da717 100644 --- a/docs/source/codedoc/git.rst +++ b/docs/source/codedoc/git.rst @@ -1,4 +1,4 @@ autosubmit.git ============== -.. automodule:: autosubmit.git.git_common +.. automodule:: autosubmit.git.autosubmit_git :members: diff --git a/docs/source/codedoc/main.rst b/docs/source/codedoc/main.rst index 9ce57d9e9..737c0d767 100644 --- a/docs/source/codedoc/main.rst +++ b/docs/source/codedoc/main.rst @@ -8,7 +8,6 @@ Module documentation autosubmit config database - date git job monitor diff --git a/docs/source/codedoc/platforms.rst b/docs/source/codedoc/platforms.rst index a14ebe39f..4fb9f3ee9 100644 --- a/docs/source/codedoc/platforms.rst +++ b/docs/source/codedoc/platforms.rst @@ -1,35 +1,22 @@ autosubmit.platform =================== -.. automodule:: autosubmit.platforms.hpcplatform - :show-inheritance: - :inherited-members: - :members: .. automodule:: autosubmit.platforms.ecplatform :show-inheritance: :members: .. automodule:: autosubmit.platforms.lsfplatform :show-inheritance: - :inherited-members: :members: .. automodule:: autosubmit.platforms.pbsplatform :show-inheritance: - :inherited-members: - :members: -.. automodule:: autosubmit.platforms.psplatform - :show-inheritance: - :inherited-members: :members: .. automodule:: autosubmit.platforms.sgeplatform :show-inheritance: - :inherited-members: :members: .. automodule:: autosubmit.platforms.slurmplatform :show-inheritance: - :inherited-members: :members: -.. automodule:: autosubmit.platforms.localplatform +.. automodule:: autosubmit.platforms.locplatform :show-inheritance: - :inherited-members: :members: diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index 080ed6024..14b9f82f1 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -58,9 +58,10 @@ To create a new experiment, run the command: *HPCname* is the name of the main HPC platform for the experiment: it will be the default platform for the tasks. *Description* is a brief experiment description. -This command assigns a unique four character identifier (``xxxx``, names starting from a letter, the other three characters) to the experiment and creates a new folder in experiments repository with structure shown in Figure 1.1. +This command assigns a unique four character identifier (``xxxx``, names starting from a letter, the other three characters) to the experiment and creates a new folder in experiments repository with structure shown in Figure :numref:`exp_folder`. .. figure:: fig1.png + :name: exp_folder :width: 33% :align: center :alt: experiment folder @@ -70,9 +71,10 @@ This command assigns a unique four character identifier (``xxxx``, names startin Experiment configuration ------------------------ -To configure the experiment, edit ``expdef_xxxx.conf``, ``jobs_xxxx.conf`` and ``platforms_xxxx.conf`` in the ``conf`` folder of the experiment (see contents in Figure 1.2). +To configure the experiment, edit ``expdef_xxxx.conf``, ``jobs_xxxx.conf`` and ``platforms_xxxx.conf`` in the ``conf`` folder of the experiment (see contents in Figure :numref:`exp_config`). .. figure:: fig2.png + :name: exp_config :width: 50% :align: center :alt: configuration files diff --git a/docs/source/workflows.rst b/docs/source/workflows.rst index 2eb4e1881..c4b3e6e5c 100644 --- a/docs/source/workflows.rst +++ b/docs/source/workflows.rst @@ -3,7 +3,7 @@ Defining the workflow ##################### One of the most important step that you have to do when planning to use autosubmit for an experiment is the definition -of the workflow the experiment will use. On this section you will learn about the workflow definition syntax so you will +of the workflow the experiment will use. In this section you will learn about the workflow definition syntax so you will be able to exploit autosubmit's full potential .. warning:: @@ -32,7 +32,7 @@ be finished before launching the job that has the DEPENDENCIES attribute. DEPENDENCIES = One -The resulting workflow can be seen on figure 5.1 +The resulting workflow can be seen in Figure :numref:`simple` .. figure:: workflows/simple.png :name: simple @@ -49,7 +49,7 @@ Running jobs once per startdate, member or chunk Autosubmit is capable of running ensembles made of various startdates and members. It also has the capability to divide member execution on different chunks. -To set at what level a job has to run you have to use the RUNNING attribute. It has four posible values: once, date, +To set at what level a job has to run you have to use the RUNNING attribute. It has four possible values: once, date, member and chunk corresponding to running once, once per startdate, once per member or once per chunk respectively. .. code-block:: ini @@ -73,9 +73,10 @@ member and chunk corresponding to running once, once per startdate, once per mem RUNNING = chunk -The resulting workflow can be seen on figure 5.2 for a experiment with 2 startdates, 2 members and 2 chunks. +The resulting workflow can be seen in Figure :numref:`running` for a experiment with 2 startdates, 2 members and 2 chunks. .. figure:: workflows/running.png + :name: running :width: 100% :align: center :alt: simple workflow plot @@ -93,7 +94,7 @@ Dependencies with previous jobs _______________________________ Autosubmit can manage dependencies between jobs that are part of different chunks, members or startdates. The next -example will show how to make wait a simulation job for the previous chunk of the simulation. To do that, we add +example will show how to make a simulation job wait for the previous chunk of the simulation. To do that, we add sim-1 on the DEPENDENCIES attribute. As you can see, you can add as much dependencies as you like separated by spaces .. code-block:: ini @@ -113,7 +114,7 @@ sim-1 on the DEPENDENCIES attribute. As you can see, you can add as much depende RUNNING = chunk -The resulting workflow can be seen on figure 5.3 +The resulting workflow can be seen in Figure :numref:`dprevious` .. warning:: @@ -123,6 +124,7 @@ The resulting workflow can be seen on figure 5.3 .. figure:: workflows/dependencies_previous.png + :name: dprevious :width: 100% :align: center :alt: simple workflow plot @@ -163,9 +165,10 @@ jobs to be finished. That is the case of the postprocess combine dependency on t RUNNING = member -The resulting workflow can be seen on figure 5.4 +The resulting workflow can be seen in Figure :numref:`dependencies` .. figure:: workflows/dependencies_running.png + :name: dependencies :width: 100% :align: center :alt: simple workflow plot @@ -177,8 +180,8 @@ Job frequency ------------- Some times you just don't need a job to be run on every chunk or member. For example, you may want to launch the postprocessing -job after various chunks have completed. This behaviour can be achieved by using the FREQUENCY attribute. You can specify -an integer I on this attribute and the job will run only once for each I iterations on the running level. +job after various chunks have completed. This behaviour can be achieved using the FREQUENCY attribute. You can specify +an integer I for this attribute and the job will run only once for each I iterations on the running level. .. hint:: You don't need to adjust the frequency to be a divisor of the total jobs. A job will always execute at the last @@ -207,9 +210,10 @@ an integer I on this attribute and the job will run only once for each I iterati RUNNING = member -The resulting workflow can be seen on figure 5.5 +The resulting workflow can be seen in Figure :numref:`frequency` .. figure:: workflows/frequency.png + :name: frequency :width: 100% :align: center :alt: simple workflow plot @@ -244,9 +248,10 @@ of synchronization do you want. See the below examples with and without this par DEPENDENCIES = SIM RUNNING = chunk -The resulting workflow can be seen on figure 5.6 +The resulting workflow can be seen in Figure :numref:`nosync` .. figure:: workflows/no-synchronize.png + :name: nosync :width: 100% :align: center :alt: simple workflow plot @@ -258,9 +263,10 @@ The resulting workflow can be seen on figure 5.6 [ASIM] SYNCHRONIZE = member -The resulting workflow of setting SYNCHRONIZE parameter to 'member' can be seen on figure 5.7 +The resulting workflow of setting SYNCHRONIZE parameter to 'member' can be seen in Figure :numref:`msynchronize` .. figure:: workflows/member-synchronize.png + :name: msynchronize :width: 100% :align: center :alt: simple workflow plot @@ -272,15 +278,58 @@ The resulting workflow of setting SYNCHRONIZE parameter to 'member' can be seen [ASIM] SYNCHRONIZE = date -The resulting workflow of setting SYNCHRONIZE parameter to 'date' can be seen on figure 5.8 +The resulting workflow of setting SYNCHRONIZE parameter to 'date' can be seen in Figure :numref:`dsynchronize` .. figure:: workflows/date-synchronize.png + :name: dsynchronize :width: 100% :align: center :alt: simple workflow plot Example showing dependencies between chunk jobs running with date synchronize. +Job delay +------------------ + +Some times you need a job to be run after a certain number of chunks. For example, you may want to launch the asim +job after various chunks have completed. This behaviour can be achieved using the DELAY attribute. You can specify +an integer N for this attribute and the job will run only after N chunks. + +.. hint:: + This job parameter was thought to work with jobs with RUNNING parameter equals to 'chunk'. + +.. code-block:: ini + + [ini] + FILE = ini.sh + RUNNING = member + + [sim] + FILE = sim.sh + DEPENDENCIES = ini sim-1 + RUNNING = chunk + + [asim] + FILE = asim.sh + DEPENDENCIES = sim asim-1 + RUNNING = chunk + DELAY = 2 + + [post] + FILE = post.sh + DEPENDENCIES = sim asim + RUNNING = chunk + +The resulting workflow can be seen in Figure :numref:`delay` + +.. figure:: workflows/experiment_delay_doc.png + :name: delay + :width: 100% + :align: center + :alt: simple workflow with delay option + + Example showing the data assimilation job starting only from chunk 3. + Rerun dependencies ------------------ @@ -319,9 +368,10 @@ case, but will appear on the reruns. DEPENDENCIES = postprocess RUNNING = member -The resulting workflow can be seen on figure 5.9 for a rerun of chunks 2 and 3 of member 2. +The resulting workflow can be seen in Figure :numref:`rerun` for a rerun of chunks 2 and 3 of member 2. .. figure:: workflows/rerun.png + :name: rerun :width: 100% :align: center :alt: simple workflow plot -- GitLab