From 999493ba651579bed0b36fc1f0b5489241e1698e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Mar 2021 11:03:22 +0100 Subject: [PATCH 1/4] TWO_STEP_START changed behavoir when all priority jobs are waiting or failed --- autosubmit/job/job_list.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index ab897ae27..5b2295f77 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -850,11 +850,13 @@ class JobList(object): prev_jobs_to_run_first = self.jobs_to_run_first if len(self.jobs_to_run_first) > 0: self.jobs_to_run_first = [ job for job in self.jobs_to_run_first if job.status != Status.COMPLETED ] - #if len(self.jobs_to_run_first) > 0: - #waiting_jobs = [ job for job in self.jobs_to_run_first if job.status != Status.WAITING ] - #if len(waiting_jobs) == len(self.jobs_to_run_first): - #self.jobs_to_run_first = [] - #Log.warning("No more jobs to run first, there were still pending jobs but they're unable to run without their parents.") + keep_running = False + for job in self.jobs_to_run_first: + running_parents = [parent for parent in job.parents if parent.status != Status.WAITING and parent.status != Status.FAILED ] #job is parent of itself + if len(running_parents) == len(job.parents): + keep_running = True + if len(self.jobs_to_run_first) > 0 and keep_running is False: + raise AutosubmitCritical("No more jobs to run first, there were still pending jobs but they're unable to run without their parents or there are failed jobs.",7014) def parse_two_step_start(self, unparsed_jobs): jobs_to_run_first = list() job_names = "" -- GitLab From 3ef825c8992741fc4e8f67e30b75f09a2887f2cb Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Mar 2021 13:02:00 +0100 Subject: [PATCH 2/4] TWO_STEP_START improved regex for chunkls --- autosubmit/job/job_list.py | 58 +++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git 
a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 5b2295f77..d42e992c7 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -864,8 +864,8 @@ class JobList(object): jobs_to_check = unparsed_jobs.split("&") job_names = jobs_to_check[0] unparsed_jobs = jobs_to_check[1] - if "," in unparsed_jobs: - semiparsed_jobs = unparsed_jobs.split(",") + if ";" in unparsed_jobs: + semiparsed_jobs = unparsed_jobs.split(";") if 2 <= len(semiparsed_jobs) <= 4: section_job = semiparsed_jobs[0] date = semiparsed_jobs[1] @@ -905,21 +905,65 @@ class JobList(object): jobs_final = [] jobs_final_2 = [] - + number_start_at = 0 + for char in self._member_list[0]: + if char.isdigit(): + break + number_start_at += 1 if member_or_chunk_list != "": + final_member_or_chunk_list = "" + number_range = re.findall("\[[0-9]+-[0-9]+\]", member_or_chunk_list.lower()) + if len(number_range) > 0: + for numbers_found in number_range: + number_range = numbers_found.split('-') + lower_bound = int(number_range[0][1:]) + upper_bound = int(number_range[1][:-1]) + for seq in xrange(lower_bound, upper_bound): + final_member_or_chunk_list += str(seq) + "," + final_member_or_chunk_list += str(upper_bound) + "," + numbers_separated = re.findall("\[[0-9]*[^-a-z]+\]", member_or_chunk_list.lower()) + if len(numbers_separated) > 0: + for numbers in numbers_separated: + if ',' in numbers: + numbers = numbers.split(',') + else: + numbers = numbers.split() + for number in numbers: + final_member_or_chunk_list += str(number).strip("[]") + "," + final_member_or_chunk_list = final_member_or_chunk_list if 'c' in member_or_chunk_list[0]: - jobs_final = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.chunk) + "([^a-z0-9_]|$)",member_or_chunk_list.lower()) is not None or job.running == "once"] + jobs_final = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.chunk) + "([^a-z0-9_]|$)",final_member_or_chunk_list) is not None or job.running == "once"] elif 'm' in 
member_or_chunk_list[0]: - jobs_final = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.member).lower() + "([^a-z0-9_]|$)", member_or_chunk_list.lower()) is not None or job.running == "once"] + jobs_final = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.member)[number_start_at:].lower() + "([^a-z0-9_]|$)", final_member_or_chunk_list) is not None or job.running == "once"] else: jobs_final = [] else: jobs_final = jobs_date if chunk_or_member_list != "": + final_member_or_chunk_list = "" + number_range = re.findall("\[[0-9]+-[0-9]+\]", member_or_chunk_list.lower()) + if len(number_range) > 0: + for numbers_found in number_range: + number_range = numbers_found.split('-') + lower_bound = int(number_range[0][1:]) + upper_bound = int(number_range[1][:-1]) + for seq in xrange(lower_bound, upper_bound): + final_member_or_chunk_list += str(seq) + "," + final_member_or_chunk_list += str(upper_bound) + "," + numbers_separated = re.findall("\[[0-9]+[^-a-z]+\]", member_or_chunk_list.lower()) + if len(numbers_separated) > 0: + for numbers in numbers_separated: + if ',' in numbers: + numbers = numbers.split(',') + else: + numbers = numbers.split() + for number in numbers: + final_member_or_chunk_list += str(number).strip("[]") + "," + final_member_or_chunk_list = final_member_or_chunk_list if 'c' in chunk_or_member_list[0]: - jobs_final_2 = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.chunk) + "([^a-z0-9_]|$)",chunk_or_member_list.lower()) is not None or job.running == "once"] + jobs_final_2 = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.chunk) + "([^a-z0-9_]|$)",final_member_or_chunk_list) is not None or job.running == "once"] elif 'm' in chunk_or_member_list[0]: - jobs_final_2 = [job for job in jobs_date if re.search("(^|[^0-9a-z_])" + str(job.member).lower() + "([^a-z0-9_]|$)", chunk_or_member_list.lower()) is not None or job.running == "once"] + jobs_final_2 = [job for job in jobs_date if 
re.search("(^|[^0-9a-z_])" + str(job.member)[number_start_at:].lower() + "([^a-z0-9_]|$)", final_member_or_chunk_list) is not None or job.running == "once"] else: jobs_final_2 = [] ultimate_jobs_list = list(set(jobs_final+jobs_by_name+jobs_final_2)) #Duplicates out -- GitLab From 3f3054d277ec4bce487151d73ac75af05c7e80fb Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Mar 2021 17:04:00 +0100 Subject: [PATCH 3/4] TWO_STEP_START docs --- autosubmit/job/job_list.py | 3 ++ docs/source/usage/run_two_step.rst | 64 ++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 docs/source/usage/run_two_step.rst diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index d42e992c7..8fd4c2d92 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -846,6 +846,7 @@ class JobList(object): all_jobs = [job.name for job in self._job_list] return all_jobs + def update_two_step_jobs(self): prev_jobs_to_run_first = self.jobs_to_run_first if len(self.jobs_to_run_first) > 0: @@ -857,6 +858,7 @@ class JobList(object): keep_running = True if len(self.jobs_to_run_first) > 0 and keep_running is False: raise AutosubmitCritical("No more jobs to run first, there were still pending jobs but they're unable to run without their parents or there are failed jobs.",7014) + def parse_two_step_start(self, unparsed_jobs): jobs_to_run_first = list() job_names = "" @@ -896,6 +898,7 @@ class JobList(object): :return: jobs_list :rtype: list """ + jobs_by_name = [ job for job in self._job_list if re.search("(^|[^0-9a-z_])"+job.name.lower()+"([^a-z0-9_]|$)",job_names.lower()) is not None ] jobs = [ job for job in self._job_list if re.search("(^|[^0-9a-z_])"+job.section.lower()+"([^a-z0-9_]|$)",section_list.lower()) is not None ] if date_list != "": diff --git a/docs/source/usage/run_two_step.rst b/docs/source/usage/run_two_step.rst new file mode 100644 index 000000000..1d8fd2a7e --- /dev/null +++ b/docs/source/usage/run_two_step.rst @@ -0,0 
+1,64 @@ +######## +Prepare an experiment to run in two independent job_list. (Prioritary jobs, Two-step-run) +######## + +Feature overview and configuration +----------------- + +This feature allows to run an experiment in two separated steps without the need of do anything manually. + +To achieve this, you will have to use a special parameter called TWO_STEP_START in which you will put the list of the jobs that you want to run in an exclusive mode. These jobs will run until all of them finish and, once they finish, the rest of the jobs will begin execution. + +It can be activated through TWO_STEP_START and it is set on expdef_a02n.conf, under the [experiment] section. + +.. code-block:: ini + + [experiment] + DATELIST = 20120101 20120201 + MEMBERS = fc00[0-3] + CHUNKSIZEUNIT = day + CHUNKSIZE = 1 + NUMCHUNKS = 10 + CHUNKINI = + CALENDAR = standard + # To run before the rest of experiment: + TWO_STEP_START = + +In order to make it easier to use, there are three modes for using this feature, based on job_names and section;dates;member_or_chunk(M/C);chunk_or_member(C/M). + +* By using job_names alone, you will need to list all job names one by one, separated by the ',' char. +* By using section;dates;member_or_chunk(M/C);chunk_or_member(C/M), you will be able to select multiple jobs at once by combining these filters. +* By using both options, job_names and section;dates;member_or_chunk(M/C);chunk_or_member(C/M), you will have to put & between the two modes. + +There are 5 fields on TWO_STEP_START, all of them are optional but there are certain limitations: + +* **Job_name**: [Independent] List of job names, separated by ',' char. Optional, doesn't depend on any field. Separated from the rest of fields by '&'; must be the first field if specified. +* **Section**: [Independent] List of sections, separated by ',' char. Optional, can be used alone. Separated from the rest of fields by ';' +* **Dates**: [Depends on section] List of dates, separated by ',' char. 
Optional, but depends on Section field. Separated from the rest of fields by ';' +* **member_or_chunk**: [Depends on Dates(OR)] List of chunk or member, must start with C or M to indicate the filter type. Jobs are selected by [1,2,3..] or by a range [0-9] Optional, but depends on Dates field. Separated from the rest of fields by ';' +* **chunk_or_member**: [Depends on Dates(OR)] List of member or chunk, must start with M or C to indicate the filter type. Jobs are selected by [1,2,3..] or by a range [0-9] Optional, but depends on Dates field. Separated from the rest of fields by ';' + +Example: +:: + + vi /cxxx/conf/expdef_cxxx.conf + +.. code-block:: ini + + [experiment] + DATELIST = 20120101 20120201 + MEMBERS = 00[0-3] + CHUNKSIZEUNIT = day + CHUNKSIZE = 1 + NUMCHUNKS = 10 + CHUNKINI = + CALENDAR = standard + # To run before the rest of experiment: + TWO_STEP_START = LOCAL_SEND_INITIAL_DA,COMPILE_DA,LOCAL_SETUP,LOCAL_SEND,REMOTE_COMPILE,SIM;20120101;c[1] + + + +Finally, you can launch Autosubmit *run* in background and with ``nohup`` (continue running although the user who launched the process logs out). 
+:: + + nohup autosubmit run cxxx & -- GitLab From a74af26baeec619b2d2ea415a006ec7758764408 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 26 Mar 2021 09:45:07 +0100 Subject: [PATCH 4/4] TWO_STEP_START documentation --- autosubmit/job/job_list.py | 2 +- docs/source/usage.rst | 3 ++- docs/source/usage/run_two_step.rst | 35 +++++++++++++++++------------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 8fd4c2d92..ea3a9fa95 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -898,7 +898,7 @@ class JobList(object): :return: jobs_list :rtype: list """ - + jobs_by_name = [ job for job in self._job_list if re.search("(^|[^0-9a-z_])"+job.name.lower()+"([^a-z0-9_]|$)",job_names.lower()) is not None ] jobs = [ job for job in self._job_list if re.search("(^|[^0-9a-z_])"+job.section.lower()+"([^a-z0-9_]|$)",section_list.lower()) is not None ] if date_list != "": diff --git a/docs/source/usage.rst b/docs/source/usage.rst index ea29da196..fbb04950a 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -59,4 +59,5 @@ Command list usage/configure usage/wrappers usage/remote_dependencies - usage/report \ No newline at end of file + usage/report + usage/run_two_step \ No newline at end of file diff --git a/docs/source/usage/run_two_step.rst b/docs/source/usage/run_two_step.rst index 1d8fd2a7e..4714f165b 100644 --- a/docs/source/usage/run_two_step.rst +++ b/docs/source/usage/run_two_step.rst @@ -1,9 +1,9 @@ -######## -Prepare an experiment to run in two independent job_list. (Prioritary jobs, Two-step-run) -######## +############################################################################################## +How to prepare an experiment to run in two independent job_list. 
(Priority jobs, Two-step-run) +############################################################################################## -Feature overview and configuration ------------------ +Feature overview +---------------- This feature allows to run an experiment in two separated steps without the need of do anything manually. @@ -38,23 +38,28 @@ There are 5 fields on TWO_STEP_START, all of them are optional but there are cer * **member_or_chunk**: [Depends on Dates(OR)] List of chunk or member, must start with C or M to indicate the filter type. Jobs are selected by [1,2,3..] or by a range [0-9] Optional, but depends on Dates field. Separated from the rest of fields by ';' * **chunk_or_member**: [Depends on Dates(OR)] List of member or chunk, must start with M or C to indicate the filter type. Jobs are selected by [1,2,3..] or by a range [0-9] Optional, but depends on Dates field. Separated from the rest of fields by ';' -Example: -:: +Example +------- - vi /cxxx/conf/expdef_cxxx.conf +Assume the expdef configuration is as follows: .. 
code-block:: ini [experiment] - DATELIST = 20120101 20120201 - MEMBERS = 00[0-3] + DATELIST = 20120101 + MEMBERS = 00[0-1] CHUNKSIZEUNIT = day CHUNKSIZE = 1 - NUMCHUNKS = 10 - CHUNKINI = - CALENDAR = standard - # To run before the rest of experiment: - TWO_STEP_START = LOCAL_SEND_INITIAL_DA,COMPILE_DA,LOCAL_SETUP,LOCAL_SEND,REMOTE_COMPILE,SIM;20120101;c[1] + NUMCHUNKS = 2 + TWO_STEP_START = a02n_20120101_000_1_REDUCE&COMPILE_DA,SIM;20120101;c[1] + +Given this job_list (jobs_conf has REMOTE_COMPILE(once), COMPILE_DA, DA, SIM, REDUCE): + +['a02n_REMOTE_COMPILE', 'a02n_20120101_000_1_SIM', 'a02n_20120101_000_2_SIM', 'a02n_20120101_001_1_SIM', 'a02n_20120101_001_2_SIM', 'a02n_COMPILE_DA', 'a02n_20120101_1_DA', 'a02n_20120101_2_DA', 'a02n_20120101_000_1_REDUCE', 'a02n_20120101_000_2_REDUCE', 'a02n_20120101_001_1_REDUCE', 'a02n_20120101_001_2_REDUCE'] + +The priority jobs will be (check TWO_STEP_START from expdef conf): + +['a02n_20120101_000_1_SIM', 'a02n_20120101_001_1_SIM', 'a02n_COMPILE_DA', 'a02n_20120101_000_1_REDUCE'] -- GitLab