From 9878f390d93874e8d25fdbd340bb71244dde2c05 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 09:49:52 +0100 Subject: [PATCH 1/9] Queuing times for inner_jobs fixed --- autosubmit/autosubmit.py | 2 +- autosubmit/job/job.py | 2 +- autosubmit/job/job_list.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index d94662632..0d76c7d18 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2687,7 +2687,7 @@ class Autosubmit: current_table_structure = get_structure(expid, BasicConfig.STRUCTURES_DIR) subjobs = [] for job in job_list.get_job_list(): - job_info = JobList.retrieve_times(job.status, job.name, job._tmp_path, make_exception=False, + job_info = JobList.retrieve_times(job.status, job.name, job._tmp_path, make_exception=True, job_times=None, seconds=True, job_data_collection=None) time_total = (job_info.queue_time + job_info.run_time) if job_info else 0 subjobs.append( diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index ce5b8fbe5..d0a26cae6 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1826,7 +1826,7 @@ class Job(object): template = '' if as_conf.get_remote_dependencies() == "true": if self.type == Type.BASH: - template = 'sleep 5' + "\n" + template = 'sleep 60' + "\n" elif self.type == Type.PYTHON2: template = 'time.sleep(5)' + "\n" elif self.type == Type.PYTHON3 or self.type == Type.PYTHON: diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 3365bd8c9..e58b39f30 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -2979,13 +2979,13 @@ class JobList(object): # COMPLETED adds too much overhead so these values are now stored in a database and retrieved separately submit_time, start_time, finish_time, status = JobList._job_running_check( status_code, name, tmp_path) - if status_code in [Status.RUNNING, Status.FAILED]: + if status_code in [Status.RUNNING, Status.FAILED, Status.COMPLETED]: running_for_min = (finish_time - start_time) queuing_for_min = (start_time - submit_time) submit_time = mktime(submit_time.timetuple()) start_time = mktime(start_time.timetuple()) finish_time = mktime(finish_time.timetuple()) if status_code in [ - Status.FAILED] else 0 + Status.FAILED, Status.COMPLETED] else 0 else: queuing_for_min = ( datetime.datetime.now() - submit_time) -- GitLab From 12dbf0e87e0aef473c449eaf1f020185d0480766 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 12:00:33 +0100 Subject: [PATCH 2/9] Stats now counts the exact amount of processors if node is prompt --- autosubmit/experiment/statistics.py | 36 ++++++++++++++++- autosubmit/job/job.py | 13 +++++++ autosubmit/job/job_dict.py | 10 +++-- autosubmit/job/job_list.py | 2 +- autosubmit/statistics/jobs_stat.py | 60 +++++++++++++++++++---------- autosubmit/statistics/statistics.py | 3 +- 6 files changed, 96 insertions(+), 28 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index 793210923..7305732a7 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -17,6 +17,7 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . +import math import datetime from autosubmit.job.job import Job from autosubmit.monitor.utils import FixedSizeList @@ -107,6 +108,34 @@ class ExperimentStats(object): def fail_run(self): return FixedSizeList(self._fail_run, 0.0) + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: + if nodes: + return nodes + elif tasks: + return math.ceil(processors / tasks) + elif processors_per_node and processors > processors_per_node: + return math.ceil(processors / processors_per_node) + else: + return 1 + + def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: + if processors_per_node: + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + return estimated_nodes * int(processors_per_node) + elif not processors_per_node and (tasks or nodes): + Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') + return processors + + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: + if nodes: + return int(nodes) + elif tasks: + return math.ceil(int(processors) / int(tasks)) + elif processors_per_node and int(processors) > int(processors_per_node): + return math.ceil(int(processors) / int(processors_per_node)) + else: + return 1 + def _calculate_stats(self): """ Main calculation @@ -116,6 +145,10 @@ class ExperimentStats(object): for i, job in enumerate(self._jobs_list): last_retrials = job.get_last_retrials() processors = job.total_processors + nodes = job.nodes + tasks = job.tasks + processors_per_node = job.processors_per_node + processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node) for retrial in last_retrials: if Job.is_a_completed_retrial(retrial): # The retrial has all necessary values and is status COMPLETED @@ -158,8 +191,7 @@ class ExperimentStats(object): self._total_jobs_run += len(last_retrials) self._total_jobs_failed += self.failed_jobs[i] self._threshold = max(self._threshold, job.total_wallclock) - self._expected_cpu_consumption += job.total_wallclock * \ - int(processors) + self._expected_cpu_consumption += job.total_wallclock * int(processors) self._expected_real_consumption += job.total_wallclock self._total_queueing_time += self._queued[i] diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index d0a26cae6..e1556f01e 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -163,6 +163,7 @@ class Job(object): 'M': '%M%', 'M_': '%M_%', 'm': '%m%', 'm_': '%m_%'} self._threads = '1' self._processors = '1' + self._processors_per_node = None self._memory = '' self._memory_per_task = '' self._chunk = None @@ -776,6 +777,17 @@ class Job(object): def processors(self, value): self._processors = value + @property + @autosubmit_parameter(name=['processors_per_node']) + def processors_per_node(self): + """Number of processors per node that the job can use.""" + return self._processors_per_node + + @processors_per_node.setter + def processors_per_node(self, value): + """Number of processors per node that the job can use.""" + self._processors_per_node = value + def inc_fail_count(self): """ Increments fail count @@ -1600,6 +1612,7 @@ class Job(object): self.total_jobs = as_conf.jobs_data[self.section].get("TOTALJOBS", job_platform.total_jobs) self.max_waiting_jobs = as_conf.jobs_data[self.section].get("MAXWAITINGJOBS", job_platform.max_waiting_jobs) self.processors = as_conf.jobs_data[self.section].get("PROCESSORS",as_conf.platforms_data.get(job_platform.name,{}).get("PROCESSORS","1")) + self.processors_per_node = as_conf.jobs_data[self.section].get("PROCESSORS_PER_NODE",as_conf.platforms_data.get(job_platform.name,{}).get("PROCESSORS_PER_NODE","1")) self.nodes = as_conf.jobs_data[self.section].get("NODES",as_conf.platforms_data.get(job_platform.name,{}).get("NODES","")) self.exclusive = as_conf.jobs_data[self.section].get("EXCLUSIVE",as_conf.platforms_data.get(job_platform.name,{}).get("EXCLUSIVE",False)) self.threads = as_conf.jobs_data[self.section].get("THREADS",as_conf.platforms_data.get(job_platform.name,{}).get("THREADS","1")) diff --git a/autosubmit/job/job_dict.py b/autosubmit/job/job_dict.py index 9645f493f..c9a81b2a8 100644 --- a/autosubmit/job/job_dict.py +++ b/autosubmit/job/job_dict.py @@ -397,9 +397,13 @@ class DicJobs: job.partition = str(parameters[section].get( "PARTITION", "")) job.check = str(parameters[section].get( "CHECK", "true")).lower() job.export = str(parameters[section].get( "EXPORT", "")) - job.processors = str(parameters[section].get( "PROCESSORS", "")) - job.threads = str(parameters[section].get( "THREADS", "")) - job.tasks = str(parameters[section].get( "TASKS", "")) + # Used by Stat command # check in 4.1+ as this doesn't exist + job.processors = str(parameters[section].get("PROCESSORS",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("PROCESSORS",""))) + job.processors_per_node = str(parameters[section].get("PROCESSORS_PER_NODE",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("PROCESSORS_PER_NODE",""))) + job.nodes = str(parameters[section].get("NODES",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("NODES",""))) + job.threads = str(parameters[section].get("THREADS",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("THREADS",""))) + job.tasks = str(parameters[section].get("TASKS",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("TASKS",""))) + job.memory = str(parameters[section].get("MEMORY", "")) job.memory_per_task = str(parameters[section].get("MEMORY_PER_TASK", "")) remote_max_wallclock = self.experiment_data["PLATFORMS"].get(job.platform_name,{}) diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index e58b39f30..d7648a2e9 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -2975,7 +2975,7 @@ class JobList(object): # Using standard procedure if status_code in [Status.RUNNING, Status.SUBMITTED, Status.QUEUING, - Status.FAILED] or make_exception is True: + Status.FAILED] or make_exception is True: # COMPLETED adds too much overhead so these values are now stored in a database and retrieved separately submit_time, start_time, finish_time, status = JobList._job_running_check( status_code, name, tmp_path) diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index b2d1de97b..984ebaa60 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -1,27 +1,47 @@ #!/bin/env/python from datetime import datetime, timedelta from .utils import timedelta2hours +from log.log import Log +import math class JobStat(object): - def __init__(self, name, processors, wallclock, section, date, member, chunk): - # type: (str, int, float, str, str, str, str) -> None - self._name = name - self._processors = processors - self._wallclock = wallclock - self.submit_time = None # type: datetime - self.start_time = None # type: datetime - self.finish_time = None # type: datetime - self.completed_queue_time = timedelta() - self.completed_run_time = timedelta() - self.failed_queue_time = timedelta() - self.failed_run_time = timedelta() - self.retrial_count = 0 - self.completed_retrial_count = 0 - self.failed_retrial_count = 0 - self.section = section - self.date = date - self.member = member - self.chunk = chunk + def __init__(self, name, processors, wallclock, section, date, member, chunk, processors_per_node,tasks,nodes ): + # type: (str, int, float, str, str, str, str, str, str , str) -> None + self._name = name + self._processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node) + self._wallclock = wallclock + self.submit_time = None # type: datetime + self.start_time = None # type: datetime + self.finish_time = None # type: datetime + self.completed_queue_time = timedelta() + self.completed_run_time = timedelta() + self.failed_queue_time = timedelta() + self.failed_run_time = timedelta() + self.retrial_count = 0 + self.completed_retrial_count = 0 + self.failed_retrial_count = 0 + self.section = section + self.date = date + self.member = member + self.chunk = chunk + + def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: + if processors_per_node: + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + return estimated_nodes * int(processors_per_node) + elif not processors_per_node and (tasks or nodes): + Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') + return processors + + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: + if nodes: + return int(nodes) + elif tasks: + return math.ceil(int(processors) / int(tasks)) + elif processors_per_node and int(processors) > int(processors_per_node): + return math.ceil(int(processors) / int(processors_per_node)) + else: + return 1 def inc_retrial_count(self): self.retrial_count += 1 @@ -51,7 +71,7 @@ class JobStat(object): @property def expected_cpu_consumption(self): return self._wallclock * self._processors - + @property def name(self): return self._name diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 9f7590657..9b44471b2 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -47,9 +47,8 @@ class Statistics(object): for index, job in enumerate(self._jobs): retrials = job.get_last_retrials() for retrial in retrials: - print(retrial) job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( - job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk)) + job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk, job.processors_per_node, job.tasks, job.nodes)) job_stat.inc_retrial_count() if Job.is_a_completed_retrial(retrial): job_stat.inc_completed_retrial_count() -- GitLab From 8f127290bf7d948dae68b8bbeb4b7300c6dbb717 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 12:21:35 +0100 Subject: [PATCH 3/9] Changed to pass ro_crate and be more robust to miss introductions --- autosubmit/experiment/statistics.py | 23 +++++++---------------- autosubmit/statistics/jobs_stat.py | 24 ++++++++++++------------ 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index 7305732a7..f94df78fc 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -109,32 +109,23 @@ class ExperimentStats(object): return FixedSizeList(self._fail_run, 0.0) def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: - if nodes: + if str(nodes).isdigit(): return nodes - elif tasks: - return math.ceil(processors / tasks) - elif processors_per_node and processors > processors_per_node: - return math.ceil(processors / processors_per_node) + elif str(tasks).isdigit(): + return math.ceil(int(processors) / int(tasks)) + elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): + return math.ceil(int(processors) / int(processors_per_node)) else: return 1 def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: - if processors_per_node: + if str(processors_per_node).isdigit(): estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) return estimated_nodes * int(processors_per_node) - elif not processors_per_node and (tasks or nodes): + elif (str(tasks).isdigit() or str(nodes).isdigit()): Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') return processors - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: - if nodes: - return int(nodes) - elif tasks: - return math.ceil(int(processors) / int(tasks)) - elif processors_per_node and int(processors) > int(processors_per_node): - return math.ceil(int(processors) / int(processors_per_node)) - else: - return 1 def _calculate_stats(self): """ diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index 984ebaa60..b18ab3eaa 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -25,24 +25,24 @@ class JobStat(object): self.member = member self.chunk = chunk - def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: - if processors_per_node: - estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) - return estimated_nodes * int(processors_per_node) - elif not processors_per_node and (tasks or nodes): - Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') - return processors - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: - if nodes: - return int(nodes) - elif tasks: + if str(nodes).isdigit(): + return nodes + elif str(tasks).isdigit(): return math.ceil(int(processors) / int(tasks)) - elif processors_per_node and int(processors) > int(processors_per_node): + elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: return 1 + def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: + if str(processors_per_node).isdigit(): + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + return estimated_nodes * int(processors_per_node) + elif (str(tasks).isdigit() or str(nodes).isdigit()): + Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') + return processors + def inc_retrial_count(self): self.retrial_count += 1 -- GitLab From d2cfff6e5a9990b78bf8d25d4d8bca56cd472ed2 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 12:23:19 +0100 Subject: [PATCH 4/9] Changed to pass ro_crate and be more robust to miss introductions --- autosubmit/statistics/jobs_stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index b18ab3eaa..ce1f34f7a 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -27,7 +27,7 @@ class JobStat(object): def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: if str(nodes).isdigit(): - return nodes + return int(nodes) elif str(tasks).isdigit(): return math.ceil(int(processors) / int(tasks)) elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): -- GitLab From fb41bd2425eb8df54d44c2278c181c4e08160c2f Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 12:44:48 +0100 Subject: [PATCH 5/9] fix bug --- autosubmit/experiment/statistics.py | 4 ++-- autosubmit/statistics/jobs_stat.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index f94df78fc..ca59a77e3 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -110,13 +110,13 @@ class ExperimentStats(object): def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: if str(nodes).isdigit(): - return nodes + return int(nodes) elif str(tasks).isdigit(): return math.ceil(int(processors) / int(tasks)) elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return 1 + return processors def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: if str(processors_per_node).isdigit(): diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index ce1f34f7a..96d355655 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -33,7 +33,7 @@ class JobStat(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return 1 + return processors def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: if str(processors_per_node).isdigit(): -- GitLab From d12193fd5519763befc2547a46224976fe57b479 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 12:56:45 +0100 Subject: [PATCH 6/9] sleep rechanged --- autosubmit/job/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index e1556f01e..c16924e1f 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -1839,7 +1839,7 @@ class Job(object): template = '' if as_conf.get_remote_dependencies() == "true": if self.type == Type.BASH: - template = 'sleep 60' + "\n" + template = 'sleep 5' + "\n" elif self.type == Type.PYTHON2: template = 'time.sleep(5)' + "\n" elif self.type == Type.PYTHON3 or self.type == Type.PYTHON: -- GitLab From da2245b938e15158aa611488ad09610bb07310ea Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 25 Jan 2024 13:07:27 +0100 Subject: [PATCH 7/9] changed the bugfix --- autosubmit/experiment/statistics.py | 7 +++++-- autosubmit/statistics/jobs_stat.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index ca59a77e3..6e4f12ca0 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -116,12 +116,15 @@ class ExperimentStats(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return processors + return None def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: if str(processors_per_node).isdigit(): estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) - return estimated_nodes * int(processors_per_node) + if not estimated_nodes: + return processors + else: + return estimated_nodes * int(processors_per_node) elif (str(tasks).isdigit() or str(nodes).isdigit()): Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') return processors diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index 96d355655..db12a7f2b 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -33,12 +33,15 @@ class JobStat(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return processors + return None def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: if str(processors_per_node).isdigit(): estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) - return estimated_nodes * int(processors_per_node) + if not estimated_nodes: + return processors + else: + return estimated_nodes * int(processors_per_node) elif (str(tasks).isdigit() or str(nodes).isdigit()): Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') return processors -- GitLab From f53a9e19b46c432dafe1e078f0d7534bca825f46 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 26 Jan 2024 11:58:01 +0100 Subject: [PATCH 8/9] added --exclusive --- autosubmit/experiment/statistics.py | 13 ++++++++----- autosubmit/job/job_dict.py | 2 +- autosubmit/statistics/jobs_stat.py | 17 ++++++++++------- autosubmit/statistics/statistics.py | 2 +- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index 6e4f12ca0..94ae05f2d 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -108,7 +108,7 @@ class ExperimentStats(object): def fail_run(self): return FixedSizeList(self._fail_run, 0.0) - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(nodes).isdigit(): return int(nodes) elif str(tasks).isdigit(): @@ -116,11 +116,14 @@ class ExperimentStats(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return None + if exclusive: + return 1 + else: + return None - def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: + def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(processors_per_node).isdigit(): - estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node,exclusive) if not estimated_nodes: return processors else: @@ -142,7 +145,7 @@ class ExperimentStats(object): nodes = job.nodes tasks = job.tasks processors_per_node = job.processors_per_node - processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node) + processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node, job.exclusive) for retrial in last_retrials: if Job.is_a_completed_retrial(retrial): # The retrial has all necessary values and is status COMPLETED diff --git a/autosubmit/job/job_dict.py b/autosubmit/job/job_dict.py index c9a81b2a8..ef2800988 100644 --- a/autosubmit/job/job_dict.py +++ b/autosubmit/job/job_dict.py @@ -403,7 +403,7 @@ class DicJobs: job.nodes = str(parameters[section].get("NODES",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("NODES",""))) job.threads = str(parameters[section].get("THREADS",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("THREADS",""))) job.tasks = str(parameters[section].get("TASKS",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("TASKS",""))) - + job.exclusive = parameters[section].get("EXCLUSIVE",self.experiment_data.get("PLATFORMS",{}).get(job.platform_name,{}).get("EXCLUSIVE",False)) job.memory = str(parameters[section].get("MEMORY", "")) job.memory_per_task = str(parameters[section].get("MEMORY_PER_TASK", "")) remote_max_wallclock = self.experiment_data["PLATFORMS"].get(job.platform_name,{}) diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index db12a7f2b..a2b79e7b5 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -5,10 +5,10 @@ from log.log import Log import math class JobStat(object): - def __init__(self, name, processors, wallclock, section, date, member, chunk, processors_per_node,tasks,nodes ): - # type: (str, int, float, str, str, str, str, str, str , str) -> None + def __init__(self, name, processors, wallclock, section, date, member, chunk, processors_per_node, tasks, nodes, exclusive ): + # type: (str, int, float, str, str, str, str, str, str , str, str) -> None self._name = name - self._processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node) + self._processors = self._calculate_processing_elements(nodes, processors, tasks, processors_per_node, exclusive) self._wallclock = wallclock self.submit_time = None # type: datetime self.start_time = None # type: datetime @@ -25,7 +25,7 @@ class JobStat(object): self.member = member self.chunk = chunk - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(nodes).isdigit(): return int(nodes) elif str(tasks).isdigit(): @@ -33,11 +33,14 @@ class JobStat(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - return None + if exclusive: + return 1 + else: + return None - def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node) -> int: + def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(processors_per_node).isdigit(): - estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node,exclusive) if not estimated_nodes: return processors else: diff --git a/autosubmit/statistics/statistics.py b/autosubmit/statistics/statistics.py index 9b44471b2..0f4037793 100644 --- a/autosubmit/statistics/statistics.py +++ b/autosubmit/statistics/statistics.py @@ -48,7 +48,7 @@ class Statistics(object): retrials = job.get_last_retrials() for retrial in retrials: job_stat = self._name_to_jobstat_dict.setdefault(job.name, JobStat(job.name, parse_number_processors( - job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk, job.processors_per_node, job.tasks, job.nodes)) + job.processors), job.total_wallclock, job.section, job.date, job.member, job.chunk, job.processors_per_node, job.tasks, job.nodes, job.exclusive )) job_stat.inc_retrial_count() if Job.is_a_completed_retrial(retrial): job_stat.inc_completed_retrial_count() -- GitLab From b4305b38441498dd9cefa4b9b7fd9d3f0f74edf7 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 26 Jan 2024 12:24:20 +0100 Subject: [PATCH 9/9] changes --- autosubmit/experiment/statistics.py | 20 ++++++++++---------- autosubmit/statistics/jobs_stat.py | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/autosubmit/experiment/statistics.py b/autosubmit/experiment/statistics.py index 94ae05f2d..0188f0081 100644 --- a/autosubmit/experiment/statistics.py +++ b/autosubmit/experiment/statistics.py @@ -108,7 +108,7 @@ class ExperimentStats(object): def fail_run(self): return FixedSizeList(self._fail_run, 0.0) - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: if str(nodes).isdigit(): return int(nodes) elif str(tasks).isdigit(): @@ -116,21 +116,21 @@ class ExperimentStats(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - if exclusive: - return 1 - else: - return None + return 1 def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(processors_per_node).isdigit(): - estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node,exclusive) - if not estimated_nodes: - return processors + if str(nodes).isdigit(): + return int(nodes) * int(processors_per_node) else: - return estimated_nodes * int(processors_per_node) + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + if not exclusive and estimated_nodes <= 1 and int(processors) <= int(processors_per_node): + return int(processors) + else: + return estimated_nodes * int(processors_per_node) elif (str(tasks).isdigit() or str(nodes).isdigit()): Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') - return processors + return int(processors) def _calculate_stats(self): diff --git a/autosubmit/statistics/jobs_stat.py b/autosubmit/statistics/jobs_stat.py index a2b79e7b5..8eec5ec65 100644 --- a/autosubmit/statistics/jobs_stat.py +++ b/autosubmit/statistics/jobs_stat.py @@ -25,7 +25,7 @@ class JobStat(object): self.member = member self.chunk = chunk - def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: + def _estimate_requested_nodes(self,nodes,processors,tasks,processors_per_node) -> int: if str(nodes).isdigit(): return int(nodes) elif str(tasks).isdigit(): @@ -33,21 +33,21 @@ class JobStat(object): elif str(processors_per_node).isdigit() and int(processors) > int(processors_per_node): return math.ceil(int(processors) / int(processors_per_node)) else: - if exclusive: - return 1 - else: - return None + return 1 def _calculate_processing_elements(self,nodes,processors,tasks,processors_per_node,exclusive) -> int: if str(processors_per_node).isdigit(): - estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node,exclusive) - if not estimated_nodes: - return processors + if str(nodes).isdigit(): + return int(nodes) * int(processors_per_node) else: - return estimated_nodes * int(processors_per_node) + estimated_nodes = self._estimate_requested_nodes(nodes,processors,tasks,processors_per_node) + if not exclusive and estimated_nodes <= 1 and int(processors) <= int(processors_per_node): + return int(processors) + else: + return estimated_nodes * int(processors_per_node) elif (str(tasks).isdigit() or str(nodes).isdigit()): Log.warning(f'Missing PROCESSORS_PER_NODE. Should be set if TASKS or NODES are defined. The PROCESSORS will used instead.') - return processors + return int(processors) def inc_retrial_count(self): self.retrial_count += 1 -- GitLab