From f9239e642690c71d15920d1002849e70a92e234b Mon Sep 17 00:00:00 2001 From: Wilmer Uruchi Ticona Date: Tue, 9 Feb 2021 19:58:40 +0100 Subject: [PATCH] Clarifying documentation for #645. Adding variables documentation for #621. --- autosubmit/platforms/paramiko_submitter.py | 70 ++++++++++++++-------- autosubmit/platforms/platform.py | 11 ++-- docs/source/tutorial.rst | 8 ++- docs/source/variables.rst | 16 +++++ 4 files changed, 75 insertions(+), 30 deletions(-) diff --git a/autosubmit/platforms/paramiko_submitter.py b/autosubmit/platforms/paramiko_submitter.py index 49ed6ba9b..1ab81acf6 100644 --- a/autosubmit/platforms/paramiko_submitter.py +++ b/autosubmit/platforms/paramiko_submitter.py @@ -40,9 +40,9 @@ class ParamikoSubmitter(Submitter): """ def load_platforms_migrate(self, asconf, retries=5): - pass #Add all info related to migrate + pass # Add all info related to migrate - def load_local_platform(self,asconf): + def load_local_platform(self, asconf): platforms = dict() # Build Local Platform Object local_platform = LocalPlatform(asconf.expid, 'local', BasicConfig) @@ -50,9 +50,12 @@ class ParamikoSubmitter(Submitter): local_platform.max_processors = asconf.get_max_processors() local_platform.max_waiting_jobs = asconf.get_max_waiting_jobs() local_platform.total_jobs = asconf.get_total_jobs() - local_platform.scratch = os.path.join(BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) - local_platform.temp_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, 'ASlogs') - local_platform.root_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, local_platform.expid) + local_platform.scratch = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) + local_platform.temp_dir = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, 'ASlogs') + local_platform.root_dir = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, local_platform.expid) local_platform.host = 'localhost' # Add object to entry in dictionary platforms['local'] = local_platform @@ 
-90,9 +93,12 @@ class ParamikoSubmitter(Submitter): local_platform.max_processors = asconf.get_max_processors() local_platform.max_waiting_jobs = asconf.get_max_waiting_jobs() local_platform.total_jobs = asconf.get_total_jobs() - local_platform.scratch = os.path.join(BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) - local_platform.temp_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, 'ASlogs') - local_platform.root_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, local_platform.expid) + local_platform.scratch = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, asconf.expid, BasicConfig.LOCAL_TMP_DIR) + local_platform.temp_dir = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, 'ASlogs') + local_platform.root_dir = os.path.join( + BasicConfig.LOCAL_ROOT_DIR, local_platform.expid) local_platform.host = 'localhost' # Add object to entry in dictionary platforms['local'] = local_platform @@ -109,19 +115,26 @@ class ParamikoSubmitter(Submitter): platform_version = parser.get_option(section, 'VERSION', '') try: if platform_type == 'pbs': - remote_platform = PBSPlatform(asconf.expid, section.lower(), BasicConfig, platform_version) + remote_platform = PBSPlatform( + asconf.expid, section.lower(), BasicConfig, platform_version) elif platform_type == 'sge': - remote_platform = SgePlatform(asconf.expid, section.lower(), BasicConfig) + remote_platform = SgePlatform( + asconf.expid, section.lower(), BasicConfig) elif platform_type == 'ps': - remote_platform = PsPlatform(asconf.expid, section.lower(), BasicConfig) + remote_platform = PsPlatform( + asconf.expid, section.lower(), BasicConfig) elif platform_type == 'lsf': - remote_platform = LsfPlatform(asconf.expid, section.lower(), BasicConfig) + remote_platform = LsfPlatform( + asconf.expid, section.lower(), BasicConfig) elif platform_type == 'ecaccess': - remote_platform = EcPlatform(asconf.expid, section.lower(), BasicConfig, platform_version) + remote_platform = EcPlatform( + asconf.expid, section.lower(), BasicConfig, 
platform_version) elif platform_type == 'slurm': - remote_platform = SlurmPlatform(asconf.expid, section.lower(), BasicConfig) + remote_platform = SlurmPlatform( + asconf.expid, section.lower(), BasicConfig) else: - raise Exception("Queue type not specified on platform {0}".format(section)) + raise Exception( + "Queue type not specified on platform {0}".format(section)) except ParamikoPlatformException as e: Log.error("Queue exception: {0}".format(e.message)) @@ -149,20 +162,29 @@ class ParamikoSubmitter(Submitter): asconf.get_total_jobs())) remote_platform.hyperthreading = parser.get_option(section, 'HYPERTHREADING', 'false').lower() - remote_platform.project = parser.get_option(section, 'PROJECT', None) - remote_platform.budget = parser.get_option(section, 'BUDGET', remote_platform.project) - remote_platform.reservation = parser.get_option(section, 'RESERVATION', '') - remote_platform.exclusivity = parser.get_option(section, 'EXCLUSIVITY', '').lower() + remote_platform.project = parser.get_option( + section, 'PROJECT', None) + remote_platform.budget = parser.get_option( + section, 'BUDGET', remote_platform.project) + remote_platform.reservation = parser.get_option( + section, 'RESERVATION', '') + remote_platform.exclusivity = parser.get_option( + section, 'EXCLUSIVITY', '').lower() remote_platform.user = parser.get_option(section, 'USER', None) - remote_platform.scratch = parser.get_option(section, 'SCRATCH_DIR', None) - remote_platform.temp_dir = parser.get_option(section, 'TEMP_DIR', None) - remote_platform._default_queue = parser.get_option(section, 'QUEUE', None) - remote_platform._serial_queue = parser.get_option(section, 'SERIAL_QUEUE', None) + remote_platform.scratch = parser.get_option( + section, 'SCRATCH_DIR', None) + remote_platform.temp_dir = parser.get_option( + section, 'TEMP_DIR', None) + remote_platform._default_queue = parser.get_option( + section, 'QUEUE', None) + remote_platform._serial_queue = parser.get_option( + section, 'SERIAL_QUEUE', 
None) remote_platform.processors_per_node = parser.get_option(section, 'PROCESSORS_PER_NODE', None) remote_platform.custom_directives = parser.get_option(section, 'CUSTOM_DIRECTIVES', None) - Log.debug("Custom directives from platform.conf: {0}".format(remote_platform.custom_directives)) + Log.debug("Custom directives from platform.conf: {0}".format( + remote_platform.custom_directives)) remote_platform.scratch_free_space = parser.get_option(section, 'SCRATCH_FREE_SPACE', None) remote_platform.root_dir = os.path.join(remote_platform.scratch, remote_platform.project, diff --git a/autosubmit/platforms/platform.py b/autosubmit/platforms/platform.py index a27da0eee..1d40c83bd 100644 --- a/autosubmit/platforms/platform.py +++ b/autosubmit/platforms/platform.py @@ -43,6 +43,7 @@ class Platform(object): self.directory = None self.hyperthreading = 'false' self.max_wallclock = '' + self.total_jobs = None self.max_processors = None self._allow_arrays = False self._allow_wrappers = False @@ -155,7 +156,7 @@ class Platform(object): """ raise NotImplementedError - def get_file(self, filename, must_exist=True, relative_path='', ignore_log=False,wrapper_failed=False): + def get_file(self, filename, must_exist=True, relative_path='', ignore_log=False, wrapper_failed=False): """ Copies a file from the current platform to experiment's tmp folder @@ -211,7 +212,7 @@ class Platform(object): self.get_files([job_out_filename, job_err_filename], False, 'LOG_{0}'.format(exp_id)) - def get_completed_files(self, job_name, retries=0, recovery=False,wrapper_failed=False): + def get_completed_files(self, job_name, retries=0, recovery=False, wrapper_failed=False): """ Get the COMPLETED file of the given job @@ -228,8 +229,8 @@ class Platform(object): return True else: return False - if self.check_file_exists('{0}_COMPLETED'.format(job_name),wrapper_failed=wrapper_failed): - if self.get_file('{0}_COMPLETED'.format(job_name), False,wrapper_failed=wrapper_failed): + if 
self.check_file_exists('{0}_COMPLETED'.format(job_name), wrapper_failed=wrapper_failed): + if self.get_file('{0}_COMPLETED'.format(job_name), False, wrapper_failed=wrapper_failed): return True else: return False @@ -266,7 +267,7 @@ class Platform(object): return True return False - def check_file_exists(self, src,wrapper_failed=False): + def check_file_exists(self, src, wrapper_failed=False): return True def get_stat_file(self, job_name, retries=0): diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index e68517ec7..d15d4e518 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -183,6 +183,9 @@ Examples: ## Wallclock to be submitted to the HPC queue in format HH:MM # WALLCLOCK = 00:05 + ## Wallclock chunk increase (WALLCLOCK will be increased according to the formula WALLCLOCK + WCHUNKINC * (chunk - 1)). + ## Ideal for sequences of jobs that change their expected running time according to the current chunk. + # WCHUNKINC = 00:01 ## Processors number to be submitted to the HPC. If not specified, defaults to 1. # PROCESSORS = 1 ## Threads number to be submitted to the HPC. If not specified, defaults to 1. # THREADS = 1 @@ -282,7 +285,9 @@ Examples: ## Default Maximum number of jobs to be waiting in any platform queue ## Default = 3 # MAX_WAITING_JOBS = 3 - ## Default maximum number of jobs to be running at the same time at any platform + ## Default maximum number of jobs to be running at the same time at the platform. + ## Applies at platform level. Considers QUEUEING + RUNNING jobs. + ## Ideal for configurations where some remote platform has a low upper limit of allowed jobs per user at the same time. 
## Default = 6 # TOTAL_JOBS = 6 @@ -313,6 +318,7 @@ Examples: # Default = 3 MAXWAITINGJOBS = 3 # Default maximum number of jobs to be running at the same time at any platform + # Can be set at platform level in the platform_cxxx.conf file # Default = 6 TOTALJOBS = 6 # Time (seconds) between connections to the HPC queue scheduler to poll already submitted jobs status diff --git a/docs/source/variables.rst b/docs/source/variables.rst index e0ac8df28..8cb16a59c 100644 --- a/docs/source/variables.rst +++ b/docs/source/variables.rst @@ -91,3 +91,19 @@ Project variables - **ROOTDIR**: local path to experiment's folder - **PROJDIR**: local path to experiment's proj folder +Performance Metrics +=================== + +Currently, these variables apply only to the report function of Autosubmit. See :ref:`report`. + +- **SYPD**: Simulated years per day. +- **ASYPD**: Actual simulated years per day. +- **RSYPD**: Raw simulated years per day. +- **CHSY**: Core hours per simulated year. +- **JPSY**: Joules per simulated year. +- **Parallelization**: Number of cores requested for the simulation job. + +For more information about these metrics, please visit: + +https://earth.bsc.es/gitlab/wuruchi/autosubmitreact/-/wikis/Performance-Metrics. + -- GitLab