From cee6069e2ec61313d4fbc4f054565602cc668b71 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 18 Aug 2020 12:31:50 +0200 Subject: [PATCH 01/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 70226dc74..4fcaec7a2 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -135,8 +135,8 @@ class Autosubmit: BasicConfig.read() parser = argparse.ArgumentParser( description='Main executable for autosubmit. ') - parser.add_argument('-v', '--version', action='version', version=Autosubmit.autosubmit_version, - help="returns autosubmit's version number and exit") + + parser.add_argument('-v', '--version', action='version', version=Autosubmit.autosubmit_version) parser.add_argument('-lf', '--logfile', choices=('NO_LOG','INFO','WARNING', 'DEBUG'), default='WARNING', type=str, help="sets file's log level.") @@ -145,7 +145,6 @@ class Autosubmit: help="sets console's log level") subparsers = parser.add_subparsers(dest='command') - # Run subparser = subparsers.add_parser( 'run', description="runs specified experiment") @@ -491,7 +490,11 @@ class Autosubmit: # Changelog subparsers.add_parser('changelog', description='show changelog') args = parser.parse_args() + except BaseException as e: + if type(e) is SystemExit: + if e.message == 0: # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished + os._exit(0) raise AutosubmitCritical("Incorrect arguments for this command",7000) @@ -502,6 +505,8 @@ class Autosubmit: if args.command == 'run': return Autosubmit.run_experiment(args.expid, args.notransitive, args.update_version) + elif args.command == 'version': + return Autosubmit.autosubmit_version elif args.command == 'expid': return Autosubmit.expid(args.HPC, args.description, args.copy, 
args.dummy, False, args.operational, args.config) != '' -- GitLab From 4f8c61b635b781f61096fa9866092a7dce8b2ef8 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 18 Aug 2020 13:12:16 +0200 Subject: [PATCH 02/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 4fcaec7a2..e772b1cca 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -494,6 +494,7 @@ class Autosubmit: except BaseException as e: if type(e) is SystemExit: if e.message == 0: # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished + print(Autosubmit.autosubmit_version) os._exit(0) raise AutosubmitCritical("Incorrect arguments for this command",7000) @@ -505,8 +506,6 @@ class Autosubmit: if args.command == 'run': return Autosubmit.run_experiment(args.expid, args.notransitive, args.update_version) - elif args.command == 'version': - return Autosubmit.autosubmit_version elif args.command == 'expid': return Autosubmit.expid(args.HPC, args.description, args.copy, args.dummy, False, args.operational, args.config) != '' -- GitLab From f05e684044694aa7301f6e93110741cdc903f234 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 18 Aug 2020 13:22:53 +0200 Subject: [PATCH 03/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 2 +- bin/autosubmit | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index e772b1cca..86e3aebbd 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -491,7 +491,7 @@ class Autosubmit: subparsers.add_parser('changelog', description='show changelog') args = parser.parse_args() - except BaseException as e: + except Exception as e: 
if type(e) is SystemExit: if e.message == 0: # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished print(Autosubmit.autosubmit_version) diff --git a/bin/autosubmit b/bin/autosubmit index 55c651ef9..d8332425e 100755 --- a/bin/autosubmit +++ b/bin/autosubmit @@ -42,9 +42,9 @@ def main(): Log.error("Trace: {0}", e.trace) Log.critical("{1} [eCode={0}]", e.code, e.message) os._exit(1) - except BaseException as e: + except Exception as e: Log.error("Trace: {0}", e.message) - if "temporarily unavailable" in e.message: + if "temporarily unavailable" in str(e.message): Log.critical("Another instance of autosubmit is running on this experiment. If this is not the case, delete autosubmit.lock",7000) else: Log.critical("Unhandled error, if you see this message report it in autosubmit git") -- GitLab From ca4c60e0307a2f4dcb391943b98b8b161abe43f8 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 18 Aug 2020 13:34:06 +0200 Subject: [PATCH 04/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 425e34608..ed9e285da 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ setup( keywords=['climate', 'weather', 'workflow', 'HPC'], install_requires=['argparse>=1.2,<2', 'python-dateutil>2', 'pydotplus>=2', 'pyparsing>=2.0.1', 'numpy', 'matplotlib', 'paramiko==1.15', - 'mock>=1.3.0', 'portalocker>=0.5.7', 'networkx', 'bscearth.utils'], + 'mock>=1.3.0', 'portalocker==0.5.7', 'networkx', 'bscearth.utils'], extras_require={ 'dialog': ["python2-pythondialog>=3.3.0"] }, -- GitLab From 0d910dc9046356aea0a78f655b26b61ace427aed Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 20 Aug 2020 13:27:04 +0200 Subject: [PATCH 05/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 279 
+++++++++----------------- autosubmit/config/config_common.py | 3 +- autosubmit/job/job.py | 59 ++++-- autosubmit/job/job_list.py | 2 +- autosubmit/monitor/diagram.py | 4 +- autosubmit/platforms/ecplatform.py | 6 +- autosubmit/platforms/lsfplatform.py | 6 +- autosubmit/platforms/pbsplatform.py | 6 +- autosubmit/platforms/psplatform.py | 6 +- autosubmit/platforms/sgeplatform.py | 6 +- autosubmit/platforms/slurmplatform.py | 2 +- bin/autosubmit | 4 +- 12 files changed, 159 insertions(+), 224 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 86e3aebbd..04dc50c64 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -629,7 +629,7 @@ class Autosubmit: id_eadmin = os.popen('id -u eadmin').read().strip() if expid_delete == '' or expid_delete is None and not os.path.exists(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid_delete)): - Log.info("Experiment directory does not exist.") + Log.result("Experiment directory does not exist.") else: ret = False # Handling possible failure of retrieval of current owner data @@ -940,7 +940,7 @@ class Autosubmit: os.system('clear') signal.signal(signal.SIGINT, signal_handler) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) project_type = as_conf.get_project_type() if project_type != "none": # Check proj configuration @@ -1159,9 +1159,6 @@ class Autosubmit: os.system('clear') signal.signal(signal.SIGINT, signal_handler) - as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() - hpcarch = as_conf.get_platform() safetysleeptime = as_conf.get_safetysleeptime() retrials = as_conf.get_retrials() @@ -1382,10 +1379,13 @@ class Autosubmit: raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7000, e.message) if main_loop_retrials > 0: # Restore platforms and try again, to avoid endless loop with failed configuration, a hard limit is set. 
- Autosubmit.restore_platforms(platforms_to_test) - main_loop_retrials = main_loop_retrials - 1 + try: + Autosubmit.restore_platforms(platforms_to_test) + except: + raise AutosubmitCritical("Autosubmit couldn't recover the platforms",7000, e.message) + main_loop_retrials = main_loop_retrials - 1 else: - raise AutosubmitCritical("Autosubmit Encounter too much errors during running time",7000,e.message) + raise AutosubmitCritical("Autosubmit Encounter too much e rrors during running time",7000,e.message) except AutosubmitCritical as e: # Critical errors can't be recovered. Failed configuration or autosubmit error raise AutosubmitCritical(e.message, e.code, e.trace) except portalocker.AlreadyLocked: @@ -1571,7 +1571,7 @@ class Autosubmit: exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) Log.info("Getting job list...") as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) # Getting output type from configuration output_type = as_conf.get_output_type() pkl_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, 'pkl') @@ -1745,7 +1745,7 @@ class Autosubmit: exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) Log.info("Loading jobs...") as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) pkl_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, 'pkl') @@ -1782,9 +1782,8 @@ class Autosubmit: monitor_exp.generate_output_stats( expid, job_list, file_format, period_ini, period_fi, not hide) Log.result("Stats plot ready") - except Exception as ex: - Log.critical(str(ex)) - return False + except Exception as e: + raise AutosubmitCritical("Stats couldn't be shown",7000,e.message) else: Log.info("There are no {0} jobs in the period from {1} to {2}...".format( ft, period_ini, period_fi)) @@ -1856,7 +1855,7 @@ class Autosubmit: exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) as_conf = 
AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) Log.info('Recovering experiment {0}'.format(expid)) pkl_dir = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, 'pkl') @@ -1864,7 +1863,7 @@ class Autosubmit: expid, as_conf, notransitive=notransitive, monitor=True) Log.debug("Job list restored from {0} files", pkl_dir) - as_conf.check_conf_files() + as_conf.check_conf_files(False) # Getting output type provided by the user in config, 'pdf' as default output_type = as_conf.get_output_type() @@ -1982,7 +1981,7 @@ class Autosubmit: Log.info('Migrating experiment {0}'.format(experiment_id)) as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) submitter = Autosubmit._get_submitter(as_conf) submitter.load_platforms(as_conf) if submitter.platforms is None: @@ -1998,8 +1997,8 @@ class Autosubmit: Log.info( "Checking [{0}] from platforms configuration...", platform) if not as_conf.get_migrate_user_to(platform): - Log.critical( - "Missing directive USER_TO in [{0}]", platform) + Log.printlog( + "Missing directive USER_TO in [{0}]".format( platform),7000) error = True break if as_conf.get_migrate_project_to(platform): @@ -2071,8 +2070,8 @@ class Autosubmit: Log.debug( "The platform {0} does not contain absolute symlinks", platform) except BaseException: - Log.warning( - "Absolute symlinks failed to convert, check user in platform.conf") + Log.printlog( + "Absolute symlinks failed to convert, check user in platform.conf",3000) error = True break @@ -2081,13 +2080,14 @@ class Autosubmit: "Moving remote files/dirs on {0}", platform) p.send_command("chmod 777 -R " + p.root_dir) if not p.move_file(p.root_dir, os.path.join(p.temp_dir, experiment_id), True): - Log.critical("The files/dirs on {0} cannot be moved to {1}.", p.root_dir, - os.path.join(p.temp_dir, experiment_id)) + Log.printlog( + "The files/dirs on {0} cannot be 
moved to {1}.".format(p.root_dir, + os.path.join(p.temp_dir, experiment_id), 6000)) error = True break - except (IOError, BaseException): - Log.critical("The files/dirs on {0} cannot be moved to {1}.", p.root_dir, - os.path.join(p.temp_dir, experiment_id)) + except (IOError, BaseException) as e: + Log.printlog("The files/dirs on {0} cannot be moved to {1}.".format(p.root_dir, + os.path.join(p.temp_dir, experiment_id)),6000) error = True break @@ -2097,11 +2097,11 @@ class Autosubmit: Log.result("[{0}] from platforms configuration OK", platform) if error: - Log.critical( - "The experiment cannot be offered, reverting changes") + Log.printlog( + "The experiment cannot be offered, reverting changes",7000) as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) for platform in backup_files: p = submitter.platforms[platform] p.move_file(os.path.join( @@ -2116,8 +2116,8 @@ class Autosubmit: return False else: if not Autosubmit.archive(experiment_id, False, False): - Log.critical( - "The experiment cannot be offered,reverting changes.") + Log.printlog( + "The experiment cannot be offered, reverting changes", 7000) for platform in backup_files: p = submitter.platforms[platform] p.move_file(os.path.join( @@ -2136,12 +2136,11 @@ class Autosubmit: Log.info('Migrating experiment {0}'.format(experiment_id)) Log.info("Moving local files/dirs") if not Autosubmit.unarchive(experiment_id, False): - Log.critical("The experiment cannot be picked up") - return False + raise AutosubmitCritical("The experiment cannot be picked up",7000) Log.info("Local files/dirs have been successfully picked up") as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) Log.info("Checking remote platforms") submitter = Autosubmit._get_submitter(as_conf) submitter.load_platforms(as_conf) @@ -2167,8 +2166,7 @@ class Autosubmit: 
"Files/dirs on {0} have been successfully picked up", platform) except (IOError, BaseException): error = True - Log.critical("The files/dirs on {0} cannot be copied to {1}.", - os.path.join(p.temp_dir, experiment_id), p.root_dir) + Log.printlog("The files/dirs on {0} cannot be copied to {1}.".format(os.path.join(p.temp_dir, experiment_id), p.root_dir),6000) break backup_files.append(platform) else: @@ -2176,8 +2174,8 @@ class Autosubmit: "Files/dirs on {0} have been successfully picked up", platform) if error: Autosubmit.archive(experiment_id, False, False) - Log.critical( - "The experiment cannot be picked,reverting changes.") + Log.printlog( + "The experiment cannot be picked,reverting changes.",7000) for platform in backup_files: p = submitter.platforms[platform] p.send_command("rm -R " + p.root_dir) @@ -2200,17 +2198,11 @@ class Autosubmit: :type experiment_id: str """ exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id) - if not os.path.exists(exp_path): - Log.critical( - "The directory {0} is needed and does not exist.", exp_path) - Log.warning("Does an experiment with the given id exist?") - return False - as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) - if not as_conf.check_conf_files(): - return False + as_conf.check_conf_files(False) + project_type = as_conf.get_project_type() if project_type != "none": @@ -2250,16 +2242,11 @@ class Autosubmit: Log.info("Describing {0}", experiment_id) exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, experiment_id) - if not os.path.exists(exp_path): - Log.critical( - "The directory {0} is needed and does not exist.", exp_path) - Log.warning("Does an experiment with the given id exist?") - return False as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) - if not as_conf.check_conf_files(): - return False + as_conf.check_conf_files(False) + user = os.stat(as_conf.experiment_file).st_uid try: user = pwd.getpwuid(user).pw_name @@ -2389,8 +2376,7 @@ 
class Autosubmit: config_file.close() Log.result("Configuration file written successfully") except (IOError, OSError) as e: - Log.critical("Can not write config file: {0}".format(e.message)) - return False + raise AutosubmitCritical("Can not write config file: {0}",7000,e.message) return True @staticmethod @@ -2410,12 +2396,9 @@ class Autosubmit: d = dialog.Dialog( dialog="dialog", autowidgetsize=True, screen_color='GREEN') except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) except Exception: - Log.critical("Missing package 'dialog', please install it with: 'apt-get install dialog'" - "or provide configure arguments") - return False + raise AutosubmitCritical("Dialog libs aren't found in your Operational system",7000) d.set_background_title("Autosubmit configure utility") if os.geteuid() == 0: @@ -2437,8 +2420,7 @@ class Autosubmit: os.system('clear') return False except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) filename = '.autosubmitrc' if level == 'All': @@ -2475,19 +2457,16 @@ class Autosubmit: jobs_conf_path = parser.get('conf', 'jobs') except (IOError, OSError) as e: - Log.critical("Can not read config file: {0}".format(e.message)) - return False + raise AutosubmitCritical("Can not read config file",7000,e.message) while True: try: code, database_path = d.dselect(database_path, width=80, height=20, title='\Zb\Z1Select path to database\Zn', colors='enable') except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False - + raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7000) if Autosubmit._requested_exit(code, d): - return False + raise AutosubmitCritical("Graphical visualization failed, requested exit", 7000) elif code == dialog.Dialog.OK: database_path = 
database_path.replace('~', home_path) if not os.path.exists(database_path): @@ -2502,11 +2481,11 @@ class Autosubmit: title='\Zb\Z1Select path to experiments repository\Zn', colors='enable') except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) + if Autosubmit._requested_exit(code, d): - return False + raise AutosubmitCritical("Graphical visualization failed,requested exit",7000) elif code == dialog.Dialog.OK: database_path = database_path.replace('~', home_path) if not os.path.exists(database_path): @@ -2527,11 +2506,10 @@ class Autosubmit: form_height=10, title='\Zb\Z1Just a few more options:\Zn', colors='enable') except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) if Autosubmit._requested_exit(code, d): - return False + raise AutosubmitCritical("Graphical visualization failed, _requested_exit", 7000) elif code == dialog.Dialog.OK: database_filename = tag[0] platforms_conf_path = tag[1] @@ -2562,11 +2540,10 @@ class Autosubmit: form_height=10, title='\Zb\Z1Mail notifications configuration:\Zn', colors='enable') except dialog.DialogError: - Log.critical(not_enough_screen_size_msg) - return False + raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7000) if Autosubmit._requested_exit(code, d): - return False + raise AutosubmitCritical("Graphical visualization failed, requested exit", 7000) elif code == dialog.Dialog.OK: smtp_hostname = tag[0] mail_from = tag[1] @@ -2598,9 +2575,7 @@ class Autosubmit: width=50, height=5) os.system('clear') except (IOError, OSError) as e: - Log.critical("Can not write config file: {0}".format(e.message)) - os.system('clear') - return False + raise AutosubmitCritical("Can not write config file", 7000,e.message) return True @staticmethod @@ -2623,12 +2598,10 @@ 
class Autosubmit: Log.info("Creating autosubmit database...") qry = resource_string('autosubmit.database', 'data/autosubmit.sql') if not create_db(qry): - Log.critical("Can not write database file") - return False + raise AutosubmitCritical("Can not write database file", 7000) Log.result("Autosubmit database created successfully") else: - Log.error("Database already exists.") - return False + raise AutosubmitCritical("Database already exists.", 7000) return True @staticmethod @@ -2646,9 +2619,8 @@ class Autosubmit: Autosubmit._check_ownership(expid) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - if not as_conf.check_expdef_conf(): - Log.critical('Can not refresh with invalid configuration') - return False + as_conf.check_expdef_conf() + project_type = as_conf.get_project_type() if Autosubmit._copy_code(as_conf, expid, project_type, True): @@ -2668,9 +2640,8 @@ class Autosubmit: as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - if not as_conf.check_expdef_conf(): - Log.critical('Can not refresh with invalid configuration') - return False + as_conf.check_expdef_conf() + Log.info("Changing {0} experiment version from {1} to {2}", expid, as_conf.get_version(), Autosubmit.autosubmit_version) as_conf.set_version(Autosubmit.autosubmit_version) @@ -2689,11 +2660,6 @@ class Autosubmit: """ exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) - if not os.path.exists(exp_path): - Log.critical( - "The directory %s is needed and does not exist." % exp_path) - Log.warning("Does an experiment with the given id exist?") - return 1 exp_folder = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) @@ -2701,8 +2667,7 @@ class Autosubmit: # Cleaning to reduce file size. version = get_autosubmit_version(expid) if version is not None and version.startswith('3') and not Autosubmit.clean(expid, True, True, True, False): - Log.critical("Can not archive project. 
Clean not successful") - return False + raise AutosubmitCritical("Can not archive project. Clean not successful", 7000) # Getting year of last completed. If not, year of expid folder year = None @@ -2737,8 +2702,8 @@ class Autosubmit: tar.close() os.chmod(os.path.join(year_path, output_filepath), 0o755) except Exception as e: - Log.critical("Can not write tar file: {0}".format(e)) - return False + raise AutosubmitCritical("Can not write tar file", 7000,e.message) + Log.info("Tar file created!") @@ -2756,10 +2721,9 @@ class Autosubmit: Log.warning("Experiment folder renamed to: {0}".format( exp_folder+"_to_delete ")) except Exception as e: - Log.critical( - "Can not remove or rename experiments folder: {0}".format(e)) + Autosubmit.unarchive(expid, compress, True) - return False + raise AutosubmitCritical("Can not remove or rename experiments folder",7000,e.message) Log.result("Experiment archived successfully") return True @@ -2807,7 +2771,7 @@ class Autosubmit: tar.close() except Exception as e: shutil.rmtree(exp_folder, ignore_errors=True) - Log.critical("Can not extract tar file: {0}".format(e)) + Log.printlog("Can not extract tar file: {0}".format(e),6000) return False Log.info("Unpacking finished") @@ -2815,7 +2779,7 @@ class Autosubmit: try: os.remove(archive_path) except Exception as e: - Log.error("Can not remove archived file folder: {0}".format(e)) + Log.printlog("Can not remove archived file folder: {0}".format(e),7000) return False Log.result("Experiment {0} unarchived successfully", experiment_id) @@ -2878,11 +2842,6 @@ class Autosubmit: os.chmod(aslogs_path, 0o775) else: os.chmod(aslogs_path, 0o775) - if not os.path.exists(exp_path): - Log.critical( - "The directory %s is needed and does not exist." 
% exp_path) - Log.warning("Does an experiment with the given id exist?") - return 1 # checking if there is a lock file to avoid multiple running on the same expid try: @@ -2892,7 +2851,7 @@ class Autosubmit: Log.info("Preparing .lock file to avoid multiple instances with same expid.") as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(False) project_type = as_conf.get_project_type() # Getting output type provided by the user in config, 'pdf' as default output_type = as_conf.get_output_type() @@ -2920,8 +2879,7 @@ class Autosubmit: chunk_ini = as_conf.get_chunk_ini() member_list = as_conf.get_member_list() if len(member_list) != len(set(member_list)): - Log.error('There are repeated member names!') - return False + raise AutosubmitCritical("There are repeated member names!") rerun = as_conf.get_rerun() Log.info("\nCreating the jobs list...") @@ -3072,10 +3030,10 @@ class Autosubmit: svn_project_revision + " " + svn_project_url + " " + project_destination, shell=True) except subprocess.CalledProcessError: - Log.error("Can not check out revision {0} into {1}", svn_project_revision + " " + svn_project_url, - project_path) + shutil.rmtree(project_path, ignore_errors=True) - return False + raise AutosubmitCritical("Can not check out revision {0} into {1}".format(svn_project_revision + " " + svn_project_url, + project_path),7000) Log.debug("{0}", output) elif project_type == "local": @@ -3093,19 +3051,17 @@ class Autosubmit: local_project_path+"/* "+local_destination] subprocess.call(cmd, shell=True) except subprocess.CalledProcessError: - Log.error( - "Can not synchronize {0} into {1}. Exiting...", local_project_path, project_path) - return False + raise AutosubmitCritical("Can not rsync {0} into {1}. 
Exiting...".format( + local_project_path, project_path), 7000) else: os.mkdir(local_destination) try: output = subprocess.check_output( "cp -R " + local_project_path + "/* " + local_destination, shell=True) except subprocess.CalledProcessError: - Log.error( - "Can not copy {0} into {1}. Exiting...", local_project_path, project_path) shutil.rmtree(project_path) - return False + raise AutosubmitCritical("Can not copy {0} into {1}. Exiting...".format( + local_project_path, project_path), 7000) else: os.mkdir(project_path) os.mkdir(local_destination) @@ -3117,10 +3073,9 @@ class Autosubmit: output = subprocess.check_output( "cp -R " + local_project_path + "/* " + local_destination, shell=True) except subprocess.CalledProcessError: - Log.error( - "Can not copy {0} into {1}. Exiting...", local_project_path, project_path) shutil.rmtree(project_path) - return False + raise AutosubmitCritical( + "Can not copy {0} into {1}. Exiting...".format( local_project_path, project_path), 7000) Log.debug("{0}", output) return True @@ -3177,12 +3132,6 @@ class Autosubmit: Autosubmit._check_ownership(expid) exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) - if not os.path.exists(exp_path): - Log.critical( - "The directory %s is needed and does not exist." % exp_path) - Log.warning("Does an experiment with the given id exist?") - return 1 - # checking if there is a lock file to avoid multiple running on the same expid try: with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): @@ -3199,9 +3148,8 @@ class Autosubmit: wrongExpid = 0 as_conf = AutosubmitConfig( expid, BasicConfig, ConfigParserFactory()) - if not as_conf.check_conf_files(): - Log.critical('Can not run with invalid configuration') - return False + as_conf.check_conf_files(False) + # Getting output type from configuration output_type = as_conf.get_output_type() @@ -3240,10 +3188,7 @@ class Autosubmit: ".\n\tProcess stopped. 
Review the format of the provided input. Comparison is case sensitive." + \ "\n\tRemember that this option expects section names separated by a blank space as input." - Log.info(section_validation_message) - Log.critical("Error in the supplied input for -ft.") - return False - + raise AutosubmitCritical("Error in the supplied input for -ft.",7000,section_validation_message) job_list = Autosubmit.load_job_list( expid, as_conf, notransitive=notransitive) submitter = Autosubmit._get_submitter(as_conf) @@ -3288,9 +3233,7 @@ class Autosubmit: job_validation_message += "\n\tSpecified job(s) : [" + str(job_not_foundList) + "] not found in the experiment " + \ str(expid) + ". \n\tProcess stopped. Review the format of the provided input. Comparison is case sensitive." + \ "\n\tRemember that this option expects job names separated by a blank space as input." - Log.info(job_validation_message) - Log.critical("Error in the supplied input for -fl.") - return False + raise AutosubmitCritical("Error in the supplied input for -ft.",7000,section_validation_message) # Validating fc if filter_chunks -fc has been set: if filter_chunks is not None: @@ -3354,10 +3297,7 @@ class Autosubmit: # Ending validation if fc_filter_is_correct == False: - Log.info(fc_validation_message) - Log.critical("Error in the supplied input for -fc.") - return False - + raise AutosubmitCritical("Error in the supplied input for -fc.",7000,section_validation_message) # Validating status, if filter_status -fs has been set: # At this point we already have job_list from where we are getting the allows STATUS if filter_status is not None: @@ -3389,9 +3329,8 @@ class Autosubmit: status_validation_message += "\n\t There are no jobs with status " + \ status + " in this experiment." 
if status_validation_error == True: - Log.info(status_validation_message) - Log.critical("Error in the supplied input for -fs.") - return False + raise AutosubmitCritical("Error in the supplied input for -fs.",7000,section_validation_message) + jobs_filtered = [] final_status = Autosubmit._get_status(final) if filter_section or filter_chunks: @@ -3476,9 +3415,7 @@ class Autosubmit: # Ending validation if filter_is_correct == False: - Log.info(validation_message) - Log.critical("Error in the supplied input for -ftc.") - return False + raise AutosubmitCritical("Error in the supplied input for -ftc.", 7000, section_validation_message) # If input is valid, continue. record = dict() @@ -3576,8 +3513,6 @@ class Autosubmit: "-d option: Experiment has too many jobs to be printed in the terminal. Maximum job quantity is 1000, your experiment has " + str(current_length) + " jobs.") else: Log.info(job_list.print_with_status(statusChange = performed_changes)) - Log.status(job_list.print_with_status(statusChange = performed_changes)) - else: Log.warning("No changes were performed.") # End of New Feature @@ -3658,12 +3593,8 @@ class Autosubmit: if save and wrongExpid == 0: job_list.save() else: - Log.warning( - "Changes NOT saved to the JobList!!!!: use -s option to save") - if wrongExpid > 0: - - Log.error( - "Save disabled due invalid expid, please check or/and jobs expid name") + Log.printlog( + "Changes NOT saved to the JobList!!!!: use -s option to save",3000) if as_conf.get_wrapper_type() != 'none' and check_wrapper: packages_persistence = JobPackagePersistence(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "pkl"), @@ -3872,11 +3803,8 @@ class Autosubmit: communications_library = as_conf.get_communications_library() if communications_library == 'paramiko': return ParamikoSubmitter() - - # communications library not known - Log.error( - 'You have defined a not valid communications library on the configuration file') - raise Exception('Communications library not known') + 
else: + return ParamikoSubmitter()# only paramiko is avaliable right now so.. @staticmethod def _get_job_list_persistence(expid, as_conf): @@ -4000,8 +3928,7 @@ class Autosubmit: exp_parser = as_conf.get_parser( ConfigParserFactory(), as_conf.experiment_file) if exp_parser.get_bool_option('rerun', "RERUN", True): - Log.error('Can not test a RERUN experiment') - return False + raise AutosubmitCritical('Can not test a RERUN experiment',7000) content = open(as_conf.experiment_file).read() if random_select: @@ -4013,8 +3940,8 @@ class Autosubmit: if platforms_parser.get_option(section, 'TEST_SUITE', 'false').lower() == 'true': test_platforms.append(section) if len(test_platforms) == 0: - Log.critical('No test HPC defined') - return False + raise AutosubmitCritical("Missing hpcarch setting in expdef",7000) + hpc = random.choice(test_platforms) if member is None: member = random.choice(exp_parser.get( @@ -4109,9 +4036,12 @@ class Autosubmit: hpcarch = as_conf.get_platform() submitter = Autosubmit._get_submitter(as_conf) - submitter.load_platforms(as_conf) - if submitter.platforms is None: - raise AutosubmitCritical("platforms couldn't be loaded",7000) + try: + submitter.load_platforms(as_conf) + if submitter.platforms is None: + raise AutosubmitCritical("platforms couldn't be loaded",7000) + except: + raise AutosubmitCritical("platforms couldn't be loaded", 7000) platforms = submitter.platforms platforms_to_test = set() @@ -4138,18 +4068,7 @@ class Autosubmit: if job.platform.get_completed_files(job.name, 0): job.status = Status.COMPLETED - Log.info("CHANGED job '{0}' status to COMPLETED".format(job.name)) - Log.status("CHANGED job '{0}' status to COMPLETED".format(job.name)) - - #elif job.status != Status.SUSPENDED: - # job.status = Status.WAITING - # job.fail_count = 0 - # Log.info("CHANGED job '{0}' status to WAITING".format(job.name)) job.platform.get_logs_files(expid, job.remote_logs) - - #end = datetime.datetime.now() - #Log.info("Time spent: '{0}'".format(end - 
start)) - #Log.info("Updating the jobs list") return job_list diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 363341e55..53ecc117d 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -353,7 +353,7 @@ class AutosubmitConfig(object): """ return str(self._jobs_parser.get_option(section, 'CUSTOM_DIRECTIVES', '')) - def check_conf_files(self): + def check_conf_files(self,check_file=True): """ Checks configuration files (autosubmit, experiment jobs and platforms), looking for invalid values, missing required options. Prints results in log @@ -362,6 +362,7 @@ class AutosubmitConfig(object): :rtype: bool """ Log.info('\nChecking configuration files...') + self.ignore_file_path = check_file self.reload() self.check_expdef_conf() self.check_platforms_conf() diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index fff538682..018ea3254 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -101,7 +101,7 @@ class Job(object): self.scratch_free_space = None self.custom_directives = [] self.undefined_variables = None - + self.log_retries = 5 self.id = job_id self.file = None self._local_logs = ('', '') @@ -518,27 +518,41 @@ class Job(object): retries = 3 sleeptime = 5 i = 0 - while (not out_exist or not err_exist) and i < retries: - out_exist = platform.check_file_exists( - remote_logs[0]) # will do 5 retries - err_exist = platform.check_file_exists( - remote_logs[1]) # will do 5 retries - if not out_exist or not err_exist: - sleeptime = sleeptime + 5 - i = i + 1 - sleep(sleeptime) - if out_exist and err_exist: - if copy_remote_logs: - if local_logs != remote_logs: - # unifying names for log files - self.synchronize_logs(platform, remote_logs, local_logs) - remote_logs = local_logs - platform.get_logs_files(self.expid, remote_logs) - # Update the logs with Autosubmit Job Id Brand - for local_log in local_logs: - platform.write_jobid(self.id, os.path.join( - self._tmp_path, 'LOG_' 
+ str(self.expid), local_log)) - platform.closeConnection() + try: + while (not out_exist or not err_exist) and i < retries: + out_exist = platform.check_file_exists( + remote_logs[0]) # will do 5 retries + err_exist = platform.check_file_exists( + remote_logs[1]) # will do 5 retries + if not out_exist or not err_exist: + sleeptime = sleeptime + 5 + i = i + 1 + sleep(sleeptime) + if out_exist and err_exist: + if copy_remote_logs: + if local_logs != remote_logs: + # unifying names for log files + self.synchronize_logs(platform, remote_logs, local_logs) + remote_logs = local_logs + platform.get_logs_files(self.expid, remote_logs) + # Update the logs with Autosubmit Job Id Brand + for local_log in local_logs: + platform.write_jobid(self.id, os.path.join( + self._tmp_path, 'LOG_' + str(self.expid), local_log)) + except AutosubmitError as e: + Log.error("{1} [eCode={0}]", e.code, e.message) + # Save job_list if not is a failed submitted job + try: + platform.test_connection() + self.retrieve_logfiles() + except Exception: + Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6000) + except AutosubmitCritical as e: # Critical errors can't be recovered. Failed configuration or autosubmit error + Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6000) + try: + platform.closeConnection() + except: + pass sleep(2) return @@ -1092,6 +1106,7 @@ class WrapperJob(Job): self.job_list = job_list # divide jobs in dictionary by state? 
self.wallclock = total_wallclock + self.num_processors = num_processors self.running_jobs_start = OrderedDict() self.platform = platform diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index f11e702e8..a6602a792 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -940,7 +940,7 @@ class JobList: Status.SUBMITTED and not job.status == Status.READY] if len(tmp) == len(active): # IF only held jobs left without dependencies satisfied if len(tmp) != 0 and len(active) != 0: - raise AutosubmitCritical("Only Held Jobs active,Exiting Autosubmit (TIP: This can happen if suspended or/and Failed jobs are found on the workflow)",7000) + raise AutosubmitCritical("Only Held Jobs active. Exiting Autosubmit (TIP: This can happen if suspended or/and Failed jobs are found on the workflow)",7000) active = [] return active diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index 11ab04cb6..fd7c7fbe7 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -41,9 +41,9 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per # Creating stats figure + sanity check if num_plots > MAX_NUM_PLOTS: message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. INI, SIM; \ -or -fp where you supply an integer that represents the number of hours into the past that should be queried, \ +or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." 
- raise AutosubmitCritical("Stats query our of bounds",7000,message) + raise AutosubmitCritical("Stats query out of bounds",7000,message) fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * num_plots)) diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index 65ae5873d..d364c7b6e 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -115,7 +115,7 @@ class EcPlatform(ParamikoPlatform): def connect(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -123,7 +123,7 @@ class EcPlatform(ParamikoPlatform): self.connected = True def restore_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -131,7 +131,7 @@ class EcPlatform(ParamikoPlatform): self.connected = True def test_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool diff --git a/autosubmit/platforms/lsfplatform.py b/autosubmit/platforms/lsfplatform.py index caaed7a1a..dd973552e 100644 --- a/autosubmit/platforms/lsfplatform.py +++ b/autosubmit/platforms/lsfplatform.py @@ -112,7 +112,7 @@ class LsfPlatform(ParamikoPlatform): '\n'.ljust(13).join(str(s) for s in directives)) def connect(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -120,7 +120,7 @@ class LsfPlatform(ParamikoPlatform): self.connected = True def restore_connection(self): """ - In this case, it does nothing because connection is established foe each 
command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -128,7 +128,7 @@ class LsfPlatform(ParamikoPlatform): self.connected = True def test_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool diff --git a/autosubmit/platforms/pbsplatform.py b/autosubmit/platforms/pbsplatform.py index 33f7d9820..5150a6b8e 100644 --- a/autosubmit/platforms/pbsplatform.py +++ b/autosubmit/platforms/pbsplatform.py @@ -103,7 +103,7 @@ class PBSPlatform(ParamikoPlatform): return "ssh " + self.host + " " + self.get_qstatjob(job_id) def connect(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -111,7 +111,7 @@ class PBSPlatform(ParamikoPlatform): self.connected = True def restore_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -119,7 +119,7 @@ class PBSPlatform(ParamikoPlatform): self.connected = True def test_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool diff --git a/autosubmit/platforms/psplatform.py b/autosubmit/platforms/psplatform.py index 163611f05..a171992b4 100644 --- a/autosubmit/platforms/psplatform.py +++ b/autosubmit/platforms/psplatform.py @@ -81,7 +81,7 @@ class PsPlatform(ParamikoPlatform): return self.get_pscall(job_id) def connect(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because 
connection is established for each command :return: True :rtype: bool @@ -89,7 +89,7 @@ class PsPlatform(ParamikoPlatform): self.connected = True def restore_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -97,7 +97,7 @@ class PsPlatform(ParamikoPlatform): self.connected = True def test_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool diff --git a/autosubmit/platforms/sgeplatform.py b/autosubmit/platforms/sgeplatform.py index 40e959c22..5957cc225 100644 --- a/autosubmit/platforms/sgeplatform.py +++ b/autosubmit/platforms/sgeplatform.py @@ -87,7 +87,7 @@ class SgePlatform(ParamikoPlatform): def connect(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -95,7 +95,7 @@ class SgePlatform(ParamikoPlatform): self.connected = True def restore_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool @@ -103,7 +103,7 @@ class SgePlatform(ParamikoPlatform): self.connected = True def test_connection(self): """ - In this case, it does nothing because connection is established foe each command + In this case, it does nothing because connection is established for each command :return: True :rtype: bool diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 30df13c28..2c6a60166 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -133,7 +133,7 @@ class 
SlurmPlatform(ParamikoPlatform): jobs_id.append(int(output.split(' ')[3])) return jobs_id except IndexError: - raise AutosubmitCritical("Submission failed, There are issues on your config file",7000) + raise AutosubmitCritical("Submission failed. There are issues on your config file",7000) def jobs_in_queue(self): dom = parseString('') jobs_xml = dom.getElementsByTagName("JB_job_number") diff --git a/bin/autosubmit b/bin/autosubmit index d8332425e..31dccac6b 100755 --- a/bin/autosubmit +++ b/bin/autosubmit @@ -21,7 +21,7 @@ import os import sys -from log.log import Log,AutosubmitError,AutosubmitCritical +from log.log import Log,AutosubmitCritical scriptdir = os.path.abspath(os.path.dirname(sys.argv[0])) assert sys.path[0] == scriptdir @@ -47,7 +47,7 @@ def main(): if "temporarily unavailable" in str(e.message): Log.critical("Another instance of autosubmit is running on this experiment. If this is not the case, delete autosubmit.lock",7000) else: - Log.critical("Unhandled error, if you see this message report it in autosubmit git") + Log.critical("Unhandled error: If you see this message, please report it in Autosubmit's GitLab project") os._exit(1) if __name__ == "__main__": -- GitLab From 6112d055bd874c3accf61c52743e0b659e21ec1e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 21 Aug 2020 15:27:33 +0200 Subject: [PATCH 06/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 04dc50c64..c5d274a98 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -942,9 +942,6 @@ class Autosubmit: as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.check_conf_files(False) project_type = as_conf.get_project_type() - if project_type != "none": - # Check proj configuration - as_conf.check_proj() safetysleeptime 
= as_conf.get_safetysleeptime() Log.debug("The Experiment name is: {0}", expid) Log.debug("Sleep: {0}", safetysleeptime) @@ -1815,7 +1812,6 @@ class Autosubmit: project_type = autosubmit_config.get_project_type() if project_type == "git": - autosubmit_config.check_proj() Log.info("Registering commit SHA...") autosubmit_config.set_git_project_commit(autosubmit_config) autosubmit_git = AutosubmitGit(expid[0]) @@ -2205,9 +2201,6 @@ class Autosubmit: project_type = as_conf.get_project_type() - if project_type != "none": - if not as_conf.check_proj(): - return False submitter = Autosubmit._get_submitter(as_conf) submitter.load_platforms(as_conf) @@ -2258,9 +2251,6 @@ class Autosubmit: os.path.getmtime(as_conf.experiment_file)) project_type = as_conf.get_project_type() - if project_type != "none": - if not as_conf.check_proj(): - return False if (as_conf.get_svn_project_url()): model = as_conf.get_svn_project_url() branch = as_conf.get_svn_project_url() @@ -2837,11 +2827,6 @@ class Autosubmit: exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) aslogs_path = os.path.join(tmp_path, BasicConfig.LOCAL_ASLOG_DIR) - if not os.path.exists(aslogs_path): - os.mkdir(aslogs_path) - os.chmod(aslogs_path, 0o775) - else: - os.chmod(aslogs_path, 0o775) # checking if there is a lock file to avoid multiple running on the same expid try: @@ -2865,7 +2850,7 @@ class Autosubmit: if project_type != "none": # Check project configuration - as_conf.check_proj() + as_conf.check_proj(False) # Load parameters Log.info("Loading parameters...") -- GitLab From c8b7e027ebbda0a840798179009cc33505539a6f Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 21 Aug 2020 15:54:01 +0200 Subject: [PATCH 07/23] # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished --- autosubmit/autosubmit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autosubmit/autosubmit.py 
b/autosubmit/autosubmit.py index c5d274a98..70b718603 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2826,7 +2826,6 @@ class Autosubmit: Autosubmit._check_ownership(expid) exp_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid) tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) - aslogs_path = os.path.join(tmp_path, BasicConfig.LOCAL_ASLOG_DIR) # checking if there is a lock file to avoid multiple running on the same expid try: -- GitLab From 3d068ad90b73cf365f49c4075cc97eef44343b0b Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 21 Aug 2020 16:01:12 +0200 Subject: [PATCH 08/23] disable check filepath in refresh --- autosubmit/autosubmit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 70b718603..15f9d63c0 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -813,7 +813,7 @@ class Autosubmit: ##### autosubmit_config = AutosubmitConfig( exp_id, BasicConfig, ConfigParserFactory()) - autosubmit_config.check_conf_files() + autosubmit_config.check_conf_files(False) project_type = autosubmit_config.get_project_type() if project_type == "git": autosubmit_git = AutosubmitGit(copy_id[0]) @@ -1808,7 +1808,7 @@ class Autosubmit: if project: autosubmit_config = AutosubmitConfig( expid, BasicConfig, ConfigParserFactory()) - autosubmit_config.check_conf_files() + autosubmit_config.check_conf_files(False) project_type = autosubmit_config.get_project_type() if project_type == "git": @@ -2609,7 +2609,7 @@ class Autosubmit: Autosubmit._check_ownership(expid) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - as_conf.check_expdef_conf() + as_conf.check_expdef_conf(False) project_type = as_conf.get_project_type() @@ -2630,7 +2630,7 @@ class Autosubmit: as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - as_conf.check_expdef_conf() + as_conf.check_expdef_conf(False) 
Log.info("Changing {0} experiment version from {1} to {2}", expid, as_conf.get_version(), Autosubmit.autosubmit_version) -- GitLab From b6889c1280a45567dc8b246fd6dc7d3722faf98e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 21 Aug 2020 16:08:34 +0200 Subject: [PATCH 09/23] disable check filepath in refresh --- autosubmit/autosubmit.py | 4 ++-- autosubmit/config/config_common.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 15f9d63c0..39e97d2a6 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1138,7 +1138,7 @@ class Autosubmit: as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files() + as_conf.check_conf_files(True) Log.info("Autosubmit is running with {0}", Autosubmit.autosubmit_version) if update_version: if as_conf.get_version() != Autosubmit.autosubmit_version: @@ -2609,7 +2609,7 @@ class Autosubmit: Autosubmit._check_ownership(expid) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - as_conf.check_expdef_conf(False) + as_conf.check_expdef_conf() project_type = as_conf.get_project_type() diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 53ecc117d..9d758bdd2 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -60,7 +60,6 @@ class AutosubmitConfig(object): self._proj_parser = None self._proj_parser_file = os.path.join(self.basic_config.LOCAL_ROOT_DIR, expid, "conf", "proj_" + expid + ".conf") - #self.check_proj_file() self.ignore_file_path = False self.wrong_config = defaultdict(list) self.warn_config = defaultdict(list) @@ -353,7 +352,7 @@ class AutosubmitConfig(object): """ return str(self._jobs_parser.get_option(section, 'CUSTOM_DIRECTIVES', '')) - def check_conf_files(self,check_file=True): + def check_conf_files(self,check_file=False): """ Checks configuration files (autosubmit, experiment 
jobs and platforms), looking for invalid values, missing required options. Prints results in log -- GitLab From bd2e3618f157d932c73e8459309ee173455f38e2 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Fri, 21 Aug 2020 16:52:29 +0200 Subject: [PATCH 10/23] disable check filepath in refresh --- autosubmit/autosubmit.py | 5 ++--- autosubmit/config/config_common.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 39e97d2a6..0d30b27bd 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2847,9 +2847,6 @@ class Autosubmit: Autosubmit._create_project_associated_conf( as_conf, False, update_job) - if project_type != "none": - # Check project configuration - as_conf.check_proj(False) # Load parameters Log.info("Loading parameters...") @@ -2971,6 +2968,8 @@ class Autosubmit: except portalocker.AlreadyLocked: message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" raise AutosubmitCritical(message,7000) + except AutosubmitCritical as e: + raise @staticmethod def _copy_code(as_conf, expid, project_type, force): diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 9d758bdd2..b5298556d 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -493,7 +493,7 @@ class AutosubmitConfig(object): else: section_file_path = parser.get_option(section,'FILE') try: - if not self.ignore_file_path: + if self.ignore_file_path: if not os.path.exists(section_file_path): if parser.check_exists(section, 'CHECK'): if not parser.get_option(section, 'CHECK') in "on_submission": -- GitLab From 2cf9d1868c0d95c40949383d4f8f4217d4484b2e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 07:13:41 +0200 Subject: [PATCH 11/23] little fix --- autosubmit/autosubmit.py | 5 ++--- 
autosubmit/config/config_common.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 0d30b27bd..b3a2e5670 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -940,7 +940,7 @@ class Autosubmit: os.system('clear') signal.signal(signal.SIGINT, signal_handler) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) - as_conf.check_conf_files(False) + as_conf.check_conf_files(True) project_type = as_conf.get_project_type() safetysleeptime = as_conf.get_safetysleeptime() Log.debug("The Experiment name is: {0}", expid) @@ -2854,8 +2854,7 @@ class Autosubmit: date_list = as_conf.get_date_list() if len(date_list) != len(set(date_list)): - Log.error('There are repeated start dates!') - return False + raise AutosubmitCritical('There are repeated start dates!',7000) num_chunks = as_conf.get_num_chunks() chunk_ini = as_conf.get_chunk_ini() member_list = as_conf.get_member_list() diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index b5298556d..ddd976c8e 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -494,12 +494,12 @@ class AutosubmitConfig(object): section_file_path = parser.get_option(section,'FILE') try: if self.ignore_file_path: - if not os.path.exists(section_file_path): + if not os.path.exists(os.path.join(self.get_project_dir(),section_file_path)): if parser.check_exists(section, 'CHECK'): if not parser.get_option(section, 'CHECK') in "on_submission": - self.wrong_config["Jobs"] += [[section, "FILE path doesn't exists, check parameter is found however is not in on_submission value"]] + self.wrong_config["Jobs"] += [[section, "FILE {0} doesn''t exists, check parameter is found however is not in on_submission value".format(section_file_path)]] else: - self.wrong_config["Jobs"] += [[section, "FILE path doesn't exists"]] + self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't 
exists".format(os.path.join(self.get_project_dir(),section_file_path))]] except BaseException: pass # tests conflict quick-patch if not parser.check_is_boolean(section, 'RERUN_ONLY', False): -- GitLab From 96e868b83e4f0875b7deb09b20a1f3fb2a4b226e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 09:56:18 +0200 Subject: [PATCH 12/23] little fix --- autosubmit/autosubmit.py | 7 +++++-- autosubmit/config/config_common.py | 13 +++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index b3a2e5670..1a82fa8c6 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2968,7 +2968,7 @@ class Autosubmit: message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" raise AutosubmitCritical(message,7000) except AutosubmitCritical as e: - raise + raise AutosubmitCritical(e.message,e.code) @staticmethod def _copy_code(as_conf, expid, project_type, force): @@ -2989,7 +2989,10 @@ class Autosubmit: if project_type == "git": submitter = Autosubmit._get_submitter(as_conf) submitter.load_platforms(as_conf) - hpcarch = submitter.platforms[as_conf.get_platform().lower()] + try: + hpcarch = submitter.platforms[as_conf.get_platform()] + except: + raise AutosubmitCritical("Can't set main platform",7000) return AutosubmitGit.clone_repository(as_conf, force, hpcarch) elif project_type == "svn": svn_project_url = as_conf.get_svn_project_url() diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index ddd976c8e..6b06eb607 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -446,8 +446,10 @@ class AutosubmitConfig(object): if len(self._platforms_parser.sections()) != len(set(self._platforms_parser.sections())): self.wrong_config["Platform"]+=[["Global", "Platforms found 
multiple times"]] - + main_platform_found = False for section in self._platforms_parser.sections(): + if section in self.hpcarch: + main_platform_found= True if not self._platforms_parser.check_exists(section, 'TYPE'): self.wrong_config["Platform"]+=[[section, "Mandatory TYPE parameter not found"]] platform_type = self._platforms_parser.get_option(section, 'TYPE', '').lower() @@ -468,6 +470,8 @@ class AutosubmitConfig(object): self.wrong_config["Platform"]+=[[ section, "Mandatory MAX_WAITING_JOBS parameter not found or non-integer"]] if not self._platforms_parser.check_is_int(section, 'TOTAL_JOBS', False): self.wrong_config["Platform"]+=[[ section, "Mandatory TOTAL_JOBS parameter not found or non-integer"]] + if not main_platform_found: + self.wrong_config["Expdef"] += [["Default", "Main platform is not defined! check if [HPCARCH = {0}] has any typo".format(self.hpcarch)]] if "Platform" not in self.wrong_config: Log.result('{0} OK'.format(os.path.basename(self._platforms_parser_file))) return True @@ -550,7 +554,11 @@ class AutosubmitConfig(object): if not parser.check_exists('DEFAULT', 'HPCARCH'): self.wrong_config["Expdef"]+=[['DEFAULT', "Mandatory HPCARCH parameter is invalid"]] - + else: + try: + self.hpcarch = self.get_platform() + except: + self.wrong_config["Expdef"] += [['Default', "HPCARCH value is not a valid platform (check typo)"]] if not parser.check_exists('experiment', 'DATELIST'): self.wrong_config["Expdef"]+=[['DEFAULT', "Mandatory DATELIST parameter is invalid"]] if not parser.check_exists('experiment', 'MEMBERS'): @@ -596,6 +604,7 @@ class AutosubmitConfig(object): else: self.wrong_config["Expdef"]+=[['project', "Mandatory project choice is invalid"]] + if "Expdef" not in self.wrong_config: Log.result('{0} OK'.format(os.path.basename(self._exp_parser_file))) return True -- GitLab From 4e54aadedf49ada86bf403375ca63735549d3316 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 14:12:15 +0200 Subject: [PATCH 13/23] Added codes --- 
autosubmit/autosubmit.py | 146 +++++++++++----------- autosubmit/config/config_common.py | 58 +++++---- autosubmit/database/db_common.py | 28 ++--- autosubmit/git/autosubmit_git.py | 10 +- autosubmit/job/job.py | 4 +- autosubmit/job/job_list.py | 2 +- autosubmit/job/job_list_persistence.py | 2 +- autosubmit/job/job_packages.py | 2 +- autosubmit/monitor/diagram.py | 2 +- autosubmit/platforms/paramiko_platform.py | 4 +- autosubmit/platforms/slurmplatform.py | 2 +- 11 files changed, 134 insertions(+), 126 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 1a82fa8c6..f018f0527 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -101,7 +101,7 @@ def signal_handler_create(signal_received, frame): :param signal_received: :param frame: """ - raise AutosubmitCritical('Autosubmit has been closed in an unexpected way. Killed or control + c.',7000) + raise AutosubmitCritical('Autosubmit has been closed in an unexpected way. Killed or control + c.',7010) class Autosubmit: """ @@ -496,7 +496,7 @@ class Autosubmit: if e.message == 0: # Version keyword force an exception in parse arg due and os_exit(0) but the program is succesfully finished print(Autosubmit.autosubmit_version) os._exit(0) - raise AutosubmitCritical("Incorrect arguments for this command",7000) + raise AutosubmitCritical("Incorrect arguments for this command",7011) expid = "None" @@ -584,7 +584,7 @@ class Autosubmit: tmp_path = os.path.join(exp_path, BasicConfig.LOCAL_TMP_DIR) aslogs_path = os.path.join(tmp_path, BasicConfig.LOCAL_ASLOG_DIR) if not os.path.exists(exp_path) and "create" not in command: - raise AutosubmitCritical("Experiment does not exist", 7000) + raise AutosubmitCritical("Experiment does not exist", 7012) if not os.path.exists(tmp_path): os.mkdir(tmp_path) if not os.path.exists(aslogs_path): @@ -604,9 +604,9 @@ class Autosubmit: current_owner_id = pwd.getpwuid(os.stat(os.path.join( BasicConfig.LOCAL_ROOT_DIR, expid)).st_uid).pw_name if 
current_user_id != current_owner_id: - raise AutosubmitCritical("You don't own the experiment {0}.".format(expid),7000) + raise AutosubmitCritical("You don't own the experiment {0}.".format(expid),7012) except BaseException as e: - raise AutosubmitCritical("User or owner does not exists",7000,e.message) + raise AutosubmitCritical("User or owner does not exists",7012,e.message) @staticmethod @@ -655,16 +655,16 @@ class Autosubmit: shutil.rmtree(os.path.join( BasicConfig.LOCAL_ROOT_DIR, expid_delete)) except OSError as e: - raise AutosubmitCritical('Can not delete experiment folder: ',7000,e.message) + raise AutosubmitCritical('Can not delete experiment folder: ',7012,e.message) Log.info("Deleting experiment from database...") ret = delete_experiment(expid_delete) if ret: Log.result("Experiment {0} deleted".format(expid_delete)) else: if currentOwner_id == 0: - raise AutosubmitCritical('Detected Eadmin user however, -f flag is not found. {0} can not be deleted!'.format(expid_delete), 7000) + raise AutosubmitCritical('Detected Eadmin user however, -f flag is not found. {0} can not be deleted!'.format(expid_delete), 7012) else: - raise AutosubmitCritical('Current user is not the owner of the experiment. {0} can not be deleted!'.format(expid_delete), 7000) + raise AutosubmitCritical('Current user is not the owner of the experiment. 
{0} can not be deleted!'.format(expid_delete), 7012) @staticmethod def expid(hpc, description, copy_id='', dummy=False, test=False, operational=False, root_folder=''): @@ -687,12 +687,12 @@ class Autosubmit: """ exp_id = None if description is None or hpc is None: - raise AutosubmitCritical("Check that the parameters are defined (-d and -H) ",7000) + raise AutosubmitCritical("Check that the parameters are defined (-d and -H) ",7011) if not copy_id: exp_id = new_experiment( description, Autosubmit.autosubmit_version, test, operational) if exp_id == '': - raise AutosubmitCritical("Couldn't create a new experiment",7000) + raise AutosubmitCritical("Couldn't create a new experiment",7011) try: os.mkdir(os.path.join(BasicConfig.LOCAL_ROOT_DIR, exp_id)) os.mkdir(os.path.join( @@ -730,7 +730,7 @@ class Autosubmit: exp_id, hpc, Autosubmit.autosubmit_version, dummy) except (OSError, IOError) as e: Autosubmit._delete_expid(exp_id) - raise AutosubmitCritical("Couldn't create a new experiment, permissions?", 7000, e.message) + raise AutosubmitCritical("Couldn't create a new experiment, permissions?", 7012, e.message) else: try: if root_folder == '' or root_folder is None: @@ -819,13 +819,13 @@ class Autosubmit: autosubmit_git = AutosubmitGit(copy_id[0]) Log.info("checking model version...") if not autosubmit_git.check_commit(autosubmit_config): - raise AutosubmitCritical("Uncommitted changes",7000) + raise AutosubmitCritical("Uncommitted changes",7013) else: - raise AutosubmitCritical("The experiment directory doesn't exist",7000) + raise AutosubmitCritical("The experiment directory doesn't exist",7012) except (OSError, IOError) as e: Autosubmit._delete_expid(exp_id, True) - raise AutosubmitCritical("Can not create experiment", 7000,e.message) + raise AutosubmitCritical("Can not create experiment", 7012,e.message) Log.debug("Creating temporal directory...") exp_id_path = os.path.join(BasicConfig.LOCAL_ROOT_DIR, exp_id) @@ -886,9 +886,9 @@ class Autosubmit: Log.debug('Enter 
Autosubmit._delete_expid {0}', expid) return Autosubmit._delete_expid(expid, force) else: - raise AutosubmitCritical("Insufficient permissions",7000) + raise AutosubmitCritical("Insufficient permissions",7012) else: - raise AutosubmitCritical("Experiment does not exist", 7000) + raise AutosubmitCritical("Experiment does not exist", 7012) @staticmethod def _load_parameters(as_conf, job_list, platforms): @@ -913,7 +913,7 @@ class Autosubmit: # Platform = from DEFAULT.HPCARCH, e.g. marenostrum4 if as_conf.get_platform().lower() not in platforms.keys(): raise AutosubmitCritical("Specified platform in expdef_.conf " + str(as_conf.get_platform( - ).lower()) + " is not a valid platform defined in platforms_.conf.",7000) + ).lower()) + " is not a valid platform defined in platforms_.conf.",7014) platform = platforms[as_conf.get_platform().lower()] platform.add_parameters(parameters, True) # Attach paramenters to JobList @@ -1148,7 +1148,7 @@ class Autosubmit: else: if as_conf.get_version() != '' and as_conf.get_version() != Autosubmit.autosubmit_version: raise AutosubmitCritical("Current experiment uses ({0}) which is not the running Autosubmit version \nPlease, update the experiment version if you wish to continue using AutoSubmit {1}\nYou can achieve this using the command autosubmit updateversion {2} \n" - "Or with the -v parameter: autosubmit run {2} -v ".format(as_conf.get_version(), Autosubmit.autosubmit_version, expid),7000 ) + "Or with the -v parameter: autosubmit run {2} -v ".format(as_conf.get_version(), Autosubmit.autosubmit_version, expid),7 ) # checking if there is a lock file to avoid multiple running on the same expid try: with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): @@ -1170,7 +1170,7 @@ class Autosubmit: try: job_list = Autosubmit.load_job_list(expid, as_conf, notransitive=notransitive) except BaseException as e: - raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored",7000,e.message) + raise 
AutosubmitCritical("Corrupted job_list, backup couldn''t be restored",7040,e.message) Log.debug("Starting from job list restored from {0} files", pkl_dir) @@ -1191,7 +1191,7 @@ class Autosubmit: try: packages_persistence = JobPackagePersistence(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "pkl"),"job_packages_" + expid) except BaseException as e: - raise AutosubmitCritical("Corrupted job_packages, python 2.7 and sqlite doesn''t allow to restore these packages",7000,e.message) + raise AutosubmitCritical("Corrupted job_packages, python 2.7 and sqlite doesn''t allow to restore these packages",7040,e.message) if as_conf.get_wrapper_type() != 'none': os.chmod(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "pkl", "job_packages_" + expid+".db"), 0644) @@ -1200,7 +1200,7 @@ class Autosubmit: except BaseException as e: raise AutosubmitCritical( "Corrupted job_packages, python 2.7 and sqlite doesn''t allow to restore these packages", - 7000, e.message) + 7040, e.message) for (exp_id, package_name, job_name) in packages: if package_name not in job_list.packages_dict: @@ -1273,7 +1273,7 @@ class Autosubmit: if wrapper_job.status != wrapper_job.new_status: Log.info('Wrapper job ' + wrapper_job.name + ' changed from ' + str(Status.VALUE_TO_KEY[wrapper_job.status]) + ' to status ' + str(Status.VALUE_TO_KEY[wrapper_job.new_status])) except: - raise AutosubmitCritical("Wrapper is in Unknown Status couldn't get wrapper parameters",7000) + raise AutosubmitCritical("Wrapper is in Unknown Status couldn't get wrapper parameters",7050) # New status will be saved and inner_jobs will be checked. 
wrapper_job.check_status(wrapper_job.new_status) @@ -1367,22 +1367,22 @@ class Autosubmit: try: job_list = Autosubmit.load_job_list(expid, as_conf, notransitive=notransitive) except BaseException as e: - raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7000, + raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7040, e.message) else: # Restore from files try: job_list = Autosubmit.load_job_list(expid, as_conf, notransitive=notransitive) except BaseException as e: - raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7000, + raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7040, e.message) if main_loop_retrials > 0: # Restore platforms and try again, to avoid endless loop with failed configuration, a hard limit is set. try: Autosubmit.restore_platforms(platforms_to_test) except: - raise AutosubmitCritical("Autosubmit couldn't recover the platforms",7000, e.message) + raise AutosubmitCritical("Autosubmit couldn't recover the platforms",7050, e.message) main_loop_retrials = main_loop_retrials - 1 else: - raise AutosubmitCritical("Autosubmit Encounter too much e rrors during running time",7000,e.message) + raise AutosubmitCritical("Autosubmit Encounter too much errors during running time",7051,e.message) except AutosubmitCritical as e: # Critical errors can't be recovered. 
Failed configuration or autosubmit error raise AutosubmitCritical(e.message, e.code, e.trace) except portalocker.AlreadyLocked: @@ -1498,7 +1498,7 @@ class Autosubmit: packages_persistence.save( package.name, package.jobs, package._expid, inspect) except WrongTemplateException as e: - raise AutosubmitCritical("Invalid parameter substitution in {0} template".format(e.job_name),7000) + raise AutosubmitCritical("Invalid parameter substitution in {0} template".format(e.job_name),7014) except AutosubmitCritical as e: raise AutosubmitCritical(e.message,e.code,e.trace) except Exception as e: @@ -1780,7 +1780,7 @@ class Autosubmit: expid, job_list, file_format, period_ini, period_fi, not hide) Log.result("Stats plot ready") except Exception as e: - raise AutosubmitCritical("Stats couldn't be shown",7000,e.message) + raise AutosubmitCritical("Stats couldn't be shown",7061,e.message) else: Log.info("There are no {0} jobs in the period from {1} to {2}...".format( ft, period_ini, period_fi)) @@ -1994,7 +1994,7 @@ class Autosubmit: "Checking [{0}] from platforms configuration...", platform) if not as_conf.get_migrate_user_to(platform): Log.printlog( - "Missing directive USER_TO in [{0}]".format( platform),7000) + "Missing directive USER_TO in [{0}]".format( platform),7014) error = True break if as_conf.get_migrate_project_to(platform): @@ -2094,7 +2094,7 @@ class Autosubmit: if error: Log.printlog( - "The experiment cannot be offered, reverting changes",7000) + "The experiment cannot be offered, reverting changes",7012) as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) as_conf.check_conf_files(False) @@ -2113,7 +2113,7 @@ class Autosubmit: else: if not Autosubmit.archive(experiment_id, False, False): Log.printlog( - "The experiment cannot be offered, reverting changes", 7000) + "The experiment cannot be offered, reverting changes", 7012) for platform in backup_files: p = submitter.platforms[platform] p.move_file(os.path.join( @@ -2132,7 +2132,7 @@ 
class Autosubmit: Log.info('Migrating experiment {0}'.format(experiment_id)) Log.info("Moving local files/dirs") if not Autosubmit.unarchive(experiment_id, False): - raise AutosubmitCritical("The experiment cannot be picked up",7000) + raise AutosubmitCritical("The experiment cannot be picked up",7012) Log.info("Local files/dirs have been successfully picked up") as_conf = AutosubmitConfig( experiment_id, BasicConfig, ConfigParserFactory()) @@ -2171,7 +2171,7 @@ class Autosubmit: if error: Autosubmit.archive(experiment_id, False, False) Log.printlog( - "The experiment cannot be picked,reverting changes.",7000) + "The experiment cannot be picked,reverting changes.",7012) for platform in backup_files: p = submitter.platforms[platform] p.send_command("rm -R " + p.root_dir) @@ -2366,7 +2366,7 @@ class Autosubmit: config_file.close() Log.result("Configuration file written successfully") except (IOError, OSError) as e: - raise AutosubmitCritical("Can not write config file: {0}",7000,e.message) + raise AutosubmitCritical("Can not write config file: {0}",7012,e.message) return True @staticmethod @@ -2386,9 +2386,9 @@ class Autosubmit: d = dialog.Dialog( dialog="dialog", autowidgetsize=True, screen_color='GREEN') except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7060) except Exception: - raise AutosubmitCritical("Dialog libs aren't found in your Operational system",7000) + raise AutosubmitCritical("Dialog libs aren't found in your Operational system",7060) d.set_background_title("Autosubmit configure utility") if os.geteuid() == 0: @@ -2410,7 +2410,7 @@ class Autosubmit: os.system('clear') return False except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7060) filename = 
'.autosubmitrc' if level == 'All': @@ -2447,16 +2447,16 @@ class Autosubmit: jobs_conf_path = parser.get('conf', 'jobs') except (IOError, OSError) as e: - raise AutosubmitCritical("Can not read config file",7000,e.message) + raise AutosubmitCritical("Can not read config file",7014,e.message) while True: try: code, database_path = d.dselect(database_path, width=80, height=20, title='\Zb\Z1Select path to database\Zn', colors='enable') except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7060) if Autosubmit._requested_exit(code, d): - raise AutosubmitCritical("Graphical visualization failed, requested exit", 7000) + raise AutosubmitCritical("Graphical visualization failed, requested exit", 7060) elif code == dialog.Dialog.OK: database_path = database_path.replace('~', home_path) if not os.path.exists(database_path): @@ -2471,11 +2471,11 @@ class Autosubmit: title='\Zb\Z1Select path to experiments repository\Zn', colors='enable') except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7060) if Autosubmit._requested_exit(code, d): - raise AutosubmitCritical("Graphical visualization failed,requested exit",7000) + raise AutosubmitCritical("Graphical visualization failed,requested exit",7060) elif code == dialog.Dialog.OK: database_path = database_path.replace('~', home_path) if not os.path.exists(database_path): @@ -2496,10 +2496,10 @@ class Autosubmit: form_height=10, title='\Zb\Z1Just a few more options:\Zn', colors='enable') except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size",7060) if Autosubmit._requested_exit(code, d): - raise 
AutosubmitCritical("Graphical visualization failed, _requested_exit", 7000) + raise AutosubmitCritical("Graphical visualization failed, _requested_exit", 7060) elif code == dialog.Dialog.OK: database_filename = tag[0] platforms_conf_path = tag[1] @@ -2530,10 +2530,10 @@ class Autosubmit: form_height=10, title='\Zb\Z1Mail notifications configuration:\Zn', colors='enable') except dialog.DialogError: - raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7000) + raise AutosubmitCritical("Graphical visualization failed, not enough screen size", 7060) if Autosubmit._requested_exit(code, d): - raise AutosubmitCritical("Graphical visualization failed, requested exit", 7000) + raise AutosubmitCritical("Graphical visualization failed, requested exit", 7060) elif code == dialog.Dialog.OK: smtp_hostname = tag[0] mail_from = tag[1] @@ -2565,7 +2565,7 @@ class Autosubmit: width=50, height=5) os.system('clear') except (IOError, OSError) as e: - raise AutosubmitCritical("Can not write config file", 7000,e.message) + raise AutosubmitCritical("Can not write config file", 7012,e.message) return True @staticmethod @@ -2588,10 +2588,10 @@ class Autosubmit: Log.info("Creating autosubmit database...") qry = resource_string('autosubmit.database', 'data/autosubmit.sql') if not create_db(qry): - raise AutosubmitCritical("Can not write database file", 7000) + raise AutosubmitCritical("Can not write database file", 7004) Log.result("Autosubmit database created successfully") else: - raise AutosubmitCritical("Database already exists.", 7000) + raise AutosubmitCritical("Database already exists.", 7004) return True @staticmethod @@ -2609,9 +2609,9 @@ class Autosubmit: Autosubmit._check_ownership(expid) as_conf = AutosubmitConfig(expid, BasicConfig, ConfigParserFactory()) as_conf.reload() - as_conf.check_expdef_conf() - - + as_conf.check_conf_files() + if "Expdef" in as_conf.wrong_config: + as_conf.show_messages() project_type = as_conf.get_project_type() if 
Autosubmit._copy_code(as_conf, expid, project_type, True): Log.result("Project folder updated") @@ -2657,7 +2657,7 @@ class Autosubmit: # Cleaning to reduce file size. version = get_autosubmit_version(expid) if version is not None and version.startswith('3') and not Autosubmit.clean(expid, True, True, True, False): - raise AutosubmitCritical("Can not archive project. Clean not successful", 7000) + raise AutosubmitCritical("Can not archive project. Clean not successful", 7012) # Getting year of last completed. If not, year of expid folder year = None @@ -2692,7 +2692,7 @@ class Autosubmit: tar.close() os.chmod(os.path.join(year_path, output_filepath), 0o755) except Exception as e: - raise AutosubmitCritical("Can not write tar file", 7000,e.message) + raise AutosubmitCritical("Can not write tar file", 7012,e.message) Log.info("Tar file created!") @@ -2713,7 +2713,7 @@ class Autosubmit: except Exception as e: Autosubmit.unarchive(expid, compress, True) - raise AutosubmitCritical("Can not remove or rename experiments folder",7000,e.message) + raise AutosubmitCritical("Can not remove or rename experiments folder",7012,e.message) Log.result("Experiment archived successfully") return True @@ -2769,7 +2769,7 @@ class Autosubmit: try: os.remove(archive_path) except Exception as e: - Log.printlog("Can not remove archived file folder: {0}".format(e),7000) + Log.printlog("Can not remove archived file folder: {0}".format(e),7012) return False Log.result("Experiment {0} unarchived successfully", experiment_id) @@ -2854,7 +2854,7 @@ class Autosubmit: date_list = as_conf.get_date_list() if len(date_list) != len(set(date_list)): - raise AutosubmitCritical('There are repeated start dates!',7000) + raise AutosubmitCritical('There are repeated start dates!',7014) num_chunks = as_conf.get_num_chunks() chunk_ini = as_conf.get_chunk_ini() member_list = as_conf.get_member_list() @@ -2963,7 +2963,7 @@ class Autosubmit: signal.signal(signal.SIGINT, signal_handler_create) fh.flush() 
os.fsync(fh.fileno()) - raise AutosubmitCritical("Stopped by user input", 7000) + raise AutosubmitCritical("Stopped by user input", 7010) except portalocker.AlreadyLocked: message = "We have detected that there is another Autosubmit instance using the experiment\n. Stop other Autosubmit instances that are using the experiment or delete autosubmit.lock file located on tmp folder" raise AutosubmitCritical(message,7000) @@ -2992,7 +2992,7 @@ class Autosubmit: try: hpcarch = submitter.platforms[as_conf.get_platform()] except: - raise AutosubmitCritical("Can't set main platform",7000) + raise AutosubmitCritical("Can't set main platform",7014) return AutosubmitGit.clone_repository(as_conf, force, hpcarch) elif project_type == "svn": svn_project_url = as_conf.get_svn_project_url() @@ -3018,7 +3018,7 @@ class Autosubmit: shutil.rmtree(project_path, ignore_errors=True) raise AutosubmitCritical("Can not check out revision {0} into {1}".format(svn_project_revision + " " + svn_project_url, - project_path),7000) + project_path),7062) Log.debug("{0}", output) elif project_type == "local": @@ -3037,7 +3037,7 @@ class Autosubmit: subprocess.call(cmd, shell=True) except subprocess.CalledProcessError: raise AutosubmitCritical("Can not rsync {0} into {1}. Exiting...".format( - local_project_path, project_path), 7000) + local_project_path, project_path), 7063) else: os.mkdir(local_destination) try: @@ -3046,7 +3046,7 @@ class Autosubmit: except subprocess.CalledProcessError: shutil.rmtree(project_path) raise AutosubmitCritical("Can not copy {0} into {1}. Exiting...".format( - local_project_path, project_path), 7000) + local_project_path, project_path), 7063) else: os.mkdir(project_path) os.mkdir(local_destination) @@ -3060,7 +3060,7 @@ class Autosubmit: except subprocess.CalledProcessError: shutil.rmtree(project_path) raise AutosubmitCritical( - "Can not copy {0} into {1}. Exiting...".format( local_project_path, project_path), 7000) + "Can not copy {0} into {1}. 
Exiting...".format( local_project_path, project_path), 7063) Log.debug("{0}", output) return True @@ -3173,7 +3173,7 @@ class Autosubmit: ".\n\tProcess stopped. Review the format of the provided input. Comparison is case sensitive." + \ "\n\tRemember that this option expects section names separated by a blank space as input." - raise AutosubmitCritical("Error in the supplied input for -ft.",7000,section_validation_message) + raise AutosubmitCritical("Error in the supplied input for -ft.",7011,section_validation_message) job_list = Autosubmit.load_job_list( expid, as_conf, notransitive=notransitive) submitter = Autosubmit._get_submitter(as_conf) @@ -3218,7 +3218,7 @@ class Autosubmit: job_validation_message += "\n\tSpecified job(s) : [" + str(job_not_foundList) + "] not found in the experiment " + \ str(expid) + ". \n\tProcess stopped. Review the format of the provided input. Comparison is case sensitive." + \ "\n\tRemember that this option expects job names separated by a blank space as input." - raise AutosubmitCritical("Error in the supplied input for -ft.",7000,section_validation_message) + raise AutosubmitCritical("Error in the supplied input for -ft.",7011,section_validation_message) # Validating fc if filter_chunks -fc has been set: if filter_chunks is not None: @@ -3282,7 +3282,7 @@ class Autosubmit: # Ending validation if fc_filter_is_correct == False: - raise AutosubmitCritical("Error in the supplied input for -fc.",7000,section_validation_message) + raise AutosubmitCritical("Error in the supplied input for -fc.",7011,section_validation_message) # Validating status, if filter_status -fs has been set: # At this point we already have job_list from where we are getting the allows STATUS if filter_status is not None: @@ -3314,7 +3314,7 @@ class Autosubmit: status_validation_message += "\n\t There are no jobs with status " + \ status + " in this experiment." 
if status_validation_error == True: - raise AutosubmitCritical("Error in the supplied input for -fs.",7000,section_validation_message) + raise AutosubmitCritical("Error in the supplied input for -fs.",7011,section_validation_message) jobs_filtered = [] final_status = Autosubmit._get_status(final) @@ -3400,7 +3400,7 @@ class Autosubmit: # Ending validation if filter_is_correct == False: - raise AutosubmitCritical("Error in the supplied input for -ftc.", 7000, section_validation_message) + raise AutosubmitCritical("Error in the supplied input for -ftc.", 7011, section_validation_message) # If input is valid, continue. record = dict() @@ -3805,7 +3805,7 @@ class Autosubmit: elif storage_type == 'db': return JobListPersistenceDb(os.path.join(BasicConfig.LOCAL_ROOT_DIR, expid, "pkl"), "job_list_" + expid) - raise AutosubmitCritical('Storage type not known',7000) + raise AutosubmitCritical('Storage type not known',7014) @staticmethod def _create_json(text): @@ -3913,7 +3913,7 @@ class Autosubmit: exp_parser = as_conf.get_parser( ConfigParserFactory(), as_conf.experiment_file) if exp_parser.get_bool_option('rerun', "RERUN", True): - raise AutosubmitCritical('Can not test a RERUN experiment',7000) + raise AutosubmitCritical('Can not test a RERUN experiment',7014) content = open(as_conf.experiment_file).read() if random_select: @@ -3925,7 +3925,7 @@ class Autosubmit: if platforms_parser.get_option(section, 'TEST_SUITE', 'false').lower() == 'true': test_platforms.append(section) if len(test_platforms) == 0: - raise AutosubmitCritical("Missing hpcarch setting in expdef",7000) + raise AutosubmitCritical("Missing hpcarch setting in expdef",7014) hpc = random.choice(test_platforms) if member is None: @@ -4024,9 +4024,9 @@ class Autosubmit: try: submitter.load_platforms(as_conf) if submitter.platforms is None: - raise AutosubmitCritical("platforms couldn't be loaded",7000) + raise AutosubmitCritical("platforms couldn't be loaded",7014) except: - raise 
AutosubmitCritical("platforms couldn't be loaded", 7000) + raise AutosubmitCritical("platforms couldn't be loaded", 7014) platforms = submitter.platforms platforms_to_test = set() diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 6b06eb607..401e72444 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -351,29 +351,8 @@ class AutosubmitConfig(object): :rtype: str """ return str(self._jobs_parser.get_option(section, 'CUSTOM_DIRECTIVES', '')) + def show_messages(self): - def check_conf_files(self,check_file=False): - """ - Checks configuration files (autosubmit, experiment jobs and platforms), looking for invalid values, missing - required options. Prints results in log - - :return: True if everything is correct, False if it finds any error - :rtype: bool - """ - Log.info('\nChecking configuration files...') - self.ignore_file_path = check_file - self.reload() - self.check_expdef_conf() - self.check_platforms_conf() - self.check_jobs_conf() - self.check_autosubmit_conf() - - try: - if self.get_project_type() != "none": - # Check proj configuration - self.check_proj() - except: - pass # test doesn't check proj if len(self.warn_config.keys()) == 0 and len(self.wrong_config.keys()) == 0: Log.result("Configuration files OK\n") elif len(self.warn_config.keys()) > 0 and len(self.wrong_config.keys()) == 0: @@ -394,9 +373,38 @@ class AutosubmitConfig(object): for parameter in self.wrong_config[section]: message += "\n[{0}] {1}".format(parameter[0], parameter[1]) message += "\n" - raise AutosubmitCritical(message,7000) + raise AutosubmitCritical(message,7014) else: return True + def check_conf_files(self,check_file=False): + """ + Checks configuration files (autosubmit, experiment jobs and platforms), looking for invalid values, missing + required options. 
Prints results in log + + :return: True if everything is correct, False if it finds any error + :rtype: bool + """ + Log.info('\nChecking configuration files...') + self.ignore_file_path = check_file + self.reload() + self.check_expdef_conf() + self.check_platforms_conf() + self.check_jobs_conf() + self.check_autosubmit_conf() + + try: + if self.get_project_type() != "none": + # Check proj configuration + self.check_proj() + except: + pass # test doesn't check proj + try: + result = self.show_messages() + return result + except AutosubmitCritical as e: + raise AutosubmitCritical(e.message,e.code,e.trace) + except Exception as e: + raise AutosubmitCritical("There was an error while showing the config log messages",7014,e.message) def check_autosubmit_conf(self): """ @@ -823,14 +831,14 @@ class AutosubmitConfig(object): output = subprocess.check_output("cd {0}; git rev-parse --abbrev-ref HEAD".format(full_project_path), shell=True) except subprocess.CalledProcessError as e: - raise AutosubmitCritical("Failed to retrieve project branch...",7000,e.message) + raise AutosubmitCritical("Failed to retrieve project branch...",7014,e.message) project_branch = output Log.debug("Project branch is: " + project_branch) try: output = subprocess.check_output("cd {0}; git rev-parse HEAD".format(full_project_path), shell=True) except subprocess.CalledProcessError as e: - raise AutosubmitCritical("Failed to retrieve project commit SHA...", 7000,e.message) + raise AutosubmitCritical("Failed to retrieve project commit SHA...", 7014,e.message) Log.critical("Failed to retrieve project commit SHA...") project_sha = output Log.debug("Project commit SHA is: " + project_sha) diff --git a/autosubmit/database/db_common.py b/autosubmit/database/db_common.py index c9d9ded05..d3b5b7e03 100644 --- a/autosubmit/database/db_common.py +++ b/autosubmit/database/db_common.py @@ -40,14 +40,14 @@ def create_db(qry): try: (conn, cursor) = open_conn(False) except DbException as e: - raise 
AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) try: cursor.executescript(qry) except sqlite3.Error as e: close_conn(conn, cursor) - raise AutosubmitCritical('Database can not be created',7000,e.message) + raise AutosubmitCritical('Database can not be created',7004,e.message) conn.commit() close_conn(conn, cursor) @@ -62,7 +62,7 @@ def check_db(): """ if not os.path.exists(BasicConfig.DB_PATH): - raise AutosubmitCritical('DB path does not exists: {0}'.format(BasicConfig.DB_PATH),7000) + raise AutosubmitCritical('DB path does not exists: {0}'.format(BasicConfig.DB_PATH),7003) return True @@ -100,12 +100,12 @@ def open_conn(check_version=True): # If database version is not the expected, update database.... if version < CURRENT_DATABASE_VERSION: if not _update_database(version, cursor): - raise AutosubmitCritical('Database version doesn''t match', 7000) + raise AutosubmitCritical('Database version doesn''t match', 7001) # ... or ask for autosubmit upgrade elif version > CURRENT_DATABASE_VERSION: raise AutosubmitCritical('Database version is not compatible with this autosubmit version. 
Please execute pip install ' - 'autosubmit --upgrade', 7000) + 'autosubmit --upgrade', 7002) return conn, cursor @@ -140,14 +140,14 @@ def save_experiment(name, description, version): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) try: cursor.execute('INSERT INTO experiment (name, description, autosubmit_version) VALUES (:name, :description, ' ':version)', {'name': name, 'description': description, 'version': version}) except sqlite3.IntegrityError as e: close_conn(conn, cursor) - raise AutosubmitCritical('Couldn''t register experiment',7000,e.message) + raise AutosubmitCritical('Couldn''t register experiment',7005,e.message) conn.commit() @@ -171,7 +171,7 @@ def check_experiment_exists(name, error_on_inexistence=True): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) conn.isolation_level = None # SQLite always return a unicode object, but we can change this @@ -182,7 +182,7 @@ def check_experiment_exists(name, error_on_inexistence=True): close_conn(conn, cursor) if row is None: if error_on_inexistence: - raise AutosubmitCritical('The experiment name "{0}" does not exist yet!!!', 7000) + raise AutosubmitCritical('The experiment name "{0}" does not exist yet!!!', 7005) return False return True @@ -202,7 +202,7 @@ def get_autosubmit_version(expid): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) conn.isolation_level = None # SQLite always return a unicode object, but we can change this 
@@ -212,7 +212,7 @@ def get_autosubmit_version(expid): row = cursor.fetchone() close_conn(conn, cursor) if row is None: - raise AutosubmitCritical('The experiment "{0}" does not exist'.format(expid),7000) + raise AutosubmitCritical('The experiment "{0}" does not exist'.format(expid),7005) return row[0] @@ -232,7 +232,7 @@ def last_name_used(test=False, operational=False): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) conn.text_factory = str if test: cursor.execute('SELECT name ' @@ -281,7 +281,7 @@ def delete_experiment(experiment_id): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7000,e.message) + raise AutosubmitCritical('Connection to database could not be established',7001,e.message) return False cursor.execute('DELETE FROM experiment ' 'WHERE name=:name', {'name': experiment_id}) @@ -317,7 +317,7 @@ def _update_database(version, cursor): 'WHERE autosubmit_version NOT NULL;') cursor.execute('UPDATE db_version SET version={0};'.format(CURRENT_DATABASE_VERSION)) except sqlite3.Error as e: - raise AutosubmitCritical('unable to update database version', 7000,e.message) + raise AutosubmitCritical('unable to update database version', 7001,e.message) Log.info("Update completed") return True diff --git a/autosubmit/git/autosubmit_git.py b/autosubmit/git/autosubmit_git.py index d2ef046dd..afa0d5f25 100644 --- a/autosubmit/git/autosubmit_git.py +++ b/autosubmit/git/autosubmit_git.py @@ -56,16 +56,16 @@ class AutosubmitGit: output = subprocess.check_output("cd {0}; git diff-index HEAD --".format(dirname_path), shell=True) except subprocess.CalledProcessError as e: - raise AutosubmitCritical("Failed to retrieve git info ...",7000,e.message) + raise AutosubmitCritical("Failed to retrieve 
git info ...",7064,e.message) if output: Log.info("Changes not committed detected... SKIPPING!") - raise AutosubmitCritical("Commit needed!",7000) + raise AutosubmitCritical("Commit needed!",7013) else: output = subprocess.check_output("cd {0}; git log --branches --not --remotes".format(dirname_path), shell=True) if output: Log.info("Changes not pushed detected... SKIPPING!") - raise AutosubmitCritical("Synchronization needed!", 7000) + raise AutosubmitCritical("Synchronization needed!", 7064) else: if not as_conf.set_git_project_commit(as_conf): return False @@ -124,7 +124,7 @@ class AutosubmitGit: :return: True if clone was successful, False otherwise """ if not as_conf.is_valid_git_repository(): - raise AutosubmitCritical("Incorrect Git Configuration, check origin,commit and branch settings of expdef file", 7000) + raise AutosubmitCritical("Incorrect Git Configuration, check origin,commit and branch settings of expdef file", 7064) git_project_origin = as_conf.get_git_project_origin() git_project_branch = as_conf.get_git_project_branch() git_remote_project_path = as_conf.get_git_remote_project_root() @@ -219,5 +219,5 @@ class AutosubmitGit: except subprocess.CalledProcessError as e: shutil.rmtree(project_path) - raise AutosubmitCritical("Can not clone {0} into {1}".format(git_project_branch + " " + git_project_origin, project_path), 7000,e.message) + raise AutosubmitCritical("Can not clone {0} into {1}".format(git_project_branch + " " + git_project_origin, project_path), 7065,e.message) return True diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 018ea3254..715dd326f 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -633,7 +633,7 @@ class Job(object): if communications_library == 'paramiko': return ParamikoSubmitter() # communications library not known - raise AutosubmitCritical( 'You have defined a not valid communications library on the configuration file', 7000) + raise AutosubmitCritical( 'You have defined a not valid 
communications library on the configuration file', 7014) def update_children_status(self): children = list(self.children) @@ -849,7 +849,7 @@ class Job(object): if communications_library == 'paramiko': return self._get_paramiko_template(snippet, template) else: - raise AutosubmitCritical("Job {0} does not have an correct template// template not found".format(self.name),7000) + raise AutosubmitCritical("Job {0} does not have an correct template// template not found".format(self.name),7014) def _get_paramiko_template(self, snippet, template): current_platform = self.platform diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index a6602a792..0dbc57f11 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -940,7 +940,7 @@ class JobList: Status.SUBMITTED and not job.status == Status.READY] if len(tmp) == len(active): # IF only held jobs left without dependencies satisfied if len(tmp) != 0 and len(active) != 0: - raise AutosubmitCritical("Only Held Jobs active. Exiting Autosubmit (TIP: This can happen if suspended or/and Failed jobs are found on the workflow)",7000) + raise AutosubmitCritical("Only Held Jobs active. 
Exiting Autosubmit (TIP: This can happen if suspended or/and Failed jobs are found on the workflow)",7066) active = [] return active diff --git a/autosubmit/job/job_list_persistence.py b/autosubmit/job/job_list_persistence.py index 2a100e9d4..13eabf88d 100644 --- a/autosubmit/job/job_list_persistence.py +++ b/autosubmit/job/job_list_persistence.py @@ -72,7 +72,7 @@ class JobListPersistencePkl(JobListPersistence): fd = open(path, 'r') return pickle.load(fd) else: - Log.printlog('File {0} does not exist'.format(path),7000) + Log.printlog('File {0} does not exist'.format(path),7040) return list() def save(self, persistence_path, persistence_file, job_list): diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py index 13942821c..175110018 100644 --- a/autosubmit/job/job_packages.py +++ b/autosubmit/job/job_packages.py @@ -93,7 +93,7 @@ class JobPackageBase(object): exit=True break if not os.path.exists(os.path.join(configuration.get_project_dir(), job.file)): - raise AutosubmitCritical("check=on_submission parameter didn't generate the template {0}".format(job.name),7000) + raise AutosubmitCritical("check=on_submission parameter didn't generate the template {0}".format(job.name),7014) if not job.check_script(configuration, parameters,show_logs=job.check_warnings): Log.warning("Script {0} check failed",job.name) Log.warning("On submission script has some empty variables") diff --git a/autosubmit/monitor/diagram.py b/autosubmit/monitor/diagram.py index fd7c7fbe7..b2497347d 100644 --- a/autosubmit/monitor/diagram.py +++ b/autosubmit/monitor/diagram.py @@ -43,7 +43,7 @@ def create_bar_diagram(experiment_id, jobs_list, general_stats, output_file, per message = "The results are too large to be shown, try narrowing your query. \n Use a filter like -ft where you supply a list of job types, e.g. 
INI, SIM; \ or -fp where you supply an integer that represents the number of hours into the past that should be queried: \ suppose it is noon, if you supply -fp 5 the query will consider changes starting from 7:00 am. If you really wish to query the whole experiment, refer to Autosubmit GUI." - raise AutosubmitCritical("Stats query out of bounds",7000,message) + raise AutosubmitCritical("Stats query out of bounds",7061,message) fig = plt.figure(figsize=(RATIO * 4, 3 * RATIO * num_plots)) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 0bffc5276..470b56002 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -94,7 +94,7 @@ class ParamikoPlatform(Platform): retry += 1 if not self.connected: trace='Can not create ssh or sftp connection to {0}: Connection could not be established to platform {1}\n Please, check your expid platform.conf to see if there are mistakes in the configuration\n Also Ensure that the login node listed on HOST parameter is available(try to connect via ssh on a terminal)\n Also you can put more than one host using a comma as separator'.format(self.host, self.name) - raise AutosubmitCritical('Experiment cant no continue without unexpected behaviour, Stopping Autosubmit',7000,trace) + raise AutosubmitCritical('Experiment cant no continue without unexpected behaviour, Stopping Autosubmit',7050,trace) def connect(self, reconnect=False): """ @@ -247,7 +247,7 @@ class ParamikoPlatform(Platform): except BaseException as e: Log.error('Could not remove file {0} due a wrong configuration'.format(os.path.join(self.get_files_path(), filename))) if e.lower().contains("garbage"): - raise AutosubmitCritical("Wrong User or invalid .ssh/config. Or invalid user in platform.conf or public key not set ",7000,e.message) + raise AutosubmitCritical("Wrong User or invalid .ssh/config. 
Or invalid user in platform.conf or public key not set ",7051,e.message) diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index 2c6a60166..db00c91c4 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -133,7 +133,7 @@ class SlurmPlatform(ParamikoPlatform): jobs_id.append(int(output.split(' ')[3])) return jobs_id except IndexError: - raise AutosubmitCritical("Submission failed. There are issues on your config file",7000) + raise AutosubmitCritical("Submission failed. There are issues on your config file",7014) def jobs_in_queue(self): dom = parseString('') jobs_xml = dom.getElementsByTagName("JB_job_number") -- GitLab From 46979c1f6a334979250c121e4c3d616e93c75299 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 15:05:04 +0200 Subject: [PATCH 14/23] readthedocs code addition (critical ones) --- docs/source/faq.rst | 192 +++++++++++++++++--------------------------- 1 file changed, 74 insertions(+), 118 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index abc03240c..10d184ccf 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -1,135 +1,91 @@ ############ -FAQ - Frequently Asked Questions +Critical Error codes - Solutions ############ -[CRITICAL] Unhandled exception on Autosubmit: [Errno 11] Resource temporarily unavailable +Database Issues - Critical Error codes [7001-7005] ==================== - -.. 
code-block:: python - - [CRITICAL] Unhandled exception on Autosubmit: [Errno 11] Resource temporarily unavailable - Traceback (most recent call last): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 402, in parse_args - args.group_by, args.expand, args.expand_status) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 2093, in set_status - with portalocker.Lock(os.path.join(tmp_path, 'autosubmit.lock'), timeout=1): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/portalocker-1.2.0-py2.7.egg/portalocker/utils.py", line 195, in __enter__ - return self.acquire() - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/portalocker-1.2.0-py2.7.egg/portalocker/utils.py", line 155, in acquire - raise exceptions.LockException(exception) - LockException: [Errno 11] Resource temporarily unavailable - - -Solution ++------------+------------+-----------+ +| Code | Details | Solution | ++============+============+===========+ +| 7001 | Connection to the db couldn't be established | Check if database exists | ++------------+------------+-----------+ +| 7002 | Wrong version | Check system sqlite version | ++------------+------------+-----------+ +| 7003 | DB doesn't exists | Check if database exists | ++------------+------------+-----------+ +| 7004 | Can't create a new database | check your user permissions | ++------------+------------+-----------+ +| 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. (If you are on BSC environment) | ++------------+------------+-----------+ + +Default Solution --------------- -Make sure the experiment is not still running. 
If it's not, delete the autosubmit.lock in the /tmp folder inside your experiment directory. +These issues are usually from server side, ask first in Autosubmit git if you don't have a custom installation- ---- - -[CRITICAL] Unhandled exception on Autosubmit: attempt to write a readonly database +Wrong User Input - Critical Error codes [7010-7030] ==================== - -.. code-block:: python - - [CRITICAL] Unhandled exception on Autosubmit: attempt to write a readonly database - Traceback (most recent call last): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit - .py", line 389, in parse_args - return Autosubmit.create(args.expid, args.noplot, args.hide, args.output, args.group_by, args.expand, args.expand_status) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit - .py", line 1953, in create - "job_packages_" + expid).reset_table() - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/job/job_pa - ckage_persistence.py", line 65, in reset_table - self.db_manager.drop_table(self.JOB_PACKAGES_TABLE) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/database/d - b_manager.py", line 65, in drop_table - cursor.execute(drop_command) - OperationalError: attempt to write a readonly database - -Solution ++------------+------------+-----------+ +| Code | Details | Solution | ++============+============+===========+ +| 7010 | Experiment has been halted in a manual way | ++------------+------------+-----------+ +| 7011 | Wrong arguments for an specific command | Check the command section for more info | ++------------+------------+-----------+ +| 7012 | Insufficient permissions for an specific 
experiment. | Check if you have enough permissions, experiment exists or specified expid has a typo| ++------------+------------+-----------+ +| 7013 | Pending commits | You must commit/synchronize pending changes in the experiment proj folder. | ++------------+------------+-----------+ +| 7014 | Wrong configuration | Check your experiment/conf files, also take a look to the ASLOG/command.log detailed output | ++------------+------------+-----------+ + +Default Solution --------------- -This usually happens when trying to run `autosubmit create` with an expid of another user, please double check the expid you are using. - +These issues are usually mistakes from the user input, check the avaliable logs and git resolved issues. Alternative, you can ask for help to Autosubmit team. ---- -[ERROR] Command sbatch -D ... failed with error message: sbatch: error: Batch job submission failed: Invalid account or account/partition combination specified +Platform issues - Critical Error codes. Local [7040-7050] and remote [7050-7060] ==================== - -Solution ++------------+------------+-----------+ +| Code | Details | Solution | ++============+============+===========+ +| 7040 | Invalid experiment pkl/db likely due a local platform failure | Should be recovered automatically, if not check if there is a backup file and do it manually | ++------------+------------+-----------+ +| 7041 | Weird job status | Weird Job status, try to recover experiment(check the recovery how-to for more info) if this issue persist please, report it to gitlab | ++------------+------------+-----------+ +| 7050 | Connection can't be established. | check your experiment platform configuration | ++------------+------------+-----------+ +| 7050 | Failure after a restart, connection can't be restored. | Check or ask (manually) if the remote platforms have any known issue | ++------------+------------+-----------+ +| 7051 | Invalid ssh configuration. | Check .ssh/config file. 
Additionally, Check if you can perform a password less connection to that platform. | ++------------+------------+-----------+ + +Default Solution --------------- -This can be due to an invalid configuration in your ~/.ssh/config file, so check if you are able to run a ssh command using the account displayed in the error message. -If so, once you are in the remote platform, type bsc_acct and see if the information for your username/account is displayed: - -.. code-block:: ini - - USER CONSUMED CPU: - - User:                                             Machine:          Used [khours]: - -If not, contact support referring to the problem and specifying your account. - +Check autosubmit log for detailed information, there will be additional error codes. ---- -[ERROR] Cannot send file to remote platform -=================================== - -.. code-block:: python - - [ERROR] marenostrum4 submission failed - [CRITICAL] Unhandled exception on Autosubmit: size mismatch in put! 0 != 38998 - Traceback (most recent call last): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 368, in parse_args - return Autosubmit.run_experiment(args.expid) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 776, in run_experiment - if Autosubmit.submit_ready_jobs(as_conf, job_list, platforms_to_test, packages_persistence): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 819, in submit_ready_jobs - package.submit(as_conf, job_list.parameters) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/job/job_packages.py", line 87, in submit - self._send_files() - File 
"/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/job/job_packages.py", line 115, in _send_files - self.platform.send_file(self._job_scripts[job.name]) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/platforms/paramiko_platform.py", line 129, in send_file - ftp.put(os.path.join(self.tmp_path, filename), os.path.join(self.get_files_path(), filename)) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/paramiko-1.15.0-py2.7.egg/paramiko/sftp_client.py", line 669, in put - return self.putfo(fl, remotepath, file_size, callback, confirm) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/paramiko-1.15.0-py2.7.egg/paramiko/sftp_client.py", line 635, in putfo - raise IOError('size mismatch in put! %d != %d' % (s.st_size, size)) - IOError: size mismatch in put! 0 != 38998 - -This happens when the quota has been reached and the machine is full - ----- - -[CRITICAL] Unhandled exception on Autosubmit: database is locked -=================================== - -.. 
code-block:: python - - [CRITICAL] Unhandled exception on Autosubmit: database is locked - Traceback (most recent call last): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 377, in parse_args - args.operational) != '' - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/autosubmit.py", line 532, in expid - exp_id = copy_experiment(copy_id, description, Autosubmit.autosubmit_version, test, operational) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/experiment/experiment_common.py", line 93, in copy_experiment - new_name = new_experiment(description, version, test, operational) - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/experiment/experiment_common.py", line 68, in new_experiment - if not db_common.save_experiment(new_name, description, version): - File "/shared/earth/software/autosubmit/3.11.0b-foss-2015a-Python-2.7.9/lib/python2.7/site-packages/autosubmit-3.10.0-py2.7.egg/autosubmit/database/db_common.py", line 151, in save_experiment - {'name': name, 'description': description, 'version': version}) - OperationalError: database is locked - -Solution +Uncatalogued Issues - Critical Error codes [7060+] +==================== ++------------+------------+-----------+ +| Code | Details | Solution | ++============+============+===========+ +| 7060 | Display issues during monitoring | try to use a different output or txt | ++------------+------------+-----------+ +| 7061 | Stat command failed | Check Aslogs command output, open a git issue | ++------------+------------+-----------+ +| 7062 | Svn issues | Check, in expdef, if url exists | ++------------+------------+-----------+ +| 7063 
| cp/rsync issues | Check if destination path exists | ++------------+------------+-----------+ +| 7064 | Git issues | check that the proj folder is a well configured git folder. Also, check [GIT] expdef config | ++------------+------------+-----------+ +| 7065 | Wrong git configuration | Invalid git url. Check [GIT] expdef config. If issue persists, check if proj folder is a well configured git folder. | ++------------+------------+-----------+ +| 7066 | Presubmission feature issues | New feature, this message should be prompt. Please report it to Git| ++------------+------------+-----------+ + +Default Solution --------------- -If you were trying to copy an experiment, make sure you put the -y immediately after expid: `autosubmit expid -y` - +Check autosubmit log for detailed information, there will be additional error codes. ---- - -bash: sbatch: command not found -=================================== - -Solution ---------------- -First, check your jobs_expid.conf and platforms_expid.conf files and make sure the platform assigned to the running job is defined correctly and is a SLURM platform. -If this is ok, check that the hostname of the platform you are using is also correctly defined in your ~/.ssh/config file. 
-- GitLab From 3ddf26e8864185f28ddc6ae5f7a3180e4722b12a Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 15:57:00 +0200 Subject: [PATCH 15/23] readthedocs code addition (critical ones) --- docs/source/faq.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 10d184ccf..3d3901dd8 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -4,6 +4,7 @@ Critical Error codes - Solutions Database Issues - Critical Error codes [7001-7005] ==================== + +------------+------------+-----------+ | Code | Details | Solution | +============+============+===========+ @@ -23,8 +24,10 @@ Default Solution These issues are usually from server side, ask first in Autosubmit git if you don't have a custom installation- ---- + Wrong User Input - Critical Error codes [7010-7030] ==================== + +------------+------------+-----------+ | Code | Details | Solution | +============+============+===========+ @@ -41,11 +44,14 @@ Wrong User Input - Critical Error codes [7010-7030] Default Solution --------------- + These issues are usually mistakes from the user input, check the avaliable logs and git resolved issues. Alternative, you can ask for help to Autosubmit team. + ---- Platform issues - Critical Error codes. Local [7040-7050] and remote [7050-7060] ==================== + +------------+------------+-----------+ | Code | Details | Solution | +============+============+===========+ @@ -62,11 +68,14 @@ Platform issues - Critical Error codes. Local [7040-7050] and remote [7050-7060 Default Solution --------------- + Check autosubmit log for detailed information, there will be additional error codes. 
+ ---- Uncatalogued Issues - Critical Error codes [7060+] ==================== + +------------+------------+-----------+ | Code | Details | Solution | +============+============+===========+ @@ -87,5 +96,7 @@ Uncatalogued Issues - Critical Error codes [7060+] Default Solution --------------- + Check autosubmit log for detailed information, there will be additional error codes. + ---- -- GitLab From 913e0a531b51efcb14f17247ad88fb6393197ee0 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 16:07:32 +0200 Subject: [PATCH 16/23] readthedocs code addition (critical ones) --- docs/source/faq.rst | 66 ++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 3d3901dd8..ce6b9d48f 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -5,19 +5,19 @@ Critical Error codes - Solutions Database Issues - Critical Error codes [7001-7005] ==================== -+------------+------------+-----------+ -| Code | Details | Solution | -+============+============+===========+ -| 7001 | Connection to the db couldn't be established | Check if database exists | -+------------+------------+-----------+ -| 7002 | Wrong version | Check system sqlite version | -+------------+------------+-----------+ ++------+----------------------------------------------+-------------------------------+ +| Code | Details | Solution | ++======+----------------------------------------------+===============================+ +| 7001 | Connection to the db couldn't be established | Check if database exists | ++------+----------------------------------------------+-------------------------------+ +| 7002 | Wrong version | Check system sqlite version | ++------+------------------+-------------------------------+ | 7003 | DB doesn't exists | Check if database exists | -+------------+------------+-----------+ ++------+-------------------+--------------------------+ | 7004 | Can't create a new database 
| check your user permissions | -+------------+------------+-----------+ ++------+--------------------------------+------------------------------+ | 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. (If you are on BSC environment) | -+------------+------------+-----------+ ++------+--------------------------------------+-----------------------------------------------------------------+ Default Solution --------------- @@ -32,15 +32,15 @@ Wrong User Input - Critical Error codes [7010-7030] | Code | Details | Solution | +============+============+===========+ | 7010 | Experiment has been halted in a manual way | -+------------+------------+-----------+ ++------+------------+-----------+ | 7011 | Wrong arguments for an specific command | Check the command section for more info | -+------------+------------+-----------+ ++------+------------+-----------+ | 7012 | Insufficient permissions for an specific experiment. | Check if you have enough permissions, experiment exists or specified expid has a typo| -+------------+------------+-----------+ ++------+------------+-----------+ | 7013 | Pending commits | You must commit/synchronize pending changes in the experiment proj folder. | -+------------+------------+-----------+ ++------+------------+-----------+ | 7014 | Wrong configuration | Check your experiment/conf files, also take a look to the ASLOG/command.log detailed output | -+------------+------------+-----------+ ++------+------------+-----------+ Default Solution --------------- @@ -52,19 +52,19 @@ These issues are usually mistakes from the user input, check the avaliable logs Platform issues - Critical Error codes. 
Local [7040-7050] and remote [7050-7060] ==================== -+------------+------------+-----------+ -| Code | Details | Solution | -+============+============+===========+ ++------+------------+----------+ +| Code | Details | Solution | ++======+=================================================================+================================================================================================+ | 7040 | Invalid experiment pkl/db likely due a local platform failure | Should be recovered automatically, if not check if there is a backup file and do it manually | -+------------+------------+-----------+ ++------+------------+-----------+ | 7041 | Weird job status | Weird Job status, try to recover experiment(check the recovery how-to for more info) if this issue persist please, report it to gitlab | -+------------+------------+-----------+ ++------+------------+-----------+ | 7050 | Connection can't be established. | check your experiment platform configuration | -+------------+------------+-----------+ ++------+------------+-----------+ | 7050 | Failure after a restart, connection can't be restored. | Check or ask (manually) if the remote platforms have any known issue | -+------------+------------+-----------+ ++------+------------+-----------+ | 7051 | Invalid ssh configuration. | Check .ssh/config file. Additionally, Check if you can perform a password less connection to that platform. 
| -+------------+------------+-----------+ ++------+------------+-----------+ Default Solution --------------- @@ -76,23 +76,23 @@ Check autosubmit log for detailed information, there will be additional error co Uncatalogued Issues - Critical Error codes [7060+] ==================== -+------------+------------+-----------+ -| Code | Details | Solution | -+============+============+===========+ ++------+------------+-----------+ +| Code | Details | Solution | ++======+===========+===========+ | 7060 | Display issues during monitoring | try to use a different output or txt | -+------------+------------+-----------+ ++------+------------+-----------+ | 7061 | Stat command failed | Check Aslogs command output, open a git issue | -+------------+------------+-----------+ ++------+------------+-----------+ | 7062 | Svn issues | Check, in expdef, if url exists | -+------------+------------+-----------+ ++------+------------+-----------+ | 7063 | cp/rsync issues | Check if destination path exists | -+------------+------------+-----------+ ++------+------------+-----------+ | 7064 | Git issues | check that the proj folder is a well configured git folder. Also, check [GIT] expdef config | -+------------+------------+-----------+ ++------+------------+-----------+ | 7065 | Wrong git configuration | Invalid git url. Check [GIT] expdef config. If issue persists, check if proj folder is a well configured git folder. | -+------------+------------+-----------+ ++------+------------+-----------+ | 7066 | Presubmission feature issues | New feature, this message should be prompt. 
Please report it to Git| -+------------+------------+-----------+ ++------+------------+-----------+ Default Solution --------------- -- GitLab From a6d6b4af369cb991fc96e78eecf5b1b6a9416d0d Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 16:10:54 +0200 Subject: [PATCH 17/23] readthedocs code addition (critical ones) --- docs/source/faq.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index ce6b9d48f..72b9a4083 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -5,19 +5,19 @@ Critical Error codes - Solutions Database Issues - Critical Error codes [7001-7005] ==================== -+------+----------------------------------------------+-------------------------------+ -| Code | Details | Solution | -+======+----------------------------------------------+===============================+ -| 7001 | Connection to the db couldn't be established | Check if database exists | -+------+----------------------------------------------+-------------------------------+ -| 7002 | Wrong version | Check system sqlite version | -+------+------------------+-------------------------------+ -| 7003 | DB doesn't exists | Check if database exists | -+------+-------------------+--------------------------+ -| 7004 | Can't create a new database | check your user permissions | -+------+--------------------------------+------------------------------+ -| 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. 
(If you are on BSC environment) | -+------+--------------------------------------+-----------------------------------------------------------------+ ++------+----------------------------------------------+-----------------------------------------------------------------+ +| Code | Details | Solution | ++======+----------------------------------------------+===============================+=================================+ +| 7001 | Connection to the db couldn't be established | Check if database exists | ++------+----------------------------------------------+-----------------------------------------------------------------+ +| 7002 | Wrong version | Check system sqlite version | ++------+----------------------------------------------+-----------------------------------------------------------------+ +| 7003 | DB doesn't exists | Check if database exists | ++------+----------------------------------------------+-----------------------------------------------------------------+ +| 7004 | Can't create a new database | check your user permissions | ++------+----------------------------------------------+-----------------------------------------------------------------+ +| 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. 
(If you are on BSC environment) | ++------+----------------------------------------------+-----------------------------------------------------------------+ Default Solution --------------- -- GitLab From 2f932206848b68dbf5675ced624dc9f08495934d Mon Sep 17 00:00:00 2001 From: dbeltran Date: Mon, 31 Aug 2020 17:14:48 +0200 Subject: [PATCH 18/23] readthedocs table --- docs/source/faq.rst | 143 ++++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 72b9a4083..cbee7bacb 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -1,102 +1,103 @@ -############ -Critical Error codes - Solutions -############ +################################## +Critical Error codes and solutions +################################## Database Issues - Critical Error codes [7001-7005] -==================== - -+------+----------------------------------------------+-----------------------------------------------------------------+ -| Code | Details | Solution | -+======+----------------------------------------------+===============================+=================================+ -| 7001 | Connection to the db couldn't be established | Check if database exists | -+------+----------------------------------------------+-----------------------------------------------------------------+ -| 7002 | Wrong version | Check system sqlite version | -+------+----------------------------------------------+-----------------------------------------------------------------+ -| 7003 | DB doesn't exists | Check if database exists | -+------+----------------------------------------------+-----------------------------------------------------------------+ -| 7004 | Can't create a new database | check your user permissions | -+------+----------------------------------------------+-----------------------------------------------------------------+ -| 7005 | AS database is corrupted or locked | Please, open a new 
issue ASAP. (If you are on BSC environment) | -+------+----------------------------------------------+-----------------------------------------------------------------+ +=================================================== + ++------------------------------------------------------------------------------------------------------------------------+ +| Code | Details | Solution | ++======+===============================================+=================================================================+ +| 7001 | Connection to the db could not be established | Check if database exists | ++------+-----------------------------------------------+-----------------------------------------------------------------+ +| 7002 | Wrong version | Check system sqlite version | ++------+-----------------------------------------------+-----------------------------------------------------------------+ +| 7003 | DB doesn't exists | Check if database exists | ++------+-----------------------------------------------+-----------------------------------------------------------------+ +| 7004 | Can't create a new database | check your user permissions | ++------+-----------------------------------------------+-----------------------------------------------------------------+ +| 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. 
(If you are on BSC environment) | ++------+-----------------------------------------------+-----------------------------------------------------------------+ Default Solution ---------------- +---------------- These issues are usually from server side, ask first in Autosubmit git if you don't have a custom installation- ---- Wrong User Input - Critical Error codes [7010-7030] -==================== - -+------------+------------+-----------+ -| Code | Details | Solution | -+============+============+===========+ -| 7010 | Experiment has been halted in a manual way | -+------+------------+-----------+ -| 7011 | Wrong arguments for an specific command | Check the command section for more info | -+------+------------+-----------+ -| 7012 | Insufficient permissions for an specific experiment. | Check if you have enough permissions, experiment exists or specified expid has a typo| -+------+------------+-----------+ -| 7013 | Pending commits | You must commit/synchronize pending changes in the experiment proj folder. 
| -+------+------------+-----------+ -| 7014 | Wrong configuration | Check your experiment/conf files, also take a look to the ASLOG/command.log detailed output | -+------+------------+-----------+ +==================================================== + ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| Code | Details | Solution | ++======+======================================================+================================================================================================+ +| 7010 | Experiment has been halted in a manual way | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 7011 | Wrong arguments for an specific command | Check the command section for more info | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 7012 | Insufficient permissions for an specific experiment. | Check if you have enough permissions, experiment exists or specified expid has a typo | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 7013 | Pending commits | You must commit/synchronize pending changes in the experiment proj folder. 
| ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 7014 | Wrong configuration | Check your experiment/conf files, also take a look to the ASLOG/command.log detailed output | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ Default Solution ---------------- +---------------- These issues are usually mistakes from the user input, check the avaliable logs and git resolved issues. Alternative, you can ask for help to Autosubmit team. ---- Platform issues - Critical Error codes. Local [7040-7050] and remote [7050-7060] -==================== - -+------+------------+----------+ -| Code | Details | Solution | -+======+=================================================================+================================================================================================+ -| 7040 | Invalid experiment pkl/db likely due a local platform failure | Should be recovered automatically, if not check if there is a backup file and do it manually | -+------+------------+-----------+ -| 7041 | Weird job status | Weird Job status, try to recover experiment(check the recovery how-to for more info) if this issue persist please, report it to gitlab | -+------+------------+-----------+ -| 7050 | Connection can't be established. | check your experiment platform configuration | -+------+------------+-----------+ -| 7050 | Failure after a restart, connection can't be restored. | Check or ask (manually) if the remote platforms have any known issue | -+------+------------+-----------+ -| 7051 | Invalid ssh configuration. | Check .ssh/config file. Additionally, Check if you can perform a password less connection to that platform. 
| -+------+------------+-----------+ +================================================================================= + ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Code | Details | Solution | ++======+=================================================================+=========================================================================================================================================+ +| 7040 | Invalid experiment pkl/db likely due a local platform failure | Should be recovered automatically, if not check if there is a backup file and do it manually | ++------+-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| 7041 | Weird job status | Weird Job status, try to recover experiment(check the recovery how-to for more info) if this issue persist please, report it to gitlab | ++------+-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| 7050 | Connection can't be established. | check your experiment platform configuration | ++------+-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| 7050 | Failure after a restart, connection can't be restored. 
| Check or ask (manually) if the remote platforms have any known issue | ++------+-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| 7051 | Invalid ssh configuration. | Check .ssh/config file. Additionally, Check if you can perform a password less connection to that platform. | ++------+-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ Default Solution ---------------- +---------------- Check autosubmit log for detailed information, there will be additional error codes. ---- Uncatalogued Issues - Critical Error codes [7060+] -==================== - -+------+------------+-----------+ -| Code | Details | Solution | -+======+===========+===========+ -| 7060 | Display issues during monitoring | try to use a different output or txt | -+------+------------+-----------+ -| 7061 | Stat command failed | Check Aslogs command output, open a git issue | -+------+------------+-----------+ -| 7062 | Svn issues | Check, in expdef, if url exists | -+------+------------+-----------+ -| 7063 | cp/rsync issues | Check if destination path exists | -+------+------------+-----------+ -| 7064 | Git issues | check that the proj folder is a well configured git folder. Also, check [GIT] expdef config | -+------+------------+-----------+ -| 7065 | Wrong git configuration | Invalid git url. Check [GIT] expdef config. If issue persists, check if proj folder is a well configured git folder. | -+------+------------+-----------+ -| 7066 | Presubmission feature issues | New feature, this message should be prompt. 
Please report it to Git| -+------+------------+-----------+ +=================================================== + ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Code | Details | Solution | ++======+=====================================+========================================================================================================================+ +| 7060 | Display issues during monitoring | try to use a different output or txt | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7061 | Stat command failed | Check Aslogs command output, open a git issue | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7062 | Svn issues | Check, in expdef, if url exists | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7063 | cp/rsync issues | Check if destination path exists | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7064 | Git issues | check that the proj folder is a well configured git folder. Also, check [GIT] expdef config | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7065 | Wrong git configuration | Invalid git url. Check [GIT] expdef config. If issue persists, check if proj folder is a well configured git folder. 
| ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ +| 7066 | Presubmission feature issues | New feature, this message should be prompt. Please report it to Git | ++------+-------------------------------------+------------------------------------------------------------------------------------------------------------------------+ Default Solution ---------------- +---------------- Check autosubmit log for detailed information, there will be additional error codes. ---- + -- GitLab From a37759f30465fb8ab5b64eb9a65acf08c490417e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 1 Sep 2020 10:51:37 +0200 Subject: [PATCH 19/23] readthedocs table error codes --- autosubmit/autosubmit.py | 8 ++--- autosubmit/config/config_common.py | 2 +- autosubmit/job/job.py | 19 +++++------ autosubmit/job/job_list.py | 2 +- autosubmit/notifications/mail_notifier.py | 2 +- autosubmit/platforms/paramiko_platform.py | 35 ++++++++++---------- autosubmit/platforms/slurmplatform.py | 6 ++-- docs/source/faq.rst | 40 ++++++++++++++++++++--- 8 files changed, 72 insertions(+), 42 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index f018f0527..adb7c8ce9 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -2078,12 +2078,12 @@ class Autosubmit: if not p.move_file(p.root_dir, os.path.join(p.temp_dir, experiment_id), True): Log.printlog( "The files/dirs on {0} cannot be moved to {1}.".format(p.root_dir, - os.path.join(p.temp_dir, experiment_id), 6000)) + os.path.join(p.temp_dir, experiment_id), 6012)) error = True break except (IOError, BaseException) as e: Log.printlog("The files/dirs on {0} cannot be moved to {1}.".format(p.root_dir, - os.path.join(p.temp_dir, experiment_id)),6000) + os.path.join(p.temp_dir, experiment_id)),6012) error = True break @@ -2162,7 +2162,7 @@ class Autosubmit: "Files/dirs on {0} have been successfully 
picked up", platform) except (IOError, BaseException): error = True - Log.printlog("The files/dirs on {0} cannot be copied to {1}.".format(os.path.join(p.temp_dir, experiment_id), p.root_dir),6000) + Log.printlog("The files/dirs on {0} cannot be copied to {1}.".format(os.path.join(p.temp_dir, experiment_id), p.root_dir),6012) break backup_files.append(platform) else: @@ -2761,7 +2761,7 @@ class Autosubmit: tar.close() except Exception as e: shutil.rmtree(exp_folder, ignore_errors=True) - Log.printlog("Can not extract tar file: {0}".format(e),6000) + Log.printlog("Can not extract tar file: {0}".format(e),6012) return False Log.info("Unpacking finished") diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 401e72444..f376fac55 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -364,7 +364,7 @@ class AutosubmitConfig(object): for parameter in self.warn_config[section]: message += "\n[{0}] {1} ".format(parameter[0],parameter[1]) message += "\n" - Log.printlog(message,6000) + Log.printlog(message,6013) if len(self.wrong_config.keys()) > 0: message = "On Configuration files:\n" diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 715dd326f..917ecbc44 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -546,9 +546,9 @@ class Job(object): platform.test_connection() self.retrieve_logfiles() except Exception: - Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6000) + Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6001) except AutosubmitCritical as e: # Critical errors can't be recovered. 
Failed configuration or autosubmit error - Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6000) + Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6001) try: platform.closeConnection() except: @@ -598,12 +598,12 @@ class Job(object): self.platform.get_completed_files(self.name) self.check_completion(Status.UNKNOWN) if self.status == Status.UNKNOWN: - Log.printlog("Job {0} is UNKNOWN. Checking completed files to confirm the failure...".format(self.name),6000) + Log.printlog("Job {0} is UNKNOWN. Checking completed files to confirm the failure...".format(self.name),6009) elif self.status == Status.COMPLETED: Log.result("Job {0} is COMPLETED", self.name) elif self.status == Status.SUBMITTED: # after checking the jobs , no job should have the status "submitted" - Log.printlog("Job {0} in SUBMITTED. This should never happen on this step..".format(self.name),6000) + Log.printlog("Job {0} in SUBMITTED status. This should never happen on this step..".format(self.name),6008) if previous_status != Status.RUNNING and self.status in [Status.COMPLETED, Status.FAILED, Status.UNKNOWN, Status.RUNNING]: @@ -654,7 +654,7 @@ class Job(object): if os.path.exists(log_name): self.status = Status.COMPLETED else: - Log.printlog("Job {0} completion check failed. There is no COMPLETED file".format(self.name),6000) + Log.printlog("Job {0} completion check failed. 
There is no COMPLETED file".format(self.name),6009) self.status = default_status def update_parameters(self, as_conf, parameters, @@ -1185,7 +1185,7 @@ class WrapperJob(Job): reason = self.platform.parse_queue_reason( self.platform._ssh_output, self.id) if self._queuing_reason_cancel(reason): - Log.printlog("Job {0} will be cancelled and set to FAILED as it was queuing due to {1}".format(self.name,reason),6000) + Log.printlog("Job {0} will be cancelled and set to FAILED as it was queuing due to {1}".format(self.name,reason),6009) self.cancel_failed_wrapper_job() self.update_failed_jobs() return @@ -1222,7 +1222,7 @@ class WrapperJob(Job): start_time = self.running_jobs_start[job] if self._is_over_wallclock(start_time, job.wallclock): # if self.as_config.get_wrapper_type() in ['vertical', 'horizontal']: - Log.printlog("Job {0} inside wrapper {1} is running for longer than it's wallclock! Cancelling...".format(job.name,self.name),6000) + Log.printlog("Job {0} inside wrapper {1} is running for longer than it's wallclock! 
Cancelling...".format(job.name,self.name),6009) job.new_status = Status.FAILED job.update_status(self.as_config.get_copy_remote_logs() == 'true') return True @@ -1293,7 +1293,7 @@ done job) if over_wallclock: Log.printlog( - "Job {0} is FAILED".format(jobname),6000) + "Job {0} is FAILED".format(jobname),6009) elif len(out) == 3: end_time = self._check_time(out, 2) @@ -1333,8 +1333,7 @@ done self._check_finished_job(job) def cancel_failed_wrapper_job(self): - Log.error("Cancelling job with id {0}".format(self.id)) - Log.printlog("Cancelling job with id {0}".format(self.id),6000) + Log.printlog("Cancelling job with id {0}".format(self.id),6009) self.platform.send_command( self.platform.cancel_cmd + " " + str(self.id)) diff --git a/autosubmit/job/job_list.py b/autosubmit/job/job_list.py index 0dbc57f11..724432d8b 100644 --- a/autosubmit/job/job_list.py +++ b/autosubmit/job/job_list.py @@ -1031,7 +1031,7 @@ class JobList: else: return list() except IOError: - Log.printlog("Autosubmit will use a backup for recover the job_list",6000) + Log.printlog("Autosubmit will use a backup for recover the job_list",6010) return list() def load(self): diff --git a/autosubmit/notifications/mail_notifier.py b/autosubmit/notifications/mail_notifier.py index 242810248..396b60478 100644 --- a/autosubmit/notifications/mail_notifier.py +++ b/autosubmit/notifications/mail_notifier.py @@ -36,7 +36,7 @@ class MailNotifier: try: self._send_mail(self.config.MAIL_FROM, mail, message) except BaseException as e: - Log.printlog('An error occurred while sending a mail for the job {0}', job_name,6000) + Log.printlog('An error occurred while sending a mail for the job {0}', job_name,6011) def _send_mail(self, mail_from, mail_to, message): server = smtplib.SMTP_SSL(self.config.SMTP_SERVER) diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 470b56002..93d7900ae 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ 
b/autosubmit/platforms/paramiko_platform.py @@ -80,7 +80,7 @@ class ParamikoPlatform(Platform): transport = self._ssh.get_transport() transport.send_ignore() except EOFError as e: - raise AutosubmitError("After a reconnection procedure, the platform is still not alive.",6000) + raise AutosubmitError("After a reconnection procedure, the platform is still not alive.",6002) def restore_connection(self): self.connected = True @@ -137,7 +137,7 @@ class ParamikoPlatform(Platform): if not reconnect and "," in self._host_config['hostname']: self.restore_connection(reconnect=True) else: - raise AutosubmitError("Couldn't establish a connection to the specified host, wrong configuration?",6000,e.message) + raise AutosubmitError("Couldn't establish a connection to the specified host, wrong configuration?",6003,e.message) def check_completed_files(self, sections=None): if self.host == 'localhost': @@ -188,9 +188,9 @@ class ParamikoPlatform(Platform): self._ftpChannel.chmod(remote_path,os.stat(local_path).st_mode) return True except IOError as e: - raise AutosubmitError('Can not send file {0} to {1}'.format(os.path.join(self.tmp_path, filename)), os.path.join(self.get_files_path(), filename), 6000, e.message) + raise AutosubmitError('Can not send file {0} to {1}'.format(os.path.join(self.tmp_path, filename)), os.path.join(self.get_files_path(), filename), 6004, e.message) except BaseException as e: - raise AutosubmitError('Send file failed. Connection seems to no be active',6000) + raise AutosubmitError('Send file failed. 
Connection seems to no be active',6004) # Gets .err and .out def get_file(self, filename, must_exist=True, relative_path=''): @@ -220,9 +220,9 @@ class ParamikoPlatform(Platform): return True except Exception as e: if str(e) in "Garbage": - raise AutosubmitError('Files couldn''t be retrieved, session not active'.format(filename),6000,e.message) + raise AutosubmitError('Files couldn''t be retrieved, session not active'.format(filename),6004,e.message) if must_exist: - raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not exists'.format(filename),6000,e.message) + raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not exists'.format(filename),6004,e.message) else: Log.printlog("Log file couldn't be retrieved: {0}".format(filename),5000) return False @@ -241,9 +241,8 @@ class ParamikoPlatform(Platform): self._ftpChannel.remove(os.path.join(self.get_files_path(), filename)) return True except IOError as e: - Log.printlog('{0} couldn''t be retrieved, session not active'.format(os.path.join(self.get_files_path(), filename)),5000) + Log.printlog('{0} couldn''t be retrieved, session not active'.format(os.path.join(self.get_files_path(), filename)),6004) return False - #raise AutosubmitError('Files couldn''t be retrieved, session not active'.format(filename), 6000, e.message) except BaseException as e: Log.error('Could not remove file {0} due a wrong configuration'.format(os.path.join(self.get_files_path(), filename))) if e.lower().contains("garbage"): @@ -269,11 +268,11 @@ class ParamikoPlatform(Platform): except (Exception,IOError) as e: if str(e) in "Garbage": - raise AutosubmitError('File {0} does not exists'.format(os.path.join(self.get_files_path(), src)),6000,e.message) + raise AutosubmitError('File {0} does not exists'.format(os.path.join(self.get_files_path(), src)),6004,e.message) if must_exist: - raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not 
exists'.format(os.path.join(self.get_files_path(), src)),6000,e.message) + raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not exists'.format(os.path.join(self.get_files_path(), src)),6004,e.message) else: - Log.printlog("Log file couldn't be moved: {0}".format(os.path.join(self.get_files_path(), src)),5000) + Log.printlog("Log file couldn't be moved: {0}".format(os.path.join(self.get_files_path(), src)),5001) return False def submit_job(self, job, script_name, hold=False): @@ -443,7 +442,7 @@ class ParamikoPlatform(Platform): for job in job_list: job_status = Status.UNKNOWN Log.warning('check_job() The job id ({0}) from platform {1} has an status of {2}.', job.id, self.name, job_status) - raise AutosubmitError("Some Jobs are in Unknown status",6000) + raise AutosubmitError("Some Jobs are in Unknown status",6008) #job.new_status=job_status @@ -523,15 +522,15 @@ class ParamikoPlatform(Platform): self._ssh_output += s for errorLine in stderr_readlines: if errorLine.find("submission failed") != -1 or errorLine.find("git clone") != -1: - raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines),6000)) + raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines),6005)) if not ignore_log: if len(stderr_readlines) > 0: - Log.printlog('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6000) + Log.printlog('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6006) else: Log.debug('Command {0} in {1} successful with out message: {2}', command, self.host, self._ssh_output) return True except BaseException as e: - raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6000,e.message) + raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6005,e.message) def 
parse_job_output(self, output): """ @@ -712,9 +711,9 @@ class ParamikoPlatform(Platform): if self.send_command(self.get_mkdir_cmd()): Log.debug('{0} has been created on {1} .', self.remote_log_dir, self.host) else: - raise AutosubmitError("SFTP session not active ", 6000,"Could not create the DIR {0} on HPC {1}'.format(self.remote_log_dir, self.host)".format(self.remote_log_dir, self.host)) + raise AutosubmitError("SFTP session not active ", 6007,"Could not create the DIR {0} on HPC {1}'.format(self.remote_log_dir, self.host)".format(self.remote_log_dir, self.host)) except BaseException as e: - raise AutosubmitError("SFTP session not active ", 6000,e.message) + raise AutosubmitError("SFTP session not active ", 6007,e.message) else: try: if self.send_command(self.get_mkdir_cmd()): @@ -722,7 +721,7 @@ class ParamikoPlatform(Platform): else: Log.error('Could not create the DIR {0} on HPC {1}'.format(self.remote_log_dir, self.host)) except BaseException as e: - raise AutosubmitError("Couldn''t send the file", 6000, e.message) + raise AutosubmitError("Couldn''t send the file", 6004, e.message) class ParamikoPlatformException(Exception): diff --git a/autosubmit/platforms/slurmplatform.py b/autosubmit/platforms/slurmplatform.py index db00c91c4..c0b553de3 100644 --- a/autosubmit/platforms/slurmplatform.py +++ b/autosubmit/platforms/slurmplatform.py @@ -79,11 +79,11 @@ class SlurmPlatform(ParamikoPlatform): jobs_id = self.get_submitted_job_id(self.get_ssh_output()) return jobs_id else: - raise AutosubmitError("Jobs couldn't be submitted, retry again in next iteration",6000) + raise AutosubmitError("Jobs couldn't be submitted, retry again in next iteration",6008) except IOError as e: - raise AutosubmitError("Submit script is not found, retry again in next AS iteration", 6000, e.message) + raise AutosubmitError("Submit script is not found, retry again in next AS iteration", 6008, e.message) except BaseException as e: - raise AutosubmitError("Job couldn't be submitted, retry 
again in next AS iteration", 6000, e.message) + raise AutosubmitError("Job couldn't be submitted, retry again in next AS iteration", 6008, e.message) def update_cmds(self): """ diff --git a/docs/source/faq.rst b/docs/source/faq.rst index cbee7bacb..3da92986e 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -1,5 +1,5 @@ ################################## -Critical Error codes and solutions +Error codes and solutions ################################## Database Issues - Critical Error codes [7001-7005] @@ -14,14 +14,14 @@ Database Issues - Critical Error codes [7001-7005] +------+-----------------------------------------------+-----------------------------------------------------------------+ | 7003 | DB doesn't exists | Check if database exists | +------+-----------------------------------------------+-----------------------------------------------------------------+ -| 7004 | Can't create a new database | check your user permissions | +| 7004 | Can't create a new database | Check your user permissions | +------+-----------------------------------------------+-----------------------------------------------------------------+ | 7005 | AS database is corrupted or locked | Please, open a new issue ASAP. (If you are on BSC environment) | +------+-----------------------------------------------+-----------------------------------------------------------------+ Default Solution ---------------- -These issues are usually from server side, ask first in Autosubmit git if you don't have a custom installation- +These issues are usually from server side, please, ask first in Autosubmit git if you don't have a custom installation. 
---- @@ -73,7 +73,7 @@ Check autosubmit log for detailed information, there will be additional error co ---- -Uncatalogued Issues - Critical Error codes [7060+] +Uncatalogued codes - Critical Error codes [7060+] =================================================== +---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -101,3 +101,35 @@ Check autosubmit log for detailed information, there will be additional error co ---- +Uncatalogued Issues - Platform Error codes [6000] +=================================================== + ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| Code | Details | Solution | ++======+======================================================+================================================================================================+ +| 6001 | Failed to retrieve log files | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6002 | Failed reconection | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6003 | Failed connection, wrong configuration | Check your platform.conf file | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6004 | input output issues | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6005 | Unable to 
execute the command | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6006 | Failed command | Check err output for more info, command worked but some issue was detected | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6007 | Broken sFTP connection | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6008 | Inconsistent/unexpected job status | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6009 | Failed job checker | Automatically, if there aren't bigger issues | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6010 | Corrupted job_list using backup | Automatically, if it fails, Perform mv /pkl/job_list_backup.pkl /pkl/job_list.pkl| ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6011 | Incorrect mail notifier configuration | Double check your mail configuration on job.conf (job status) and autosubmit.conf (email) | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6012 | Migrate, archive/unarchive I/O issues | Check migrate how-to configuration | 
++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ +| 6013 | Configuration issues | Check log output for more info | ++------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ \ No newline at end of file -- GitLab From 5d5857b5651690ae0788d535c5d11730da706ad5 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Tue, 1 Sep 2020 13:18:06 +0200 Subject: [PATCH 20/23] Error trace missing fix --- autosubmit/autosubmit.py | 5 +++-- autosubmit/platforms/paramiko_platform.py | 2 +- docs/source/faq.rst | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index adb7c8ce9..822e9c1b2 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1356,7 +1356,8 @@ class Autosubmit: job_list.save() time.sleep(safetysleeptime) except AutosubmitError as e: #If an error is detected, restore all connections and job_list, keep trying for 5 more retries - Log.error("{1} [eCode={0}]",e.code, e.message) + Log.error("Trace: {0}", e.trace) + Log.error("{1} [eCode={0}]", e.code, e.message) #Save job_list if not is a failed submitted job if "submitted" not in e.message: try: @@ -1373,7 +1374,7 @@ class Autosubmit: try: job_list = Autosubmit.load_job_list(expid, as_conf, notransitive=notransitive) except BaseException as e: - raise AutosubmitCritical("Corrupted job_list, backup couldn''t be restored", 7040, + raise AutosubmitCritical("Corrupted job_list, backup couldn't be restored", 7040, e.message) if main_loop_retrials > 0: # Restore platforms and try again, to avoid endless loop with failed configuration, a hard limit is set. 
try: diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 93d7900ae..06064de11 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -721,7 +721,7 @@ class ParamikoPlatform(Platform): else: Log.error('Could not create the DIR {0} on HPC {1}'.format(self.remote_log_dir, self.host)) except BaseException as e: - raise AutosubmitError("Couldn''t send the file", 6004, e.message) + raise AutosubmitError("Couldn''t send the file {0} on HPC {1}".format(self.remote_log_dir,self.host), 6004, e.message) class ParamikoPlatformException(Exception): diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 3da92986e..b0822d6a0 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -101,8 +101,8 @@ Check autosubmit log for detailed information, there will be additional error co ---- -Uncatalogued Issues - Platform Error codes [6000] -=================================================== +Minor errors - Error codes [6000+] +=================================== +------+------------------------------------------------------+------------------------------------------------------------------------------------------------+ | Code | Details | Solution | -- GitLab From dd3247bed40e4c50f2c0034e256cc91b0ebbaa4e Mon Sep 17 00:00:00 2001 From: dbeltran Date: Wed, 2 Sep 2020 14:44:30 +0200 Subject: [PATCH 21/23] Missing authentificatio nexception, and other tweaks to non mn4 platform. 
--- autosubmit/autosubmit.py | 10 +++-- autosubmit/config/config_common.py | 4 +- autosubmit/platforms/lsfplatform.py | 48 +++++++++++------------ autosubmit/platforms/paramiko_platform.py | 45 +++++++++++++-------- autosubmit/platforms/pbsplatform.py | 48 +++++++++++------------ autosubmit/platforms/psplatform.py | 48 +++++++++++------------ 6 files changed, 108 insertions(+), 95 deletions(-) diff --git a/autosubmit/autosubmit.py b/autosubmit/autosubmit.py index 822e9c1b2..09e8ec28c 100644 --- a/autosubmit/autosubmit.py +++ b/autosubmit/autosubmit.py @@ -1225,9 +1225,7 @@ class Autosubmit: Autosubmit.restore_platforms(platforms_to_test) # establish the connection to all platforms save = True while job_list.get_active(): - try: - if Autosubmit.exit: return 0 # reload parameters changes @@ -1377,11 +1375,11 @@ class Autosubmit: raise AutosubmitCritical("Corrupted job_list, backup couldn't be restored", 7040, e.message) if main_loop_retrials > 0: # Restore platforms and try again, to avoid endless loop with failed configuration, a hard limit is set. + main_loop_retrials = main_loop_retrials - 1 try: Autosubmit.restore_platforms(platforms_to_test) - except: + except BaseException: raise AutosubmitCritical("Autosubmit couldn't recover the platforms",7050, e.message) - main_loop_retrials = main_loop_retrials - 1 else: raise AutosubmitCritical("Autosubmit Encounter too much errors during running time",7051,e.message) except AutosubmitCritical as e: # Critical errors can't be recovered. 
Failed configuration or autosubmit error @@ -1485,6 +1483,8 @@ class Autosubmit: valid_packages_to_submit.append(package) except (IOError, OSError): continue + except AutosubmitError as e: + raise if hasattr(package, "name"): job_list.packages_dict[package.name] = package.jobs from job.job import WrapperJob @@ -1502,6 +1502,8 @@ class Autosubmit: raise AutosubmitCritical("Invalid parameter substitution in {0} template".format(e.job_name),7014) except AutosubmitCritical as e: raise AutosubmitCritical(e.message,e.code,e.trace) + except AutosubmitError as e: + raise except Exception as e: raise diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index f376fac55..845ca754c 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -509,7 +509,7 @@ class AutosubmitConfig(object): if not os.path.exists(os.path.join(self.get_project_dir(),section_file_path)): if parser.check_exists(section, 'CHECK'): if not parser.get_option(section, 'CHECK') in "on_submission": - self.wrong_config["Jobs"] += [[section, "FILE {0} doesn''t exists, check parameter is found however is not in on_submission value".format(section_file_path)]] + self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exists and check parameter is not set on_submission value".format(section_file_path)]] else: self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exists".format(os.path.join(self.get_project_dir(),section_file_path))]] except BaseException: @@ -604,7 +604,7 @@ class AutosubmitConfig(object): if not parser.check_exists('local', 'PROJECT_PATH'): self.wrong_config["Expdef"]+=[['local', "PROJECT_PATH parameter is invalid"]] elif project_type == 'none': #debug propouses - self.ignore_file_path = True + self.ignore_file_path = False if project_type != 'none': if not parser.check_exists('project_files', 'FILE_PROJECT_CONF'): diff --git a/autosubmit/platforms/lsfplatform.py b/autosubmit/platforms/lsfplatform.py index 
dd973552e..7c8c769d3 100644 --- a/autosubmit/platforms/lsfplatform.py +++ b/autosubmit/platforms/lsfplatform.py @@ -110,27 +110,27 @@ class LsfPlatform(ParamikoPlatform): ############################################################################### """.format(filename, queue, project, wallclock, num_procs, dependency, '\n'.ljust(13).join(str(s) for s in directives)) - def connect(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def restore_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def test_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True \ No newline at end of file + # def connect(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def restore_connection(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def test_connection(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True \ No newline at end of file diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 06064de11..9785e2c75 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -73,6 +73,7 @@ class ParamikoPlatform(Platform): try: transport = self._ssh.get_transport() transport.send_ignore() + pass except BaseException as e: try: self.reset() @@ -83,18 +84,21 @@ class 
ParamikoPlatform(Platform): raise AutosubmitError("After a reconnection procedure, the platform is still not alive.",6002) def restore_connection(self): - self.connected = True - if self._ssh is None: - retries = 2 - retry = 0 + try: self.connected = False - while self.connected is False and retry < retries: - if self.connect(True): - self.connected = True - retry += 1 - if not self.connected: - trace='Can not create ssh or sftp connection to {0}: Connection could not be established to platform {1}\n Please, check your expid platform.conf to see if there are mistakes in the configuration\n Also Ensure that the login node listed on HOST parameter is available(try to connect via ssh on a terminal)\n Also you can put more than one host using a comma as separator'.format(self.host, self.name) - raise AutosubmitCritical('Experiment cant no continue without unexpected behaviour, Stopping Autosubmit',7050,trace) + if self._ssh is None: + retries = 2 + retry = 0 + while self.connected is False and retry < retries: + self.connect(True) + retry += 1 + if not self.connected: + trace='Can not create ssh or sftp connection to {0}: Connection could not be established to platform {1}\n Please, check your expid platform.conf to see if there are mistakes in the configuration\n Also Ensure that the login node listed on HOST parameter is available(try to connect via ssh on a terminal)\n Also you can put more than one host using a comma as separator'.format(self.host, self.name) + raise AutosubmitCritical('Experiment cant no continue without unexpected behaviour, Stopping Autosubmit',7050,trace) + except AutosubmitCritical: + raise + except: + raise AutosubmitCritical('Cant connect to this platform due an unknown error',7050) def connect(self, reconnect=False): """ @@ -134,6 +138,8 @@ class ParamikoPlatform(Platform): self._ftpChannel = self._ssh.open_sftp() self.connected = True except BaseException as e: + if "Authentication failed." 
in e.message: + raise AutosubmitCritical("Authentication Failed, please check the platform.conf of {0}".format(self._host_config['hostname']),7050,e.message) if not reconnect and "," in self._host_config['hostname']: self.restore_connection(reconnect=True) else: @@ -220,9 +226,9 @@ class ParamikoPlatform(Platform): return True except Exception as e: if str(e) in "Garbage": - raise AutosubmitError('Files couldn''t be retrieved, session not active'.format(filename),6004,e.message) + raise AutosubmitError("Files couldn't be retrieved, session not active".format(filename),6004,e.message) if must_exist: - raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not exists'.format(filename),6004,e.message) + raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format(filename),6004,e.message) else: Log.printlog("Log file couldn't be retrieved: {0}".format(filename),5000) return False @@ -483,15 +489,17 @@ class ParamikoPlatform(Platform): timeout = 60/2 else: timeout = 60*2 + stderr_readlines = [] + stdout_chunks = [] try: stdin, stdout, stderr = self._ssh.exec_command(command) channel = stdout.channel channel.settimeout(timeout) stdin.close() channel.shutdown_write() - stdout_chunks = [] + stdout_chunks.append(stdout.channel.recv(len(stdout.channel.in_buffer))) - stderr_readlines = [] + while not channel.closed or channel.recv_ready() or channel.recv_stderr_ready(): # stop if channel was closed prematurely, and there is no data in the buffers. 
@@ -529,6 +537,9 @@ class ParamikoPlatform(Platform): else: Log.debug('Command {0} in {1} successful with out message: {2}', command, self.host, self._ssh_output) return True + except AttributeError as e: + raise AutosubmitError( + 'Session not active: {0}'.format(e.message), 6005) except BaseException as e: raise AutosubmitError('Command {0} in {1} warning: {2}'.format(command, self.host, '\n'.join(stderr_readlines)),6005,e.message) @@ -719,9 +730,9 @@ class ParamikoPlatform(Platform): if self.send_command(self.get_mkdir_cmd()): Log.debug('{0} has been created on {1} .', self.remote_log_dir, self.host) else: - Log.error('Could not create the DIR {0} on HPC {1}'.format(self.remote_log_dir, self.host)) + Log.error('Could not create the DIR {0} to HPC {1}'.format(self.remote_log_dir, self.host)) except BaseException as e: - raise AutosubmitError("Couldn''t send the file {0} on HPC {1}".format(self.remote_log_dir,self.host), 6004, e.message) + raise AutosubmitError("Couldn't send the file {0} to HPC {1}".format(self.remote_log_dir,self.host), 6004, e.message) class ParamikoPlatformException(Exception): diff --git a/autosubmit/platforms/pbsplatform.py b/autosubmit/platforms/pbsplatform.py index 5150a6b8e..7003323c4 100644 --- a/autosubmit/platforms/pbsplatform.py +++ b/autosubmit/platforms/pbsplatform.py @@ -101,27 +101,27 @@ class PBSPlatform(ParamikoPlatform): return self._checkjob_cmd + str(job_id) else: return "ssh " + self.host + " " + self.get_qstatjob(job_id) - def connect(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def restore_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def test_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - 
""" - self.connected = True \ No newline at end of file + # def connect(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def restore_connection(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def test_connection(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True \ No newline at end of file diff --git a/autosubmit/platforms/psplatform.py b/autosubmit/platforms/psplatform.py index a171992b4..e8981eec8 100644 --- a/autosubmit/platforms/psplatform.py +++ b/autosubmit/platforms/psplatform.py @@ -79,27 +79,27 @@ class PsPlatform(ParamikoPlatform): def get_checkjob_cmd(self, job_id): return self.get_pscall(job_id) - def connect(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def restore_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True - def test_connection(self): - """ - In this case, it does nothing because connection is established for each command - - :return: True - :rtype: bool - """ - self.connected = True \ No newline at end of file + # def connect(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def restore_connection(self): + # """ + # In this case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True + # def test_connection(self): + # """ + # In this 
case, it does nothing because connection is established for each command + # + # :return: True + # :rtype: bool + # """ + # self.connected = True \ No newline at end of file -- GitLab From 494e7f57b62c83e76236bd4da536bc75efbba624 Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 3 Sep 2020 12:01:13 +0200 Subject: [PATCH 22/23] Added log error on ecplatform --- autosubmit/platforms/ecplatform.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/autosubmit/platforms/ecplatform.py b/autosubmit/platforms/ecplatform.py index d364c7b6e..d473d230d 100644 --- a/autosubmit/platforms/ecplatform.py +++ b/autosubmit/platforms/ecplatform.py @@ -20,14 +20,13 @@ import os import subprocess from autosubmit.platforms.paramiko_platform import ParamikoPlatform, ParamikoPlatformException -from log.log import Log +from log.log import Log,AutosubmitCritical,AutosubmitError from autosubmit.platforms.headers.ec_header import EcHeader from autosubmit.platforms.headers.ec_cca_header import EcCcaHeader from autosubmit.platforms.headers.slurm_header import SlurmHeader from autosubmit.platforms.wrappers.wrapper_factory import EcWrapperFactory from time import sleep - class EcPlatform(ParamikoPlatform): """ Class to manage queues with ecaccess @@ -142,7 +141,7 @@ class EcPlatform(ParamikoPlatform): output = subprocess.check_output(command, shell=True) except subprocess.CalledProcessError as e: if not ignore_log: - Log.error('Could not execute command {0} on {1}'.format(e.cmd, self.host)) + raise AutosubmitError('Could not execute command {0} on {1}'.format(e.cmd, self.host),7500,e.message) return False self._ssh_output = output return True -- GitLab From d7edb893710ce4e257f58e7da6cd99da9f8b5b2d Mon Sep 17 00:00:00 2001 From: dbeltran Date: Thu, 3 Sep 2020 13:57:59 +0200 Subject: [PATCH 23/23] Fixed changes mentioned by miguel, fixed logs mechanism retrieval ( and now is working much better) --- autosubmit/config/basicConfig.py | 2 +- autosubmit/config/config_common.py | 19 
+++++++------ autosubmit/database/db_common.py | 12 ++++----- autosubmit/git/autosubmit_git.py | 2 +- autosubmit/job/job.py | 33 +++++++++++------------ autosubmit/job/job_packages.py | 2 +- autosubmit/platforms/paramiko_platform.py | 4 +-- bin/autosubmit | 1 + 8 files changed, 39 insertions(+), 36 deletions(-) diff --git a/autosubmit/config/basicConfig.py b/autosubmit/config/basicConfig.py index cd031d101..0cecd9058 100755 --- a/autosubmit/config/basicConfig.py +++ b/autosubmit/config/basicConfig.py @@ -29,7 +29,7 @@ from log.log import Log, AutosubmitError,AutosubmitCritical class BasicConfig: """ - Class to manage configuration for autosubmit path, database and default values for new experiments + Class to manage configuration for Autosubmit path, database and default values for new experiments """ def __init__(self): diff --git a/autosubmit/config/config_common.py b/autosubmit/config/config_common.py index 845ca754c..e44f11e67 100644 --- a/autosubmit/config/config_common.py +++ b/autosubmit/config/config_common.py @@ -356,11 +356,11 @@ class AutosubmitConfig(object): if len(self.warn_config.keys()) == 0 and len(self.wrong_config.keys()) == 0: Log.result("Configuration files OK\n") elif len(self.warn_config.keys()) > 0 and len(self.wrong_config.keys()) == 0: - Log.result("Configuration files contains some issues ignored") + Log.result("Configuration files contain some issues ignored") if len(self.warn_config.keys()) > 0: - message = "On Configuration files:\n" + message = "In Configuration files:\n" for section in self.warn_config: - message += "Issues on [{0}] config file:".format(section) + message += "Issues in [{0}] config file:".format(section) for parameter in self.warn_config[section]: message += "\n[{0}] {1} ".format(parameter[0],parameter[1]) message += "\n" @@ -387,22 +387,25 @@ class AutosubmitConfig(object): Log.info('\nChecking configuration files...') self.ignore_file_path = check_file self.reload() + #Annotates all errors found in the 
configuration files in dictionaries self.warn_config and self.wrong_config. self.check_expdef_conf() self.check_platforms_conf() self.check_jobs_conf() self.check_autosubmit_conf() - try: if self.get_project_type() != "none": # Check proj configuration self.check_proj() except: - pass # test doesn't check proj + pass # This exception is in case that the experiment doesn't contains any file ( usefull for test the workflow with None Option) + # End of checkers. + + # This Try/Except is in charge of print all the info gathered by all the checkers and stop the program if any critical error is found. try: result = self.show_messages() return result except AutosubmitCritical as e: - raise AutosubmitCritical(e.message,e.code,e.trace) + raise AutosubmitCritical(e.message,e.code,e.trace) # In case that there are critical errors in the configuration, Autosubmit won't continue. except Exception as e: raise AutosubmitCritical("There was an error while showing the config log messages",7014,e.message) @@ -509,9 +512,9 @@ class AutosubmitConfig(object): if not os.path.exists(os.path.join(self.get_project_dir(),section_file_path)): if parser.check_exists(section, 'CHECK'): if not parser.get_option(section, 'CHECK') in "on_submission": - self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exists and check parameter is not set on_submission value".format(section_file_path)]] + self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exist and check parameter is not set on_submission value".format(section_file_path)]] else: - self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exists".format(os.path.join(self.get_project_dir(),section_file_path))]] + self.wrong_config["Jobs"] += [[section, "FILE {0} doesn't exist".format(os.path.join(self.get_project_dir(),section_file_path))]] except BaseException: pass # tests conflict quick-patch if not parser.check_is_boolean(section, 'RERUN_ONLY', False): diff --git a/autosubmit/database/db_common.py 
b/autosubmit/database/db_common.py index d3b5b7e03..cfc1c7bed 100644 --- a/autosubmit/database/db_common.py +++ b/autosubmit/database/db_common.py @@ -40,7 +40,7 @@ def create_db(qry): try: (conn, cursor) = open_conn(False) except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) try: @@ -140,7 +140,7 @@ def save_experiment(name, description, version): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) try: cursor.execute('INSERT INTO experiment (name, description, autosubmit_version) VALUES (:name, :description, ' ':version)', @@ -171,7 +171,7 @@ def check_experiment_exists(name, error_on_inexistence=True): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) conn.isolation_level = None # SQLite always return a unicode object, but we can change this @@ -202,7 +202,7 @@ def get_autosubmit_version(expid): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) conn.isolation_level = None # SQLite always return a unicode object, but we can change this @@ -232,7 +232,7 @@ def last_name_used(test=False, operational=False): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) 
conn.text_factory = str if test: cursor.execute('SELECT name ' @@ -281,7 +281,7 @@ def delete_experiment(experiment_id): try: (conn, cursor) = open_conn() except DbException as e: - raise AutosubmitCritical('Connection to database could not be established',7001,e.message) + raise AutosubmitCritical("Could not establish a connection to database",7001,e.message) return False cursor.execute('DELETE FROM experiment ' 'WHERE name=:name', {'name': experiment_id}) diff --git a/autosubmit/git/autosubmit_git.py b/autosubmit/git/autosubmit_git.py index afa0d5f25..a4fd23847 100644 --- a/autosubmit/git/autosubmit_git.py +++ b/autosubmit/git/autosubmit_git.py @@ -124,7 +124,7 @@ class AutosubmitGit: :return: True if clone was successful, False otherwise """ if not as_conf.is_valid_git_repository(): - raise AutosubmitCritical("Incorrect Git Configuration, check origin,commit and branch settings of expdef file", 7064) + raise AutosubmitCritical("Incorrect git Configuration, check origin,commit and branch settings of expdef file", 7064) git_project_origin = as_conf.get_git_project_origin() git_project_branch = as_conf.get_git_project_branch() git_remote_project_path = as_conf.get_git_remote_project_root() diff --git a/autosubmit/job/job.py b/autosubmit/job/job.py index 917ecbc44..2a4f8bfaa 100644 --- a/autosubmit/job/job.py +++ b/autosubmit/job/job.py @@ -18,7 +18,7 @@ # along with Autosubmit. If not, see . """ -Main module for autosubmit. Only contains an interface class to all functionality implemented on autosubmit +Main module for Autosubmit. 
Only contains an interface class to all functionality implemented on Autosubmit """ import os @@ -518,16 +518,22 @@ class Job(object): retries = 3 sleeptime = 5 i = 0 + sleep(10) try: while (not out_exist or not err_exist) and i < retries: - out_exist = platform.check_file_exists( - remote_logs[0]) # will do 5 retries - err_exist = platform.check_file_exists( - remote_logs[1]) # will do 5 retries + try: + out_exist = platform.check_file_exists(remote_logs[0]) # will do 5 retries + err_exist = platform.check_file_exists(remote_logs[1]) # will do 5 retries + except AutosubmitError as e: + out_exist = False + err_exist = False + pass if not out_exist or not err_exist: sleeptime = sleeptime + 5 i = i + 1 sleep(sleeptime) + if i >= retries: + raise AutosubmitError("Failed to retrieve log files",6001) if out_exist and err_exist: if copy_remote_logs: if local_logs != remote_logs: @@ -537,23 +543,16 @@ class Job(object): platform.get_logs_files(self.expid, remote_logs) # Update the logs with Autosubmit Job Id Brand for local_log in local_logs: - platform.write_jobid(self.id, os.path.join( - self._tmp_path, 'LOG_' + str(self.expid), local_log)) + platform.write_jobid(self.id, os.path.join(self._tmp_path, 'LOG_' + str(self.expid), local_log)) except AutosubmitError as e: - Log.error("{1} [eCode={0}]", e.code, e.message) - # Save job_list if not is a failed submitted job - try: - platform.test_connection() - self.retrieve_logfiles() - except Exception: - Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6001) + Log.printlog("Failed to retrieve log file for job {0}".format(self.name), 6001) except AutosubmitCritical as e: # Critical errors can't be recovered. 
Failed configuration or autosubmit error - Log.printlog("Failed to retrieve log file for job {0}".format(self.name),6001) + Log.printlog("Failed to retrieve log file for job {0}".format(self.name), 6001) try: platform.closeConnection() except: pass - sleep(2) + sleep(5) # safe wait before end a thread return def update_status(self, copy_remote_logs=False): @@ -624,7 +623,7 @@ class Job(object): @staticmethod def _get_submitter(as_conf): """ - Returns the submitter corresponding to the communication defined on autosubmit's config file + Returns the submitter corresponding to the communication defined on Autosubmit's config file :return: submitter :rtype: Submitter diff --git a/autosubmit/job/job_packages.py b/autosubmit/job/job_packages.py index 175110018..00c4cbada 100644 --- a/autosubmit/job/job_packages.py +++ b/autosubmit/job/job_packages.py @@ -93,7 +93,7 @@ class JobPackageBase(object): exit=True break if not os.path.exists(os.path.join(configuration.get_project_dir(), job.file)): - raise AutosubmitCritical("check=on_submission parameter didn't generate the template {0}".format(job.name),7014) + raise AutosubmitCritical("Template [ {0} ] using CHECK=On_submission has some empty variable {0}".format(job.name),7014) if not job.check_script(configuration, parameters,show_logs=job.check_warnings): Log.warning("Script {0} check failed",job.name) Log.warning("On submission script has some empty variables") diff --git a/autosubmit/platforms/paramiko_platform.py b/autosubmit/platforms/paramiko_platform.py index 9785e2c75..75d0dbed7 100644 --- a/autosubmit/platforms/paramiko_platform.py +++ b/autosubmit/platforms/paramiko_platform.py @@ -247,7 +247,7 @@ class ParamikoPlatform(Platform): self._ftpChannel.remove(os.path.join(self.get_files_path(), filename)) return True except IOError as e: - Log.printlog('{0} couldn''t be retrieved, session not active'.format(os.path.join(self.get_files_path(), filename)),6004) + Log.printlog("{0} couldn't be retrieved, session not 
active".format(os.path.join(self.get_files_path(), filename)),6004) return False except BaseException as e: Log.error('Could not remove file {0} due a wrong configuration'.format(os.path.join(self.get_files_path(), filename))) @@ -276,7 +276,7 @@ class ParamikoPlatform(Platform): if str(e) in "Garbage": raise AutosubmitError('File {0} does not exists'.format(os.path.join(self.get_files_path(), src)),6004,e.message) if must_exist: - raise AutosubmitError('A critical file couldn''t be retrieved, File {0} does not exists'.format(os.path.join(self.get_files_path(), src)),6004,e.message) + raise AutosubmitError("A critical file couldn't be retrieved, File {0} does not exists".format(os.path.join(self.get_files_path(), src)),6004,e.message) else: Log.printlog("Log file couldn't be moved: {0}".format(os.path.join(self.get_files_path(), src)),5001) return False diff --git a/bin/autosubmit b/bin/autosubmit index 31dccac6b..64b1be42d 100755 --- a/bin/autosubmit +++ b/bin/autosubmit @@ -41,6 +41,7 @@ def main(): if e.trace is not None: Log.error("Trace: {0}", e.trace) Log.critical("{1} [eCode={0}]", e.code, e.message) + Log.info("More info at https://autosubmit.readthedocs.io/en/latest/faq.html") os._exit(1) except Exception as e: Log.error("Trace: {0}", e.message) -- GitLab