diff --git a/autosubmit_api/autosubmit_legacy/job/job_list.py b/autosubmit_api/autosubmit_legacy/job/job_list.py index cc6e085f87cc4247f5544c1772cf77b8f2b10282..b153650bbfa93ef33b6c54e4b78dbe7c04adddb3 100644 --- a/autosubmit_api/autosubmit_legacy/job/job_list.py +++ b/autosubmit_api/autosubmit_legacy/job/job_list.py @@ -31,14 +31,14 @@ from dateutil.relativedelta import * from autosubmit_api.autosubmit_legacy.job.job_utils import SubJob from autosubmit_api.autosubmit_legacy.job.job_utils import SubJobManager, job_times_to_text from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database.repositories import ExperimentStructureDbRepository from autosubmit_api.performance.utils import calculate_ASYPD_perjob, calculate_SYPD_perjob from autosubmit_api.components.jobs import utils as JUtils from autosubmit_api.monitor.monitor import Monitor from autosubmit_api.common.utils import Status from bscearth.utils.date import date2str, parse_date # from autosubmit_legacy.job.tree import Tree -from autosubmit_api.database import db_structure as DbStructure -from autosubmit_api.database.db_jobdata import JobDataStructure, JobRow +from autosubmit_api.components.jobdata import JobDataStructure, JobRow from autosubmit_api.builders.experiment_history_builder import ExperimentHistoryDirector, ExperimentHistoryBuilder from autosubmit_api.history.data_classes.job_data import JobData @@ -114,7 +114,7 @@ class JobList: else: raise Exception("Autosubmit couldn't fin the experiment header information necessary to complete this request.") job_list = job_data_structure.get_current_job_data( - run_id, all_states=True) + run_id) if not job_list: return [], [], {} else: @@ -592,7 +592,7 @@ class JobList: job_data = None try: experiment_history = ExperimentHistoryDirector(ExperimentHistoryBuilder(expid)).build_reader_experiment_history() - job_data = experiment_history.manager.get_all_last_job_data_dcs() if experiment_history.is_header_ready() else None + job_data = experiment_history.manager.get_all_last_job_data_dcs() except Exception: print(traceback.print_exc()) # Result variables @@ -606,7 +606,7 @@ class JobList: # Get structure if there are packages because package require special time calculation # print("Get Structure") if (job_to_package): - current_table_structure = DbStructure.get_structure(expid, path_structure) + current_table_structure = ExperimentStructureDbRepository(expid).get_structure() # Main loop # print("Start main loop") for job in allJobs: diff --git a/autosubmit_api/bgtasks/tasks/status_updater.py b/autosubmit_api/bgtasks/tasks/status_updater.py index 971cd1f14c7eea3a43228c8fb5c9d362878a858b..0be7307bc7b7edbd3128fc51f11c3bbbf9200619 100644 --- a/autosubmit_api/bgtasks/tasks/status_updater.py +++ b/autosubmit_api/bgtasks/tasks/status_updater.py @@ -1,15 +1,12 @@ -from datetime import datetime import os import time -from typing import Dict, List +from typing import List -from sqlalchemy import select from autosubmit_api.bgtasks.bgtask import BackgroundTaskTemplate -from autosubmit_api.database import tables -from autosubmit_api.database.common import ( - create_autosubmit_db_engine, - create_as_times_db_engine, - create_main_db_conn, +from autosubmit_api.database.repositories import ( + ExperimentStatusDbRepository, + ExperimentDbRepository, + ExperimentJoinDbRepository, ) from autosubmit_api.database.models import ExperimentModel from autosubmit_api.experiment.common_requests import _is_exp_running @@ -26,38 +23,21 @@ class StatusUpdater(BackgroundTaskTemplate): """ 
Clears the experiments that are not in the experiments table """ - with create_main_db_conn() as conn: - try: - del_stmnt = tables.experiment_status_table.delete().where( - tables.experiment_status_table.c.exp_id.not_in( - select(tables.experiment_table.c.id) - ) - ) - conn.execute(del_stmnt) - conn.commit() - except Exception as exc: - conn.rollback() - cls.logger.error( - f"[{cls.id}] Error while clearing missing experiments status: {exc}" - ) + + try: + ExperimentJoinDbRepository().drop_status_from_deleted_experiments() + except Exception as exc: + cls.logger.error( + f"[{cls.id}] Error while clearing missing experiments status: {exc}" + ) @classmethod def _get_experiments(cls) -> List[ExperimentModel]: """ Get the experiments list """ - with create_autosubmit_db_engine().connect() as conn: - query_result = conn.execute(tables.experiment_table.select()).all() - return [ExperimentModel.model_validate(row._mapping) for row in query_result] - - @classmethod - def _get_current_status(cls) -> Dict[str, str]: - """ - Get the current status of the experiments - """ - with create_as_times_db_engine().connect() as conn: - query_result = conn.execute(tables.experiment_status_table.select()).all() - return {row.name: row.status for row in query_result} + query_result = ExperimentDbRepository().get_all() + return [ExperimentModel.model_validate(row) for row in query_result] @classmethod def _check_exp_running(cls, expid: str) -> bool: @@ -87,30 +67,16 @@ class StatusUpdater(BackgroundTaskTemplate): @classmethod def _update_experiment_status(cls, experiment: ExperimentModel, is_running: bool): - with create_as_times_db_engine().connect() as conn: - try: - del_stmnt = tables.experiment_status_table.delete().where( - tables.experiment_status_table.c.exp_id == experiment.id - ) - ins_stmnt = tables.experiment_status_table.insert().values( - exp_id=experiment.id, - name=experiment.name, - status=( - RunningStatus.RUNNING - if is_running - else RunningStatus.NOT_RUNNING - ), - seconds_diff=0, - modified=datetime.now().isoformat(sep="-", timespec="seconds"), - ) - conn.execute(del_stmnt) - conn.execute(ins_stmnt) - conn.commit() - except Exception as exc: - conn.rollback() - cls.logger.error( - f"[{cls.id}] Error while doing database operations on experiment {experiment.name}: {exc}" - ) + try: + ExperimentStatusDbRepository().upsert_status( + experiment.id, + experiment.name, + RunningStatus.RUNNING if is_running else RunningStatus.NOT_RUNNING, + ) + except Exception as exc: + cls.logger.error( + f"[{cls.id}] Error while doing database operations on experiment {experiment.name}: {exc}" + ) @classmethod def procedure(cls): @@ -123,7 +89,7 @@ class StatusUpdater(BackgroundTaskTemplate): exp_list = cls._get_experiments() # Read current status of all experiments - current_status = cls._get_current_status() + current_status = ExperimentStatusDbRepository().get_all_dict() # Check every experiment status & update for experiment in exp_list: @@ -131,10 +97,7 @@ class StatusUpdater(BackgroundTaskTemplate): new_status = ( RunningStatus.RUNNING if is_running else RunningStatus.NOT_RUNNING ) - if ( - current_status.get(experiment.name, RunningStatus.NOT_RUNNING) - != new_status - ): + if current_status.get(experiment.name) != new_status: cls.logger.info( f"[{cls.id}] Updating status of {experiment.name} to {new_status}" ) diff --git a/autosubmit_api/builders/experiment_builder.py b/autosubmit_api/builders/experiment_builder.py index 4aab284df73fbdb3804af209b9b663352b050536..9a4720a34dd16f3a64b425ba4b06af325b3ef28a 
100644 --- a/autosubmit_api/builders/experiment_builder.py +++ b/autosubmit_api/builders/experiment_builder.py @@ -1,14 +1,11 @@ import datetime +from autosubmit_api.logger import logger from autosubmit_api.builders import BaseBuilder from autosubmit_api.builders.configuration_facade_builder import ( AutosubmitConfigurationFacadeBuilder, ConfigurationFacadeDirector, ) -from autosubmit_api.database import tables -from autosubmit_api.database.common import ( - create_autosubmit_db_engine, - create_main_db_conn, -) +from autosubmit_api.database.repositories import ExperimentDbRepository, ExperimentDetailsDbRepository from autosubmit_api.database.models import ExperimentModel @@ -24,19 +21,14 @@ class ExperimentBuilder(BaseBuilder): """ Produce basic information from the main experiment table """ - with create_autosubmit_db_engine().connect() as conn: - result = conn.execute( - tables.experiment_table.select().where( - tables.experiment_table.c.name == expid - ) - ).one() + result = ExperimentDbRepository().get_by_expid(expid) # Set new product self._product = ExperimentModel( - id=result.id, - name=result.name, - description=result.description, - autosubmit_version=result.autosubmit_version, + id=result["id"], + name=result["name"], + description=result["description"], + autosubmit_version=result["autosubmit_version"], ) def produce_details(self): @@ -44,12 +36,11 @@ class ExperimentBuilder(BaseBuilder): Produce data from the details table """ exp_id = self._product.id - with create_autosubmit_db_engine().connect() as conn: - result = conn.execute( - tables.details_table.select().where( - tables.details_table.c.exp_id == exp_id - ) - ).one_or_none() + result = None + try: + result = ExperimentDetailsDbRepository().get_by_exp_id(exp_id) + except Exception: + logger.error(f"Error getting details for exp_id {exp_id}") # Set details props if result: @@ -81,6 +72,7 @@ class ExperimentBuilder(BaseBuilder): ).isoformat() except Exception: self._product.modified = None + logger.error(f"Error getting modified date for expid {expid}") @property def product(self) -> ExperimentModel: diff --git a/autosubmit_api/builders/experiment_history_builder.py b/autosubmit_api/builders/experiment_history_builder.py index dd13a67b75371458644483387ff7a2bb18c52ebd..c2ac367021117628845d921a24a7a346f0cf37b1 100644 --- a/autosubmit_api/builders/experiment_history_builder.py +++ b/autosubmit_api/builders/experiment_history_builder.py @@ -1,9 +1,9 @@ #!/usr/bin/python3.7 -from ..history.experiment_history import ExperimentHistory -from ..history.internal_logging import Logging -from ..config.basicConfig import APIBasicConfig -from ..history.database_managers.experiment_history_db_manager import ExperimentHistoryDbManager -from .basic_builder import BasicBuilder +from autosubmit_api.history.experiment_history import ExperimentHistory +from autosubmit_api.history.internal_logging import Logging +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.history.database_managers.experiment_history_db_manager import ExperimentHistoryDbManager +from autosubmit_api.builders.basic_builder import BasicBuilder from abc import ABCMeta, abstractmethod class Builder(BasicBuilder, metaclass=ABCMeta): @@ -39,13 +39,11 @@ class ExperimentHistoryBuilder(Builder): def generate_experiment_history_db_manager(self): # type: () -> None self._validate_basic_config() - self.experiment_history_db_manager = ExperimentHistoryDbManager(self.expid, self.basic_config) + self.experiment_history_db_manager = 
ExperimentHistoryDbManager(self.expid) def initialize_experiment_history_db_manager(self): # type: () -> None - if not self.experiment_history_db_manager: - raise Exception("Experiment Database Manager is missing") - self.experiment_history_db_manager.initialize() + raise NotImplementedError def generate_logger(self): # type: () -> None @@ -58,7 +56,7 @@ class ExperimentHistoryBuilder(Builder): if not self.experiment_history_db_manager: raise Exception("Experiment Database Manager is missing") else: - if not self.experiment_history_db_manager.my_database_exists(): + if APIBasicConfig.DATABASE_BACKEND == "sqlite" and not self.experiment_history_db_manager.my_database_exists(): raise Exception("Job/Runs database does not exist") if not self.logger: raise Exception("Logging is missing.") @@ -69,18 +67,6 @@ class ExperimentHistoryDirector(object): # type: (Builder) -> None self.builder = builder - def build_current_experiment_history(self, basic_config=None): - # type: (APIBasicConfig) -> ExperimentHistory - """ Builds ExperimentHistory updated to current version. """ - if basic_config: - self.builder.set_basic_config(basic_config) - else: - self.builder.generate_basic_config() - self.builder.generate_experiment_history_db_manager() - self.builder.initialize_experiment_history_db_manager() - self.builder.generate_logger() - return self.builder.make_experiment_history() - def build_reader_experiment_history(self, basic_config=None): # type: (APIBasicConfig) -> ExperimentHistory """ Builds ExperimentHistory that doesn't update to current version automatically. """ diff --git a/autosubmit_api/components/experiment/graph_drawer.py b/autosubmit_api/components/experiment/graph_drawer.py new file mode 100644 index 0000000000000000000000000000000000000000..663a92220b67a8ad4080aeaf7263c15d736f6b36 --- /dev/null +++ b/autosubmit_api/components/experiment/graph_drawer.py @@ -0,0 +1,204 @@ +from typing import List, Optional, Tuple +import portalocker +import os +import traceback +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database.repositories.graph_draw import ExpGraphDrawDBRepository +from autosubmit_api.logger import logger +from autosubmit_api.monitor.monitor import Monitor + + +class ExperimentGraphDrawing: + def __init__(self, expid): + """ + Sets and validates graph drawing. + :param expid: Name of experiment + :type expid: str + """ + APIBasicConfig.read() + self.expid = expid + self.folder_path = APIBasicConfig.LOCAL_ROOT_DIR + self.graph_data_db = ExpGraphDrawDBRepository(expid) + self.graph_data_db.create_table() + self.lock_name = "calculation_in_progress.lock" + self.current_position_dictionary = None + self.current_jobs_set = set() + self.coordinates = list() + self.set_current_position() + self.should_update = False + self.locked = False + self.test_locked() + + def test_locked(self): + self.locked = True + try: + with portalocker.Lock( + os.path.join(self.folder_path, self.lock_name), timeout=1 + ) as fh: + self.locked = False + fh.flush() + os.fsync(fh.fileno()) + except portalocker.AlreadyLocked: + logger.error("It is locked") + self.locked = True + except Exception: + self.locked = True + + def get_validated_data(self, allJobs): + """ + Validates if should update current graph drawing. + :return: None if graph drawing should be updated, otherwise, it returns the position data.
+ :rype: None or dict() + """ + job_names = {job.name for job in allJobs} + # Validating content + difference = job_names - self.current_jobs_set + if difference and len(difference) > 0: + # Intersection found. Graph Drawing database needs to be updated + self.should_update = True + # Clear database + return None + return self.current_position_dictionary + # return None if self.should_update == True else self.current_position_dictionary + + def calculate_drawing( + self, allJobs, independent=False, num_chunks=48, job_dictionary=None + ): + """ + Called in a thread. + :param allJobs: list of jobs (usually from job_list object) + :type allJobs: list() + :return: Last row Id + :rtype: int + """ + lock_name = ( + "calculation_{}_in_progress.lock".format(self.expid) + if independent is True + else self.lock_name + ) + lock_path_file = os.path.join(self.folder_path, lock_name) + try: + with portalocker.Lock(lock_path_file, timeout=1) as fh: + monitor = Monitor() + graph = monitor.create_tree_list( + self.expid, allJobs, None, dict(), False, job_dictionary + ) + if len(allJobs) > 1000: + # Logic: Start with 48 as acceptable number of chunks for Gmaxiter = 100 + # Minimum Gmaxiter will be 10 + maxiter = max(10, 148 - num_chunks) + # print("Experiment {} num_chunk {} maxiter {}".format( + # self.expid, num_chunks, maxiter)) + result = graph.create( + [ + "dot", + "-Gnslimit=2", + "-Gnslimit1=2", + "-Gmaxiter={}".format(maxiter), + "-Gsplines=none", + "-v", + ], + format="plain", + ) + else: + result = graph.create("dot", format="plain") + for u in result.split(b"\n"): + splitList = u.split(b" ") + if len(splitList) > 1 and splitList[0].decode() == "node": + self.coordinates.append( + ( + splitList[1].decode(), + int(float(splitList[2].decode()) * 90), + int(float(splitList[3].decode()) * -90), + ) + ) + # self.coordinates[splitList[1]] = ( + # int(float(splitList[2]) * 90), int(float(splitList[3]) * -90)) + self.insert_coordinates() + fh.flush() + os.fsync(fh.fileno()) + os.remove(lock_path_file) + return self.get_validated_data(allJobs) + except portalocker.AlreadyLocked: + message = "Already calculating graph drawing." + print(message) + return None + except Exception as exc: + logger.error((traceback.format_exc())) + os.remove(lock_path_file) + logger.error( + ("Exception while calculating coordinates {}".format(str(exc))) + ) + return None + + def insert_coordinates(self) -> Optional[int]: + """ + Prepares and inserts new coordinates. + """ + try: + # Start by clearing database + self._clear_graph_database() + result = None + if self.coordinates and len(self.coordinates) > 0: + result = self._insert_many_graph_coordinates(self.coordinates) + return result + return None + except Exception as exc: + logger.error((str(exc))) + return None + + def set_current_position(self) -> None: + """ + Sets all registers in the proper variables. 
+ current_position_dictionary: JobName -> (x, y) + current_jobs_set: JobName + """ + current_table = self._get_current_position() + if current_table and len(current_table) > 0: + self.current_position_dictionary = { + row[1]: (row[2], row[3]) for row in current_table + } + self.current_jobs_set = set(self.current_position_dictionary.keys()) + + def _get_current_position(self) -> List[Tuple[int, str, int, int]]: + """ + Get all registers from experiment_graph_draw.\n + :return: row content: id, job_name, x, y + :rtype: 4-tuple (int, str, int, int) + """ + try: + result = self.graph_data_db.get_all() + return [(item.id, item.job_name, item.x, item.y) for item in result] + except Exception as exc: + logger.error((traceback.format_exc())) + logger.error((str(exc))) + return None + + def _insert_many_graph_coordinates( + self, values: List[Tuple[str, int, int]] + ) -> Optional[int]: + """ + Create many graph coordinates + """ + try: + _vals = [ + {"job_name": item[0], "x": item[1], "y": item[2]} for item in values + ] + logger.debug(_vals) + return self.graph_data_db.insert_many(_vals) + except Exception as exc: + logger.error((traceback.format_exc())) + logger.error("Error on Insert many graph drawing : {}".format(str(exc))) + return None + + def _clear_graph_database(self): + """ + Clear all content from graph drawing database + """ + try: + self.graph_data_db.delete_all() + except Exception as exc: + logger.error((traceback.format_exc())) + logger.error(("Error on Database clear: {}".format(str(exc)))) + return False + return True diff --git a/autosubmit_api/components/jobdata.py b/autosubmit_api/components/jobdata.py new file mode 100644 index 0000000000000000000000000000000000000000..1ef6e87a24459fd0cb4aff55dbd21b1f8b1d2284 --- /dev/null +++ b/autosubmit_api/components/jobdata.py @@ -0,0 +1,596 @@ +#!/usr/bin/env python + +# Copyright 2015 Earth Sciences Department, BSC-CNS + +# This file is part of Autosubmit. + +# Autosubmit is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# Autosubmit is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with Autosubmit. If not, see . 
+ +import time +import traceback +import collections +from datetime import datetime, timedelta +from json import loads +from autosubmit_api.components.jobs.utils import generate_job_html_title +# from networkx import DiGraph +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database.repositories import ExperimentRunDbRepository +from autosubmit_api.database.repositories.job_data import JobDataDbRepository +from autosubmit_api.monitor.monitor import Monitor +from autosubmit_api.performance.utils import calculate_ASYPD_perjob +from autosubmit_api.components.jobs.job_factory import SimJob +from autosubmit_api.common.utils import get_jobs_with_no_outliers, Status, datechunk_to_year +# from autosubmitAPIwu.job.job_list +# import autosubmitAPIwu.experiment.common_db_requests as DbRequests +from bscearth.utils.date import Log + + + +# Version 15 includes out err MaxRSS AveRSS and rowstatus +CURRENT_DB_VERSION = 15 # Used to be 10 or 0 +DB_VERSION_SCHEMA_CHANGES = 12 +DB_EXPERIMENT_HEADER_SCHEMA_CHANGES = 14 +_debug = True +JobItem_10 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', + 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data']) +JobItem_12 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', + 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data', 'nnodes', 'run_id']) +JobItem_15 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', + 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data', 'nnodes', 'run_id', 'MaxRSS', 'AveRSS', 'out', 'err', 'rowstatus']) + +ExperimentRunItem = collections.namedtuple('ExperimentRunItem', [ + 'run_id', 'created', 'start', 'finish', 'chunk_unit', 'chunk_size', 'completed', 'total', 'failed', 'queuing', 'running', 'submitted']) +ExperimentRunItem_14 = collections.namedtuple('ExperimentRunItem', [ + 'run_id', 'created', 'start', 'finish', 'chunk_unit', 'chunk_size', 'completed', 'total', 'failed', 'queuing', 'running', 'submitted', 'suspended', 'metadata']) + +ExperimentRow = collections.namedtuple( + 'ExperimentRow', ['exp_id', 'expid', 'status', 'seconds']) + +JobRow = collections.namedtuple( + 'JobRow', ['name', 'queue_time', 'run_time', 'status', 'energy', 'submit', 'start', 'finish', 'ncpus', 'run_id']) + + +class ExperimentRun(): + + def __init__(self, run_id, created=None, start=0, finish=0, chunk_unit="NA", chunk_size=0, completed=0, total=0, failed=0, queuing=0, running=0, submitted=0, suspended=0, metadata="", modified=None): + self.run_id = run_id + self.created = created if created else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') + self.start = start + self.finish = finish + self.chunk_unit = chunk_unit + self.chunk_size = chunk_size + self.submitted = submitted + self.queuing = queuing + self.running = running + self.completed = completed + self.failed = failed + self.total = total + self.suspended = suspended + self.metadata = metadata + self.modified = modified + + def getSYPD(self, job_list): + """ + Gets SYPD per run + """ + outlier_free_list = [] + if job_list: + performance_jobs = [SimJob.from_old_job_data(job_db) for job_db in job_list] + 
outlier_free_list = get_jobs_with_no_outliers(performance_jobs) + # print("{} -> {}".format(self.run_id, len(outlier_free_list))) + if len(outlier_free_list) > 0: + years_per_sim = datechunk_to_year(self.chunk_unit, self.chunk_size) + # print(self.run_id) + # print(years_per_sim) + seconds_per_day = 86400 + number_SIM = len(outlier_free_list) + # print(len(job_list)) + total_run_time = sum(job.run_time for job in outlier_free_list) + # print("run {3} yps {0} n {1} run_time {2}".format(years_per_sim, number_SIM, total_run_time, self.run_id)) + if total_run_time > 0: + return round((years_per_sim * number_SIM * seconds_per_day) / total_run_time, 2) + return None + + def getASYPD(self, job_sim_list, job_post_list, package_jobs): + """ + Gets ASYPD per run + package_jobs package_name => { job_id => (queue_time, parents, job_id, start_time) } + """ + SIM_no_outlier_list = [] + if job_sim_list and len(job_sim_list) > 0: + performance_jobs = [SimJob.from_old_job_data(job_db) for job_db in job_sim_list] + SIM_no_outlier_list = get_jobs_with_no_outliers(performance_jobs) + valid_names = set([job.name for job in SIM_no_outlier_list]) + job_sim_list = [job for job in job_sim_list if job.job_name in valid_names] + + # print("Run Id {}".format(self.run_id)) + if job_sim_list and len(job_sim_list) > 0 and job_post_list and len(job_post_list) > 0: + years_per_sim = datechunk_to_year(self.chunk_unit, self.chunk_size) + seconds_per_day = 86400 + number_SIM = len(job_sim_list) + number_POST = len(job_post_list) + + # print("SIM # {}".format(number_SIM)) + # print("POST # {}".format(number_POST)) + average_POST = round(sum(job.queuing_time(package_jobs.get( + job.rowtype, None) if package_jobs is not None else None) + job.running_time() for job in job_post_list) / number_POST, 2) + # print("Average POST {}".format(average_POST)) + # for job in job_sim_list: + # print("{} : {} {}".format(job.job_name, job.start, job.submit)) + # print("Run time {} -> {}".format(job.job_name, job.running_time())) + # print(job.job_name) + # print(package_jobs.get(job.rowtype, None)) + # print("Queue time {}".format(job.queuing_time(package_jobs.get( + # job.rowtype, None) if package_jobs is not None else None))) + sum_SIM = round(sum(job.queuing_time(package_jobs.get( + job.rowtype, None) if package_jobs is not None else None) + job.running_time() for job in job_sim_list), 2) + if (sum_SIM + average_POST) > 0: + return round((years_per_sim * number_SIM * seconds_per_day) / (sum_SIM + average_POST), 2) + return None + + +class JobData(object): + """Job Data object + """ + + def __init__(self, _id, counter=1, job_name="None", created=None, modified=None, submit=0, start=0, finish=0, status="UNKNOWN", rowtype=1, ncpus=0, wallclock="00:00", qos="debug", energy=0, date="", section="", member="", chunk=0, last=1, platform="NA", job_id=0, extra_data=dict(), nnodes=0, run_id=None, MaxRSS=0.0, AveRSS=0.0, out='', err='', rowstatus=0): + """[summary] + + Args: + _id (int): Internal Id + counter (int, optional): [description]. Defaults to 1. + job_name (str, optional): [description]. Defaults to "None". + created (datetime, optional): [description]. Defaults to None. + modified (datetime, optional): [description]. Defaults to None. + submit (int, optional): [description]. Defaults to 0. + start (int, optional): [description]. Defaults to 0. + finish (int, optional): [description]. Defaults to 0. + status (str, optional): [description]. Defaults to "UNKNOWN". + rowtype (int, optional): [description]. Defaults to 1. 
+ ncpus (int, optional): [description]. Defaults to 0. + wallclock (str, optional): [description]. Defaults to "00:00". + qos (str, optional): [description]. Defaults to "debug". + energy (int, optional): [description]. Defaults to 0. + date (str, optional): [description]. Defaults to "". + section (str, optional): [description]. Defaults to "". + member (str, optional): [description]. Defaults to "". + chunk (int, optional): [description]. Defaults to 0. + last (int, optional): [description]. Defaults to 1. + platform (str, optional): [description]. Defaults to "NA". + job_id (int, optional): [description]. Defaults to 0. + """ + self._id = _id + self.counter = counter + self.job_name = job_name + self.created = created if created else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') + self.modified = modified if modified else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') + self._submit = int(submit) + self._start = int(start) + self._finish = int(finish) + # self._queue_time = 0 + # self._run_time = 0 + self.status = status + self.rowtype = rowtype + self.ncpus = ncpus + self.wallclock = wallclock + self.qos = qos if qos else "debug" + self._energy = energy if energy else 0 + self.date = date if date else "" + # member and section were confused in the database. + self.section = section if section else "" + self.member = member if member else "" + self.chunk = chunk if chunk else 0 + self.last = last + self._platform = platform if platform and len( + platform) > 0 else "NA" + self.job_id = job_id if job_id else 0 + try: + self.extra_data = loads(extra_data) + except Exception: + self.extra_data = "" + + self.nnodes = nnodes + self.run_id = run_id + self.MaxRSS = MaxRSS + self.AveRSS = AveRSS + self.out = out + self.err = err + self.rowstatus = rowstatus + + self.require_update = False + self.metric_SYPD = None + self.metric_ASYPD = None + # self.title = getTitle(self.job_name, Monitor.color_status( + # Status.STRING_TO_CODE[self.status]), self.status) + self.tree_parent = [] + + @property + def title(self): + return generate_job_html_title(self.job_name, Monitor.color_status(Status.STRING_TO_CODE[self.status]), self.status) + + def calculateSYPD(self, years_per_sim): + """ + """ + seconds_in_a_day = 86400 + # Make sure it is possible to generate + # print("yps {0} date {1} chunk {2}".format( + # years_per_sim, self.date, self.chunk)) + if (years_per_sim > 0 and self.date is not None and len(self.date) > 0 and self.chunk > 0): + # print("run {0}".format(self.running_time())) + self.metric_SYPD = round(years_per_sim * seconds_in_a_day / + self.running_time(), 2) if self.running_time() > 0 else None + + def calculateASYPD(self, chunk_unit, chunk_size, job_package_data, average_post_time): + """ + Calculates ASYPD for a job in a run + + :param chunk_unit: chunk unit of the experiment + :type chunk_unit: str + :param chunk_size: chunk size of the experiment + :type chunk_size: str + :param job_package_data: jobs in the package (if self belongs to a package) + :type: list() + :param average_post_time: average queuing + running time of the post jobs in the run of self. 
+ :type average_post_time: float + :return: void + :rtype: void + """ + result_ASYPD = calculate_ASYPD_perjob( + chunk_unit, chunk_size, self.chunk, self.queuing_time(job_package_data) + self.running_time(), average_post_time, Status.STRING_TO_CODE[self.status]) + self.metric_ASYPD = result_ASYPD if result_ASYPD > 0 else None + + def delta_queue_time(self, job_data_in_package=None): + """ + Retrieves queuing time in timedelta format HH:mm:ss + """ + return str(timedelta(seconds=self.queuing_time(job_data_in_package))) + + def delta_running_time(self): + return str(timedelta(seconds=self.running_time())) + + def submit_datetime(self): + if self.submit > 0: + return datetime.fromtimestamp(self.submit) + return None + + def start_datetime(self): + if self.start > 0: + return datetime.fromtimestamp(self.start) + # if self.last == 0 and self.submit > 0: + # return datetime.fromtimestamp(self.submit) + return None + + def finish_datetime(self): + if self.finish > 0: + return datetime.fromtimestamp(self.finish) + # if self.last == 0: + # if self.start > 0: + # return datetime.fromtimestamp(self.start) + # if self.submit > 0: + # return datetime.fromtimestamp(self.submit) + return None + + def submit_datetime_str(self): + o_datetime = self.submit_datetime() + if o_datetime: + return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') + else: + return None + + def start_datetime_str(self): + o_datetime = self.start_datetime() + if o_datetime: + return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') + else: + return None + + def finish_datetime_str(self): + o_datetime = self.finish_datetime() + if o_datetime: + return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') + else: + return None + + def queuing_time(self, job_data_in_package=None): + """ + Calculates the queuing time of the job. + jobs_data_in_package dict job_id => (queue_time, parents, job_name, start_time, finish_time) + + Returns: + int: queueing time + """ + max_queue = queue = 0 + job_name_max_queue = None + + if job_data_in_package and len(job_data_in_package) > 0: + # Only consider those jobs with starting time less than the start time of the job minus 20 seconds. + + jobs_times = [job_data_in_package[key] + for key in job_data_in_package if job_data_in_package[key][3] < (self._start - 20)] + + if jobs_times and len(jobs_times) > 0: + # There are previous jobs + # Sort by Queuing Time from Highest to Lowest + jobs_times.sort(key=lambda a: a[0], reverse=True) + # Select the maximum queue time + max_queue, _, job_name_max_queue, start, finish = jobs_times[0] + # Add the running time to the max queue time + max_queue += (finish - start) if finish > start else 0 + + if self.status in ["SUBMITTED", "QUEUING", "RUNNING", "COMPLETED", "HELD", "PREPARED", "FAILED"]: + # Substract the total time from the max_queue job in the package + # This adjustment should cover most of the wrapper types. + # TODO: Test this mechanism against all wrapper types + queue = int((self.start if self.start > + 0 else time.time()) - self.submit) - int(max_queue) + if queue > 0: + return queue + return 0 + + def running_time(self): + """Calculates the running time of the job. 
+ + Returns: + int: running time + """ + if self.status in ["RUNNING", "COMPLETED", "FAILED"]: + # print("Finish: {0}".format(self.finish)) + if self.start == 0: + return 0 + + run = int((self.finish if self.finish > + 0 else time.time()) - self.start) + # print("RUN {0}".format(run)) + if run > 0: + return run + return 0 + + def energy_string(self): + return str(int(self.energy / 1000)) + "K" + + @property + def submit(self): + return int(self._submit) + + @property + def start(self): + if int(self._start) > 0: + return int(self._start) + if self.last == 0: + if int(self.submit) > 0: + return int(self._submit) + return int(self._start) + + @property + def finish(self): + if int(self._finish) > 0: + return int(self._finish) + if self.last == 0: + if int(self._start) > 0: + return int(self._start) + if int(self._submit) > 0: + return int(self._submit) + return int(self._finish) + + @property + def platform(self): + return self._platform + + @property + def energy(self): + """ + Return as integer + """ + return int(self._energy) + + @submit.setter + def submit(self, submit): + self._submit = int(submit) + + @start.setter + def start(self, start): + self._start = int(start) + + @finish.setter + def finish(self, finish): + self._finish = int(finish) + + @platform.setter + def platform(self, platform): + self._platform = platform if platform and len(platform) > 0 else "NA" + + @energy.setter + def energy(self, energy): + # print("Energy {0}".format(energy)) + if energy > 0: + if (energy != self._energy): + # print("Updating energy to {0} from {1}.".format( + # energy, self._energy)) + self.require_update = True + self._energy = energy if energy else 0 + + +class JobDataStructure: + + def __init__(self, expid: str, basic_config: APIBasicConfig): + """Initializes the object based on the unique identifier of the experiment. + + Args: + expid (str): Experiment identifier + """ + self.db_version = 99 # Previous versions are unsupported + + self.exp_run_db = ExperimentRunDbRepository(expid) + self.job_data_db = JobDataDbRepository(expid) + + def __str__(self): + return "Data structure" + + def get_max_id_experiment_run(self): + """ + Get last (max) experiment run object. 
+ :return: ExperimentRun data + :rtype: ExperimentRun object + """ + try: + current_experiment_run = self.exp_run_db.get_last_run() + + if current_experiment_run: + return ExperimentRun( + run_id=current_experiment_run.get("run_id"), + created=current_experiment_run.get("created"), + start=current_experiment_run.get("start"), + finish=current_experiment_run.get("finish"), + chunk_unit=current_experiment_run.get("chunk_unit"), + chunk_size=current_experiment_run.get("chunk_size"), + completed=current_experiment_run.get("completed"), + total=current_experiment_run.get("total"), + failed=current_experiment_run.get("failed"), + queuing=current_experiment_run.get("queuing"), + running=current_experiment_run.get("running"), + submitted=current_experiment_run.get("submitted"), + suspended=current_experiment_run.get("suspended", 0), + metadata=current_experiment_run.get("metadata", ""), + modified=current_experiment_run.get("modified"), + ) + else: + return None + except Exception as exp: + print((str(exp))) + print((traceback.format_exc())) + return None + + def get_experiment_run_by_id(self, run_id: int): + """ + Get experiment run stored in database by run_id + """ + try: + current_experiment_run = self.exp_run_db.get_run_by_id(run_id) + + if current_experiment_run: + return ExperimentRun( + run_id=current_experiment_run.get("run_id"), + created=current_experiment_run.get("created"), + start=current_experiment_run.get("start"), + finish=current_experiment_run.get("finish"), + chunk_unit=current_experiment_run.get("chunk_unit"), + chunk_size=current_experiment_run.get("chunk_size"), + completed=current_experiment_run.get("completed"), + total=current_experiment_run.get("total"), + failed=current_experiment_run.get("failed"), + queuing=current_experiment_run.get("queuing"), + running=current_experiment_run.get("running"), + submitted=current_experiment_run.get("submitted"), + suspended=current_experiment_run.get("suspended", 0), + metadata=current_experiment_run.get("metadata", ""), + modified=current_experiment_run.get("modified"), + ) + else: + return None + except Exception as exc: + if _debug is True: + Log.info(traceback.format_exc()) + Log.debug(traceback.format_exc()) + Log.warning( + "Autosubmit couldn't retrieve experiment run. get_experiment_run_by_id. Exception {0}".format(str(exc))) + return None + + def get_current_job_data(self, run_id: int): + """ + Gets the job historical data for a run_id. 
+ :param run_id: Run identifier + :type run_id: int + :return: List of jobdata rows + :rtype: list() of JobData objects + """ + try: + current_job_data = self.job_data_db.get_last_job_data_by_run_id(run_id) + + if current_job_data: + current_collection = [] + for job_data in current_job_data: + current_collection.append( + JobData( + _id=job_data.get("id"), + counter=job_data.get("counter"), + job_name=job_data.get("job_name"), + created=job_data.get("created"), + modified=job_data.get("modified"), + submit=job_data.get("submit"), + start=job_data.get("start"), + finish=job_data.get("finish"), + status=job_data.get("status"), + rowtype=job_data.get("rowtype"), + ncpus=job_data.get("ncpus"), + wallclock=job_data.get("wallclock"), + qos=job_data.get("qos"), + energy=job_data.get("energy"), + date=job_data.get("date"), + section=job_data.get("section"), + member=job_data.get("member"), + chunk=job_data.get("chunk"), + last=job_data.get("last"), + platform=job_data.get("platform"), + job_id=job_data.get("job_id"), + extra_data=job_data.get("extra_data"), + nnodes=job_data.get("nnodes"), + run_id=job_data.get("run_id"), + MaxRSS=job_data.get("MaxRSS", 0), + AveRSS=job_data.get("AveRSS", 0), + out=job_data.get("out", ""), + err=job_data.get("err", ""), + rowstatus=job_data.get("rowstatus", 0), + ) + ) + return current_collection + else: + return None + except Exception: + print((traceback.format_exc())) + print(("Error on returning current job data. run_id {0}".format(run_id))) + return None + + +def parse_output_number(string_number): + """ + Parses number in format 1.0K 1.0M 1.0G + + :param string_number: String representation of number + :type string_number: str + :return: number in float format + :rtype: float + """ + number = 0.0 + if (string_number): + if string_number == "NA": + return 0.0 + last_letter = string_number.strip()[-1] + multiplier = 1.0 + if last_letter == "G": + multiplier = 1000000000.0 + number = string_number[:-1] + elif last_letter == "M": + multiplier = 1000000.0 + number = string_number[:-1] + elif last_letter == "K": + multiplier = 1000.0 + number = string_number[:-1] + else: + number = string_number + try: + number = float(number) * multiplier + except Exception: + number = 0.0 + + return number diff --git a/autosubmit_api/components/jobs/joblist_helper.py b/autosubmit_api/components/jobs/joblist_helper.py index 14a87f726772eaf0b284187e898c0a22aef4376e..2dc29c9d2fbb0465a12e8052eb9f06c7f56f32b5 100644 --- a/autosubmit_api/components/jobs/joblist_helper.py +++ b/autosubmit_api/components/jobs/joblist_helper.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from autosubmit_api.autosubmit_legacy.job.job_list import JobList from autosubmit_api.common.utils import datechunk_to_year -from autosubmit_api.database.db_jobdata import JobDataStructure, JobRow +from autosubmit_api.components.jobdata import JobDataStructure, JobRow from autosubmit_api.components.experiment.configuration_facade import AutosubmitConfigurationFacade from autosubmit_api.components.experiment.pkl_organizer import PklOrganizer from autosubmit_api.config.basicConfig import APIBasicConfig diff --git a/autosubmit_api/components/jobs/joblist_loader.py b/autosubmit_api/components/jobs/joblist_loader.py index 1edc563f9fb63e9ceca08fcd02d09ac33af4e1a5..7dcd65ff86b88144a9b141677428f21338c78633 100644 --- a/autosubmit_api/components/jobs/joblist_loader.py +++ b/autosubmit_api/components/jobs/joblist_loader.py @@ -4,13 +4,14 @@ import os from fnmatch import fnmatch from autosubmit_api.components.jobs.joblist_helper import 
JobListHelper from autosubmit_api.components.jobs.job_factory import StandardJob, Job -from autosubmit_api.database.db_structure import get_structure from autosubmit_api.common.utils import Status from bscearth.utils.date import date2str from typing import Dict, List, Set # Builder Imports import logging +from autosubmit_api.database.repositories import ExperimentStructureDbRepository + logger = logging.getLogger('gunicorn.error') @@ -144,7 +145,7 @@ class JobListLoader(object): self._job_dictionary[job.name] = job def load_existing_structure_adjacency(self): - self._structure_adjacency = get_structure(self.expid, self.configuration_facade.structures_path) + self._structure_adjacency = ExperimentStructureDbRepository(self.expid).get_structure() def distribute_adjacency_into_jobs(self): parents_adjacency = {} diff --git a/autosubmit_api/components/representations/graph/graph.py b/autosubmit_api/components/representations/graph/graph.py index 5e4c96cbc104acb8476c7a4f9cbeea256dd448ce..63f4901ebc50d0cbfbdece4b7a676e1c4ed1a550 100644 --- a/autosubmit_api/components/representations/graph/graph.py +++ b/autosubmit_api/components/representations/graph/graph.py @@ -9,7 +9,7 @@ from networkx.linalg.laplacianmatrix import laplacian_matrix from ...jobs.job_factory import Job from ...jobs.joblist_loader import JobListLoader from ....monitor.monitor import Monitor -from ....database.db_jobdata import ExperimentGraphDrawing +from ...experiment.graph_drawer import ExperimentGraphDrawing from .edge import Edge, RealEdge from typing import List, Dict, Tuple, Set, Any diff --git a/autosubmit_api/database/README.md b/autosubmit_api/database/README.md index 3fee0393761cd8218ea45b799d828565a89017a4..304149fd3a986ce1a9027c9584473edd6798edcc 100644 --- a/autosubmit_api/database/README.md +++ b/autosubmit_api/database/README.md @@ -1,7 +1,12 @@ -**db_common** contains some function to access Autosubmit database. It is mostly legacy code that needs to be restructured. +This package aims to encapsulate everything related to structured DDBB (SQLite/Postgres) operations. -**db_jobdata** contains most of the classes of the old implementation of the `historical database`. It needs to be deleted, but some functions still use it. Replace the references to this old implementation for the new implementation `history` module and proceed to delete this file. Also, take out the `Graph Drawing` class. +* **common.py**: This module have all the common functions to allow DDBB interaction. -**db_manager** is mostly legacy code that is still referenced. +* **tables.py**: Holds all the table schemas. This module extends `autosubmit.tables`. + +* **models.py**: Holds data validators. Might be refactored in the future. + +* **table_manager.py**: Provides a generalized interface to interact with one table at the time. + +* **adapters**: This subpackage holds all the entities and their corresponding operations. It should provide an interface for other parts of the API that prevents them to worry about DDBB logic. -**db_structure** handles the consumption of the structure database of the experiment. 
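As a rough illustration of the repository pattern this README describes (and that the rest of this changeset adopts), callers work only with repository objects and plain Python values. The class and method names below are taken from calls visible elsewhere in this diff; the exact return shapes are assumptions, so treat this as a sketch rather than the definitive API:

```python
# Sketch of the repository-style access pattern, assuming the method names
# used elsewhere in this changeset (get_by_expid, get_all_dict, get_structure)
# and dict-like return values.
from autosubmit_api.database.repositories import (
    ExperimentDbRepository,
    ExperimentStatusDbRepository,
    ExperimentStructureDbRepository,
)


def summarize_experiment(expid: str) -> None:
    # Main experiment record, returned as a plain mapping (no raw SQLAlchemy rows).
    experiment = ExperimentDbRepository().get_by_expid(expid)
    print(experiment["name"], experiment["autosubmit_version"])

    # Running/not-running status keyed by experiment name (assumed mapping).
    statuses = ExperimentStatusDbRepository().get_all_dict()
    print(statuses.get(expid, "NOT RUNNING"))

    # Job dependency structure (adjacency) stored per experiment.
    adjacency = ExperimentStructureDbRepository(expid).get_structure()
    print(f"{len(adjacency)} jobs with stored adjacency")
```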
diff --git a/autosubmit_api/database/__init__.py b/autosubmit_api/database/__init__.py index e2c02e22ffbd99e3569df2bf6ffeae33f39325d4..fb661d18bcaff27c3a94386ce811938c250cd656 100644 --- a/autosubmit_api/database/__init__.py +++ b/autosubmit_api/database/__init__.py @@ -1,19 +1,11 @@ -from sqlalchemy import text -from autosubmit_api.database.common import ( - create_as_times_db_engine, - create_autosubmit_db_engine, +from autosubmit_api.database.repositories import ( + ExperimentDbRepository, + ExperimentDetailsDbRepository, + ExperimentStatusDbRepository, ) -from autosubmit_api.database.tables import experiment_status_table, details_table def prepare_db(): - with create_as_times_db_engine().connect() as conn: - experiment_status_table.create(conn, checkfirst=True) - - with create_autosubmit_db_engine().connect() as conn: - details_table.create(conn, checkfirst=True) - - view_name = "listexp" - view_from = "select id,name,user,created,model,branch,hpc,description from experiment left join details on experiment.id = details.exp_id" - new_view_stmnt = f"CREATE VIEW IF NOT EXISTS {view_name} as {view_from}" - conn.execute(text(new_view_stmnt)) + ExperimentDbRepository().create_table() + ExperimentDetailsDbRepository().create_table() + ExperimentStatusDbRepository().create_table() diff --git a/autosubmit_api/database/common.py b/autosubmit_api/database/common.py index defe7056fce0dd39124123d138f33849032eea0a..14863a1710ee22fe0d385563220af57323c932b1 100644 --- a/autosubmit_api/database/common.py +++ b/autosubmit_api/database/common.py @@ -13,11 +13,18 @@ from sqlalchemy import ( from autosubmit_api.builders import BaseBuilder from autosubmit_api.logger import logger from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import session + + +def get_postgres_engine(): + return session.Session().bind class AttachedDatabaseConnBuilder(BaseBuilder): """ SQLite utility to build attached databases. + + MUST BE USED ONLY FOR DATABASE MODULE and TESTS. """ def __init__(self) -> None: @@ -47,7 +54,12 @@ class AttachedDatabaseConnBuilder(BaseBuilder): def create_main_db_conn() -> Connection: """ Connection with the autosubmit and as_times DDBB. + + MUST BE USED ONLY FOR DATABASE MODULE and TESTS. """ + APIBasicConfig.read() + if APIBasicConfig.DATABASE_BACKEND == "postgres": + return get_postgres_engine().connect() builder = AttachedDatabaseConnBuilder() builder.attach_autosubmit_db() builder.attach_as_times_db() @@ -58,8 +70,12 @@ def create_main_db_conn() -> Connection: def create_autosubmit_db_engine() -> Engine: """ Create an engine for the autosubmit DDBB. Usually named autosubmit.db + + MUST BE USED ONLY FOR DATABASE MODULE and TESTS. """ APIBasicConfig.read() + if APIBasicConfig.DATABASE_BACKEND == "postgres": + return get_postgres_engine() return create_engine( f"sqlite:///{ os.path.abspath(APIBasicConfig.DB_PATH)}", poolclass=NullPool ) @@ -68,8 +84,13 @@ def create_autosubmit_db_engine() -> Engine: def create_as_times_db_engine() -> Engine: """ Create an engine for the AS_TIMES DDBB. Usually named as_times.db + + MUST BE USED ONLY FOR DATABASE MODULE and TESTS. 
""" + APIBasicConfig.read() + if APIBasicConfig.DATABASE_BACKEND == "postgres": + return get_postgres_engine() db_path = os.path.join(APIBasicConfig.DB_DIR, APIBasicConfig.AS_TIMES_DB) return create_engine(f"sqlite:///{ os.path.abspath(db_path)}", poolclass=NullPool) diff --git a/autosubmit_api/database/db_common.py b/autosubmit_api/database/db_common.py deleted file mode 100644 index 6eec21ad01cca983e74344c65c9afb7cebffaf44..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/db_common.py +++ /dev/null @@ -1,522 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . - -""" -Module containing functions to manage autosubmit's database. -""" -import os -from sqlite3 import Connection, Cursor -import sqlite3 - -from bscearth.utils.log import Log -from autosubmit_api.config.basicConfig import APIBasicConfig -from autosubmit_api.builders.experiment_history_builder import ExperimentHistoryDirector, ExperimentHistoryBuilder -from autosubmit_api.builders.configuration_facade_builder import ConfigurationFacadeDirector, AutosubmitConfigurationFacadeBuilder -from autosubmit_api.database.utils import get_headers_sqlite, map_row_result_to_dict_sqlite -from autosubmit_api.experiment import common_db_requests as DbRequests -from typing import Dict, Any, Tuple - -CURRENT_DATABASE_VERSION = 1 - - -def check_db(): - """ - Checks if database file exist - - :return: None if exists, terminates program if not - """ - APIBasicConfig.read() - if not os.path.exists(APIBasicConfig.DB_PATH): - Log.error('Some problem has happened...check the database file.' + - 'DB file:' + APIBasicConfig.DB_PATH) - return False - return True - - -def open_conn(check_version=True) -> Tuple[Connection, Cursor]: - """ - Opens a connection to database - - :param check_version: If true, check if the database is compatible with this autosubmit version - :type check_version: bool - :return: connection object, cursor object - :rtype: sqlite3.Connection, sqlite3.Cursor - """ - APIBasicConfig.read() - print((APIBasicConfig.DB_PATH)) - conn = sqlite3.connect(APIBasicConfig.DB_PATH) - cursor = conn.cursor() - - # Getting database version - if check_version: - try: - cursor.execute('SELECT version ' - 'FROM db_version;') - row = cursor.fetchone() - version = row[0] - except sqlite3.OperationalError: - # If this exception is thrown it's because db_version does not exist. - # Database is from 2.x or 3.0 beta releases - try: - cursor.execute('SELECT type ' - 'FROM experiment;') - # If type field exists, it's from 2.x - version = -1 - except sqlite3.Error: - # If raises and error , it's from 3.0 beta releases - version = 0 - - # If database version is not the expected, update database.... - if version < CURRENT_DATABASE_VERSION: - if not _update_database(version, cursor): - raise DbException('Database version could not be updated') - - # ... 
or ask for autosubmit upgrade - elif version > CURRENT_DATABASE_VERSION: - Log.critical('Database version is not compatible with this autosubmit version. Please execute pip install ' - 'autosubmit --upgrade') - raise DbException('Database version not compatible') - - return conn, cursor - - -def close_conn(conn: Connection, cursor): - """ - Commits changes and close connection to database - - :param conn: connection to close - :type conn: sqlite3.Connection - :param cursor: cursor to close - :type cursor: sqlite3.Cursor - """ - conn.commit() - cursor.close() - conn.close() - return - - -def check_experiment_exists(name, error_on_inexistence=True): - """ - Checks if exist an experiment with the given name. - - :param error_on_inexistence: if True, adds an error log if experiment does not exists - :type error_on_inexistence: bool - :param name: Experiment name - :type name: str - :return: If experiment exists returns true, if not returns false - :rtype: bool - """ - if not check_db(): - return False - try: - (conn, cursor) = open_conn() - except DbException as e: - Log.error( - 'Connection to database could not be established: {0}', e.message) - return False - conn.isolation_level = None - - # SQLite always return a unicode object, but we can change this - # behaviour with the next sentence - conn.text_factory = str - cursor.execute( - 'select name from experiment where name=:name', {'name': name}) - row = cursor.fetchone() - close_conn(conn, cursor) - if row is None: - if error_on_inexistence: - Log.error('The experiment name "{0}" does not exist yet!!!', name) - return False - return True - - -def get_autosubmit_version(expid, log=None): - """ - Get the minimun autosubmit version needed for the experiment - - :param expid: Experiment name - :type expid: str - :return: If experiment exists returns the autosubmit version for it, if not returns None - :rtype: str - """ - if not check_db(): - return False - - try: - (conn, cursor) = open_conn() - except DbException as e: - if log: - log.error( - 'Connection to database could not be established: {0}', e.message) - return False - conn.isolation_level = None - - # SQLite always return a unicode object, but we can change this - # behaviour with the next sentence - conn.text_factory = str - cursor.execute('SELECT autosubmit_version FROM experiment WHERE name=:expid', { - 'expid': expid}) - row = cursor.fetchone() - close_conn(conn, cursor) - if row is None: - if log: - log.error('The experiment "{0}" does not exist yet!!!', expid) - return None - return row[0] - - -def search_experiment_by_id(query, exp_type=None, only_active=None, owner=None): - """ - Search experiments using provided data. Main query searches in the view listexp of ec_earth.db. - - :param searchString: string used to match columns in the table - :type searchString: str - :param typeExp: Assumes values "test" (only experiments starting with 't') or "experiment" (not experiment starting with 't') or "all" (indistinct). 
- :type typeExp: str - :param onlyActive: Assumes "active" (only active experiments) or "" (indistinct) - :type onlyActive: str - :param owner: return only experiment that match the provided owner of the experiment - :type owner: str - :return: list of experiments that match the search - :rtype: JSON - """ - if not check_db(): - return False - try: - (conn, cursor) = open_conn() - except DbException as e: - Log.error( - 'Connection to database could not be established: {0}', e.message) - return False - if owner: - query = "SELECT id,name,user,created,model,branch,hpc,description FROM experiment e left join details d on e.id = d.exp_id WHERE user='{0}'".format(owner) - # print(query) - else: - query = "SELECT id,name,user,created,model,branch,hpc,description FROM experiment e left join details d on e.id = d.exp_id WHERE (name LIKE '" + query + \ - "%' OR description LIKE '%" + query + \ - "%' OR user LIKE '%" + query + "%')" - if exp_type and len(exp_type) > 0: - if exp_type == "test": - query += " AND name LIKE 't%'" - elif exp_type == "experiment": - query += " AND name NOT LIKE 't%'" - else: - # Indistinct - pass - # Query DESC by name - query += " ORDER BY name DESC" - # print(query) - cursor.execute(query) - table = cursor.fetchall() - cursor.close() - conn.close() - result = list() - experiment_status = dict() - experiment_times = dict() - if len(table) > 0: - experiment_status = DbRequests.get_experiment_status() - # REMOVED: experiment_times = DbRequests.get_experiment_times() - for row in table: - expid = str(row[1]) - - status = experiment_status.get(expid, "NOT RUNNING") - if only_active == "active" and status != "RUNNING": - continue - - completed = "NA" - total = "NA" - submitted = 0 - queuing = 0 - running = 0 - failed = 0 - suspended = 0 - version = "Unknown" - wrapper = None - last_modified_timestamp = None - last_modified_pkl_datetime = None - hpc = row[6] - try: - autosubmit_config_facade = ConfigurationFacadeDirector(AutosubmitConfigurationFacadeBuilder(expid)).build_autosubmit_configuration_facade() - version = autosubmit_config_facade.get_autosubmit_version() - wrapper = autosubmit_config_facade.get_wrapper_type() - last_modified_pkl_datetime = autosubmit_config_facade.get_pkl_last_modified_time_as_datetime() - hpc = autosubmit_config_facade.get_main_platform() - except Exception as exp: - last_modified_pkl_datetime = None - pass - - total, completed, last_modified_timestamp = experiment_times.get( - expid, ("NA", "NA", None)) - - # Getting run data from historical database - - try: - current_run = ExperimentHistoryDirector(ExperimentHistoryBuilder(expid)).build_reader_experiment_history().manager.get_experiment_run_dc_with_max_id() - if current_run and current_run.total > 0: - completed = current_run.completed - total = current_run.total - submitted = current_run.submitted - queuing = current_run.queuing - running = current_run.running - failed = current_run.failed - suspended = current_run.suspended - last_modified_timestamp = current_run.modified_timestamp - except Exception as exp: - print(("Exception on search_experiment_by_id : {}".format(exp))) - pass - - result.append({'id': row[0], 'name': row[1], 'user': row[2], 'description': row[7], - 'hpc': hpc, 'status': status, 'completed': completed, 'total': total, - 'version': version, 'wrapper': wrapper, "submitted": submitted, "queuing": queuing, - "running": running, "failed": failed, "suspended": suspended, "modified": last_modified_pkl_datetime}) - return {'experiment': result} - - -def get_current_running_exp(): 
- """ - Simple query that gets the list of experiments currently running - - :rtype: list of users - """ - if not check_db(): - return False - try: - (conn, cursor) = open_conn() - except DbException as e: - Log.error( - 'Connection to database could not be established: {0}', e.message) - return False - query = "SELECT id,name,user,created,model,branch,hpc,description FROM experiment e left join details d on e.id = d.exp_id" - APIBasicConfig.read() - # print(query) - cursor.execute(query) - table = cursor.fetchall() - cursor.close() - conn.close() - result = list() - experiment_status = dict() - experiment_times = dict() - experiment_status = DbRequests.get_experiment_status() - # REMOVED: experiment_times = DbRequests.get_experiment_times() - for row in table: - expid = str(row[1]) - status = "NOT RUNNING" - completed = "NA" - total = "NA" - submitted = 0 - queuing = 0 - running = 0 - failed = 0 - suspended = 0 - user = str(row[2]) - version = "Unknown" - wrapper = None - last_modified_timestamp = None - last_modified_pkl_datetime = None - if (expid in experiment_status): - status = experiment_status[expid] - if status == "RUNNING": - try: - autosubmit_config_facade = ConfigurationFacadeDirector(AutosubmitConfigurationFacadeBuilder(expid)).build_autosubmit_configuration_facade() - version = autosubmit_config_facade.get_autosubmit_version() - wrapper = autosubmit_config_facade.get_wrapper_type() - last_modified_pkl_datetime = autosubmit_config_facade.get_pkl_last_modified_time_as_datetime() - hpc = autosubmit_config_facade.get_main_platform() - except Exception as exp: - last_modified_pkl_datetime = None - pass - if (expid in experiment_times): - if len(user) == 0: - # Retrieve user from path - path = APIBasicConfig.LOCAL_ROOT_DIR + '/' + expid - if (os.path.exists(path)): - main_folder = os.stat(path) - user = os.popen( - 'id -nu {0}'.format(str(main_folder.st_uid))).read().strip() - total, completed, last_modified_timestamp = experiment_times[expid] - # Try to retrieve experiment_run data - try: - current_run = ExperimentHistoryDirector(ExperimentHistoryBuilder(expid)).build_reader_experiment_history().manager.get_experiment_run_dc_with_max_id() - if current_run and current_run.total > 0: - completed = current_run.completed - total = current_run.total - submitted = current_run.submitted - queuing = current_run.queuing - running = current_run.running - failed = current_run.failed - suspended = current_run.suspended - last_modified_timestamp = current_run.modified_timestamp - except Exception as exp: - print(("Exception on get_current_running_exp : {}".format(exp))) - pass - result.append({'id': row[0], 'name': row[1], 'user': user, 'description': row[7], - 'hpc': hpc, 'status': status, 'completed': completed, 'total': total, - 'version': version, 'wrapper': wrapper, "submitted": submitted, "queuing": queuing, - "running": running, "failed": failed, "suspended": suspended, "modified": last_modified_pkl_datetime}) - return {'experiment': result} - - -def get_experiment_by_id(expid): - # type: (str) -> Dict[str, Any] - result = {'id': 0, 'name': expid, 'description': "NA", 'version': "NA"} - if not check_db(): - return result - (conn, cursor) = open_conn() - query = "SELECT id, name, description, autosubmit_version FROM experiment WHERE name ='" + expid + "'" - cursor.execute(query) - headers = get_headers_sqlite(cursor) - row = cursor.fetchone() - close_conn(conn, cursor) - if row is not None: - obj = map_row_result_to_dict_sqlite(row, headers) - result['id'] = obj["id"] - result['name'] = 
obj["name"] - result['description'] = obj["description"] - result['version'] = obj["autosubmit_version"] - return result - - -def _update_database(version, cursor): - Log.info("Autosubmit's database version is {0}. Current version is {1}. Updating...", - version, CURRENT_DATABASE_VERSION) - try: - # For databases from Autosubmit 2 - if version <= -1: - cursor.executescript('CREATE TABLE experiment_backup(id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' - 'name VARCHAR NOT NULL, type VARCHAR, autosubmit_version VARCHAR, ' - 'description VARCHAR NOT NULL, model_branch VARCHAR, template_name VARCHAR, ' - 'template_branch VARCHAR, ocean_diagnostics_branch VARCHAR);' - 'INSERT INTO experiment_backup (name,type,description,model_branch,template_name,' - 'template_branch,ocean_diagnostics_branch) SELECT name,type,description,model_branch,' - 'template_name,template_branch,ocean_diagnostics_branch FROM experiment;' - 'UPDATE experiment_backup SET autosubmit_version = "2";' - 'DROP TABLE experiment;' - 'ALTER TABLE experiment_backup RENAME TO experiment;') - if version <= 0: - # Autosubmit beta version. Create db_version table - cursor.executescript('CREATE TABLE db_version(version INTEGER NOT NULL);' - 'INSERT INTO db_version (version) VALUES (1);' - 'ALTER TABLE experiment ADD COLUMN autosubmit_version VARCHAR;' - 'UPDATE experiment SET autosubmit_version = "3.0.0b" ' - 'WHERE autosubmit_version NOT NULL;') - cursor.execute('UPDATE db_version SET version={0};'.format( - CURRENT_DATABASE_VERSION)) - except sqlite3.Error as e: - Log.critical('Can not update database: {0}', e) - return False - Log.info("Update completed") - return True - - -def update_experiment_description_owner(name, new_description=None, owner=None): - """ - We are suppossing that the front-end is making the owner validation. - :param expid: - :type expid: - :param new_description: - :type new_description: - :param owner: - :type owner: - """ - error = False - auth = False - description = None - message = None - try: - if new_description and owner: - result = _update_experiment_descrip_version(name, new_description) - if result: - auth = True - description = new_description - message = "Description Updated." - else: - error = True - if not new_description and not owner: - auth = False - message = "Not a valid user and no description provided" - elif new_description and not owner: - # Description provided by no valid user - auth = False - message = "It seems that your session has expired, please log in again." - else: - message = "No description provided." - except Exception as exp: - error = True - message = str(exp) - return { - 'error': error, - 'auth': auth, - 'description': description, - 'message': message - } - - -def _update_experiment_descrip_version(name, description=None, version=None): - """ - Updates the experiment's description and/or version - - :param name: experiment name (expid) - :rtype name: str - :param description: experiment new description - :rtype description: str - :param version: experiment autosubmit version - :rtype version: str - :return: If description has been update, True; otherwise, False. 
- :rtype: bool - """ - if not check_db(): - return False - try: - (conn, cursor) = open_conn() - except DbException as e: - raise Exception( - "Could not establish a connection to the database.") - conn.isolation_level = None - - # Changing default unicode - conn.text_factory = str - # Conditional update - if description is not None and version is not None: - cursor.execute('update experiment set description=:description, autosubmit_version=:version where name=:name', { - 'description': description, 'version': version, 'name': name}) - elif description is not None and version is None: - cursor.execute('update experiment set description=:description where name=:name', { - 'description': description, 'name': name}) - elif version is not None and description is None: - cursor.execute('update experiment set autosubmit_version=:version where name=:name', { - 'version': version, 'name': name}) - else: - raise Exception( - "Not enough data to update {}.".format(name)) - row = cursor.rowcount - close_conn(conn, cursor) - if row == 0: - raise Exception( - "Update on experiment {} failed.".format(name)) - return False - return True - - -class DbException(Exception): - """ - Exception class for database errors - """ - - def __init__(self, message): - self.message = message diff --git a/autosubmit_api/database/db_jobdata.py b/autosubmit_api/database/db_jobdata.py deleted file mode 100644 index b19d6c2f63a1e7e31ac3ad8ed2311fe2b8663819..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/db_jobdata.py +++ /dev/null @@ -1,1031 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . 
- -import os -import time -import textwrap -import traceback -import sqlite3 -import collections -import portalocker -from datetime import datetime, timedelta -from json import loads -from time import mktime -from autosubmit_api.components.jobs.utils import generate_job_html_title -# from networkx import DiGraph -from autosubmit_api.config.basicConfig import APIBasicConfig -from autosubmit_api.monitor.monitor import Monitor -from autosubmit_api.performance.utils import calculate_ASYPD_perjob -from autosubmit_api.components.jobs.job_factory import SimJob -from autosubmit_api.common.utils import get_jobs_with_no_outliers, Status, datechunk_to_year -# from autosubmitAPIwu.job.job_list -# import autosubmitAPIwu.experiment.common_db_requests as DbRequests -from bscearth.utils.date import Log - -from autosubmit_api.persistance.experiment import ExperimentPaths - - -# Version 15 includes out err MaxRSS AveRSS and rowstatus -CURRENT_DB_VERSION = 15 # Used to be 10 or 0 -DB_VERSION_SCHEMA_CHANGES = 12 -DB_EXPERIMENT_HEADER_SCHEMA_CHANGES = 14 -_debug = True -JobItem_10 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', - 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data']) -JobItem_12 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', - 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data', 'nnodes', 'run_id']) -JobItem_15 = collections.namedtuple('JobItem', ['id', 'counter', 'job_name', 'created', 'modified', 'submit', 'start', 'finish', - 'status', 'rowtype', 'ncpus', 'wallclock', 'qos', 'energy', 'date', 'section', 'member', 'chunk', 'last', 'platform', 'job_id', 'extra_data', 'nnodes', 'run_id', 'MaxRSS', 'AveRSS', 'out', 'err', 'rowstatus']) - -ExperimentRunItem = collections.namedtuple('ExperimentRunItem', [ - 'run_id', 'created', 'start', 'finish', 'chunk_unit', 'chunk_size', 'completed', 'total', 'failed', 'queuing', 'running', 'submitted']) -ExperimentRunItem_14 = collections.namedtuple('ExperimentRunItem', [ - 'run_id', 'created', 'start', 'finish', 'chunk_unit', 'chunk_size', 'completed', 'total', 'failed', 'queuing', 'running', 'submitted', 'suspended', 'metadata']) - -ExperimentRow = collections.namedtuple( - 'ExperimentRow', ['exp_id', 'expid', 'status', 'seconds']) - -JobRow = collections.namedtuple( - 'JobRow', ['name', 'queue_time', 'run_time', 'status', 'energy', 'submit', 'start', 'finish', 'ncpus', 'run_id']) - - -class ExperimentRun(): - - def __init__(self, run_id, created=None, start=0, finish=0, chunk_unit="NA", chunk_size=0, completed=0, total=0, failed=0, queuing=0, running=0, submitted=0, suspended=0, metadata="", modified=None): - self.run_id = run_id - self.created = created if created else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') - self.start = start - self.finish = finish - self.chunk_unit = chunk_unit - self.chunk_size = chunk_size - self.submitted = submitted - self.queuing = queuing - self.running = running - self.completed = completed - self.failed = failed - self.total = total - self.suspended = suspended - self.metadata = metadata - self.modified = modified - - def getSYPD(self, job_list): - """ - Gets SYPD per run - """ - outlier_free_list = [] - if job_list: - performance_jobs = [SimJob.from_old_job_data(job_db) for job_db in job_list] - 
outlier_free_list = get_jobs_with_no_outliers(performance_jobs) - # print("{} -> {}".format(self.run_id, len(outlier_free_list))) - if len(outlier_free_list) > 0: - years_per_sim = datechunk_to_year(self.chunk_unit, self.chunk_size) - # print(self.run_id) - # print(years_per_sim) - seconds_per_day = 86400 - number_SIM = len(outlier_free_list) - # print(len(job_list)) - total_run_time = sum(job.run_time for job in outlier_free_list) - # print("run {3} yps {0} n {1} run_time {2}".format(years_per_sim, number_SIM, total_run_time, self.run_id)) - if total_run_time > 0: - return round((years_per_sim * number_SIM * seconds_per_day) / total_run_time, 2) - return None - - def getASYPD(self, job_sim_list, job_post_list, package_jobs): - """ - Gets ASYPD per run - package_jobs package_name => { job_id => (queue_time, parents, job_id, start_time) } - """ - SIM_no_outlier_list = [] - if job_sim_list and len(job_sim_list) > 0: - performance_jobs = [SimJob.from_old_job_data(job_db) for job_db in job_sim_list] - SIM_no_outlier_list = get_jobs_with_no_outliers(performance_jobs) - valid_names = set([job.name for job in SIM_no_outlier_list]) - job_sim_list = [job for job in job_sim_list if job.job_name in valid_names] - - # print("Run Id {}".format(self.run_id)) - if job_sim_list and len(job_sim_list) > 0 and job_post_list and len(job_post_list) > 0: - years_per_sim = datechunk_to_year(self.chunk_unit, self.chunk_size) - seconds_per_day = 86400 - number_SIM = len(job_sim_list) - number_POST = len(job_post_list) - - # print("SIM # {}".format(number_SIM)) - # print("POST # {}".format(number_POST)) - average_POST = round(sum(job.queuing_time(package_jobs.get( - job.rowtype, None) if package_jobs is not None else None) + job.running_time() for job in job_post_list) / number_POST, 2) - # print("Average POST {}".format(average_POST)) - # for job in job_sim_list: - # print("{} : {} {}".format(job.job_name, job.start, job.submit)) - # print("Run time {} -> {}".format(job.job_name, job.running_time())) - # print(job.job_name) - # print(package_jobs.get(job.rowtype, None)) - # print("Queue time {}".format(job.queuing_time(package_jobs.get( - # job.rowtype, None) if package_jobs is not None else None))) - sum_SIM = round(sum(job.queuing_time(package_jobs.get( - job.rowtype, None) if package_jobs is not None else None) + job.running_time() for job in job_sim_list), 2) - if (sum_SIM + average_POST) > 0: - return round((years_per_sim * number_SIM * seconds_per_day) / (sum_SIM + average_POST), 2) - return None - - -class JobData(object): - """Job Data object - """ - - def __init__(self, _id, counter=1, job_name="None", created=None, modified=None, submit=0, start=0, finish=0, status="UNKNOWN", rowtype=1, ncpus=0, wallclock="00:00", qos="debug", energy=0, date="", section="", member="", chunk=0, last=1, platform="NA", job_id=0, extra_data=dict(), nnodes=0, run_id=None, MaxRSS=0.0, AveRSS=0.0, out='', err='', rowstatus=0): - """[summary] - - Args: - _id (int): Internal Id - counter (int, optional): [description]. Defaults to 1. - job_name (str, optional): [description]. Defaults to "None". - created (datetime, optional): [description]. Defaults to None. - modified (datetime, optional): [description]. Defaults to None. - submit (int, optional): [description]. Defaults to 0. - start (int, optional): [description]. Defaults to 0. - finish (int, optional): [description]. Defaults to 0. - status (str, optional): [description]. Defaults to "UNKNOWN". - rowtype (int, optional): [description]. Defaults to 1. 
- ncpus (int, optional): [description]. Defaults to 0. - wallclock (str, optional): [description]. Defaults to "00:00". - qos (str, optional): [description]. Defaults to "debug". - energy (int, optional): [description]. Defaults to 0. - date (str, optional): [description]. Defaults to "". - section (str, optional): [description]. Defaults to "". - member (str, optional): [description]. Defaults to "". - chunk (int, optional): [description]. Defaults to 0. - last (int, optional): [description]. Defaults to 1. - platform (str, optional): [description]. Defaults to "NA". - job_id (int, optional): [description]. Defaults to 0. - """ - self._id = _id - self.counter = counter - self.job_name = job_name - self.created = created if created else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') - self.modified = modified if modified else datetime.today().strftime('%Y-%m-%d-%H:%M:%S') - self._submit = int(submit) - self._start = int(start) - self._finish = int(finish) - # self._queue_time = 0 - # self._run_time = 0 - self.status = status - self.rowtype = rowtype - self.ncpus = ncpus - self.wallclock = wallclock - self.qos = qos if qos else "debug" - self._energy = energy if energy else 0 - self.date = date if date else "" - # member and section were confused in the database. - self.section = section if section else "" - self.member = member if member else "" - self.chunk = chunk if chunk else 0 - self.last = last - self._platform = platform if platform and len( - platform) > 0 else "NA" - self.job_id = job_id if job_id else 0 - try: - self.extra_data = loads(extra_data) - except Exception as exp: - self.extra_data = "" - pass - self.nnodes = nnodes - self.run_id = run_id - self.MaxRSS = MaxRSS - self.AveRSS = AveRSS - self.out = out - self.err = err - self.rowstatus = rowstatus - - self.require_update = False - self.metric_SYPD = None - self.metric_ASYPD = None - # self.title = getTitle(self.job_name, Monitor.color_status( - # Status.STRING_TO_CODE[self.status]), self.status) - self.tree_parent = [] - - @property - def title(self): - return generate_job_html_title(self.job_name, Monitor.color_status(Status.STRING_TO_CODE[self.status]), self.status) - - def calculateSYPD(self, years_per_sim): - """ - """ - seconds_in_a_day = 86400 - # Make sure it is possible to generate - # print("yps {0} date {1} chunk {2}".format( - # years_per_sim, self.date, self.chunk)) - if (years_per_sim > 0 and self.date is not None and len(self.date) > 0 and self.chunk > 0): - # print("run {0}".format(self.running_time())) - self.metric_SYPD = round(years_per_sim * seconds_in_a_day / - self.running_time(), 2) if self.running_time() > 0 else None - - def calculateASYPD(self, chunk_unit, chunk_size, job_package_data, average_post_time): - """ - Calculates ASYPD for a job in a run - - :param chunk_unit: chunk unit of the experiment - :type chunk_unit: str - :param chunk_size: chunk size of the experiment - :type chunk_size: str - :param job_package_data: jobs in the package (if self belongs to a package) - :type: list() - :param average_post_time: average queuing + running time of the post jobs in the run of self. 
- :type average_post_time: float - :return: void - :rtype: void - """ - result_ASYPD = calculate_ASYPD_perjob( - chunk_unit, chunk_size, self.chunk, self.queuing_time(job_package_data) + self.running_time(), average_post_time, Status.STRING_TO_CODE[self.status]) - self.metric_ASYPD = result_ASYPD if result_ASYPD > 0 else None - - def delta_queue_time(self, job_data_in_package=None): - """ - Retrieves queuing time in timedelta format HH:mm:ss - """ - return str(timedelta(seconds=self.queuing_time(job_data_in_package))) - - def delta_running_time(self): - return str(timedelta(seconds=self.running_time())) - - def submit_datetime(self): - if self.submit > 0: - return datetime.fromtimestamp(self.submit) - return None - - def start_datetime(self): - if self.start > 0: - return datetime.fromtimestamp(self.start) - # if self.last == 0 and self.submit > 0: - # return datetime.fromtimestamp(self.submit) - return None - - def finish_datetime(self): - if self.finish > 0: - return datetime.fromtimestamp(self.finish) - # if self.last == 0: - # if self.start > 0: - # return datetime.fromtimestamp(self.start) - # if self.submit > 0: - # return datetime.fromtimestamp(self.submit) - return None - - def submit_datetime_str(self): - o_datetime = self.submit_datetime() - if o_datetime: - return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') - else: - return None - - def start_datetime_str(self): - o_datetime = self.start_datetime() - if o_datetime: - return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') - else: - return None - - def finish_datetime_str(self): - o_datetime = self.finish_datetime() - if o_datetime: - return o_datetime.strftime('%Y-%m-%d-%H:%M:%S') - else: - return None - - def queuing_time(self, job_data_in_package=None): - """ - Calculates the queuing time of the job. - jobs_data_in_package dict job_id => (queue_time, parents, job_name, start_time, finish_time) - - Returns: - int: queueing time - """ - max_queue = queue = 0 - job_name_max_queue = None - - if job_data_in_package and len(job_data_in_package) > 0: - # Only consider those jobs with starting time less than the start time of the job minus 20 seconds. - - jobs_times = [job_data_in_package[key] - for key in job_data_in_package if job_data_in_package[key][3] < (self._start - 20)] - - if jobs_times and len(jobs_times) > 0: - # There are previous jobs - # Sort by Queuing Time from Highest to Lowest - jobs_times.sort(key=lambda a: a[0], reverse=True) - # Select the maximum queue time - max_queue, _, job_name_max_queue, start, finish = jobs_times[0] - # Add the running time to the max queue time - max_queue += (finish - start) if finish > start else 0 - - if self.status in ["SUBMITTED", "QUEUING", "RUNNING", "COMPLETED", "HELD", "PREPARED", "FAILED"]: - # Substract the total time from the max_queue job in the package - # This adjustment should cover most of the wrapper types. - # TODO: Test this mechanism against all wrapper types - queue = int((self.start if self.start > - 0 else time.time()) - self.submit) - int(max_queue) - if queue > 0: - return queue - return 0 - - def running_time(self): - """Calculates the running time of the job. 
- - Returns: - int: running time - """ - if self.status in ["RUNNING", "COMPLETED", "FAILED"]: - # print("Finish: {0}".format(self.finish)) - if self.start == 0: - return 0 - - run = int((self.finish if self.finish > - 0 else time.time()) - self.start) - # print("RUN {0}".format(run)) - if run > 0: - return run - return 0 - - def energy_string(self): - return str(int(self.energy / 1000)) + "K" - - @property - def submit(self): - return int(self._submit) - - @property - def start(self): - if int(self._start) > 0: - return int(self._start) - if self.last == 0: - if int(self.submit) > 0: - return int(self._submit) - return int(self._start) - - @property - def finish(self): - if int(self._finish) > 0: - return int(self._finish) - if self.last == 0: - if int(self._start) > 0: - return int(self._start) - if int(self._submit) > 0: - return int(self._submit) - return int(self._finish) - - @property - def platform(self): - return self._platform - - @property - def energy(self): - """ - Return as integer - """ - return int(self._energy) - - @submit.setter - def submit(self, submit): - self._submit = int(submit) - - @start.setter - def start(self, start): - self._start = int(start) - - @finish.setter - def finish(self, finish): - self._finish = int(finish) - - @platform.setter - def platform(self, platform): - self._platform = platform if platform and len(platform) > 0 else "NA" - - @energy.setter - def energy(self, energy): - # print("Energy {0}".format(energy)) - if energy > 0: - if (energy != self._energy): - # print("Updating energy to {0} from {1}.".format( - # energy, self._energy)) - self.require_update = True - self._energy = energy if energy else 0 - - -class JobStepExtraData(): - def __init__(self, key, dict_data): - self.key = key - if isinstance(dict_data, dict): - # dict_data["ncpus"] if dict_data and "ncpus" in dict_data.keys( - self.ncpus = dict_data.get("ncpus", 0) if dict_data else 0 - # ) else 0 - self.nnodes = dict_data.get( - "nnodes", 0) if dict_data else 0 # and "nnodes" in dict_data.keys( - # ) else 0 - self.submit = int(mktime(datetime.strptime(dict_data["submit"], "%Y-%m-%dT%H:%M:%S").timetuple())) if dict_data and "submit" in list(dict_data.keys( - )) else 0 - self.start = int(mktime(datetime.strptime(dict_data["start"], "%Y-%m-%dT%H:%M:%S").timetuple())) if dict_data and "start" in list(dict_data.keys( - )) else 0 - self.finish = int(mktime(datetime.strptime(dict_data["finish"], "%Y-%m-%dT%H:%M:%S").timetuple())) if dict_data and "finish" in list(dict_data.keys( - )) and dict_data["finish"] != "Unknown" else 0 - self.energy = parse_output_number(dict_data["energy"]) if dict_data and "energy" in list(dict_data.keys( - )) else 0 - # if dict_data and "MaxRSS" in dict_data.keys( - self.maxRSS = dict_data.get("MaxRSS", 0) - # ) else 0 - # if dict_data and "AveRSS" in dict_data.keys( - self.aveRSS = dict_data.get("AveRSS", 0) - # ) else 0 - else: - self.ncpus = 0 - self.nnodes = 0 - self.submit = 0 - self.start = 0 - self.finish = 0 - self.energy = 0 - self.maxRSS = 0 - self.aveRSS = 0 - - -class MainDataBase(): - def __init__(self, expid): - self.expid = expid - self.conn = None - self.conn_ec = None - self.create_table_query = None - self.db_version = None - - def create_connection(self, db_file): - """ - Create a database connection to the SQLite database specified by db_file. 
- :param db_file: database file name - :return: Connection object or None - """ - try: - conn = sqlite3.connect(db_file) - return conn - except: - return None - - def create_table(self): - """ create a table from the create_table_sql statement - :param conn: Connection object - :param create_table_sql: a CREATE TABLE statement - :return: - """ - try: - if self.conn: - c = self.conn.cursor() - c.execute(self.create_table_query) - self.conn.commit() - else: - raise IOError("Not a valid connection") - except IOError as exp: - Log.warning(exp) - return None - except sqlite3.Error as e: - if _debug == True: - Log.info(traceback.format_exc()) - Log.warning("Error on create table : " + str(type(e).__name__)) - return None - - def create_index(self): - """ Creates index from statement defined in child class - """ - try: - if self.conn: - c = self.conn.cursor() - c.execute(self.create_index_query) - self.conn.commit() - else: - raise IOError("Not a valid connection") - except IOError as exp: - Log.warning(exp) - return None - except sqlite3.Error as e: - if _debug == True: - Log.info(traceback.format_exc()) - Log.debug(str(type(e).__name__)) - Log.warning("Error on create index . create_index") - return None - - -class ExperimentGraphDrawing(MainDataBase): - def __init__(self, expid): - """ - Sets and validates graph drawing. - :param expid: Name of experiment - :type expid: str - :param allJobs: list of all jobs objects (usually from job_list) - :type allJobs: list() - """ - MainDataBase.__init__(self, expid) - APIBasicConfig.read() - self.expid = expid - exp_paths = ExperimentPaths(expid) - self.folder_path = APIBasicConfig.LOCAL_ROOT_DIR - self.database_path = exp_paths.graph_data_db - self.create_table_query = textwrap.dedent( - '''CREATE TABLE - IF NOT EXISTS experiment_graph_draw ( - id INTEGER PRIMARY KEY, - job_name text NOT NULL, - x INTEGER NOT NULL, - y INTEGER NOT NULL - );''') - - if not os.path.exists(self.database_path): - os.umask(0) - if not os.path.exists(os.path.dirname(self.database_path)): - os.makedirs(os.path.dirname(self.database_path)) - os.open(self.database_path, os.O_WRONLY | os.O_CREAT, 0o777) - self.conn = self.create_connection(self.database_path) - self.create_table() - else: - self.conn = self.create_connection(self.database_path) - self.lock_name = "calculation_in_progress.lock" - self.current_position_dictionary = None - self.current_jobs_set = set() - self.coordinates = list() - self.set_current_position() - self.should_update = False - self.locked = False - self.test_locked() - - def test_locked(self): - self.locked = True - try: - with portalocker.Lock(os.path.join(self.folder_path, self.lock_name), timeout=1) as fh: - self.locked = False - fh.flush() - os.fsync(fh.fileno()) - except portalocker.AlreadyLocked: - print("It is locked") - self.locked = True - except Exception as exp: - self.locked = True - - def get_validated_data(self, allJobs): - """ - Validates if should update current graph drawing. - :return: None if graph drawing should be updated, otherwise, it returns the position data. - :rype: None or dict() - """ - job_names = {job.name for job in allJobs} - # Validating content - difference = job_names - self.current_jobs_set - if difference and len(difference) > 0: - # Intersection found. 
Graph Drawing database needs to be updated - self.should_update = True - # Clear database - return None - return self.current_position_dictionary - # return None if self.should_update == True else self.current_position_dictionary - - def calculate_drawing(self, allJobs, independent=False, num_chunks=48, job_dictionary=None): - """ - Called in a thread. - :param allJobs: list of jobs (usually from job_list object) - :type allJobs: list() - :return: Last row Id - :rtype: int - """ - lock_name = "calculation_{}_in_progress.lock".format(self.expid) if independent == True else self.lock_name - lock_path_file = os.path.join(self.folder_path, lock_name) - try: - with portalocker.Lock(lock_path_file, timeout=1) as fh: - self.conn = self.create_connection(self.database_path) - monitor = Monitor() - graph = monitor.create_tree_list( - self.expid, allJobs, None, dict(), False, job_dictionary) - if len(allJobs) > 1000: - # Logic: Start with 48 as acceptable number of chunks for Gmaxiter = 100 - # Minimum Gmaxiter will be 10 - maxiter = max(10, 148 - num_chunks) - # print("Experiment {} num_chunk {} maxiter {}".format( - # self.expid, num_chunks, maxiter)) - result = graph.create( - ['dot', '-Gnslimit=2', '-Gnslimit1=2', '-Gmaxiter={}'.format(maxiter), '-Gsplines=none', '-v'], format="plain") - else: - result = graph.create('dot', format="plain") - for u in result.split(b"\n"): - splitList = u.split(b" ") - if len(splitList) > 1 and splitList[0].decode() == "node": - - self.coordinates.append((splitList[1].decode(), int( - float(splitList[2].decode()) * 90), int(float(splitList[3].decode()) * -90))) - # self.coordinates[splitList[1]] = ( - # int(float(splitList[2]) * 90), int(float(splitList[3]) * -90)) - self.insert_coordinates() - fh.flush() - os.fsync(fh.fileno()) - os.remove(lock_path_file) - return self.get_validated_data(allJobs) - except portalocker.AlreadyLocked: - message = "Already calculating graph drawing." - print(message) - return None - except Exception as exp: - print((traceback.format_exc())) - os.remove(lock_path_file) - print(("Exception while calculating coordinates {}".format(str(exp)))) - return None - - def insert_coordinates(self): - """ - Prepares and inserts new coordinates. - """ - try: - # Start by clearing database - self._clear_graph_database() - result = None - if self.coordinates and len(self.coordinates) > 0: - result = self._insert_many_graph_coordinates(self.coordinates) - return result - return None - except Exception as exp: - print((str(exp))) - return None - - def set_current_position(self): - """ - Sets all registers in the proper variables. 
- current_position_dictionary: JobName -> (x, y) - current_jobs_set: JobName - """ - current_table = self._get_current_position() - if current_table and len(current_table) > 0: - self.current_position_dictionary = {row[1]: (row[2], row[3]) for row in current_table} - self.current_jobs_set = set(self.current_position_dictionary.keys()) - - def _get_current_position(self): - """ - Get all registers from experiment_graph_draw.\n - :return: row content: id, job_name, x, y - :rtype: 4-tuple (int, str, int, int) - """ - try: - if self.conn: - # conn = create_connection(DB_FILE_AS_TIMES) - self.conn.text_factory = str - cur = self.conn.cursor() - cur.execute( - "SELECT id, job_name, x, y FROM experiment_graph_draw") - rows = cur.fetchall() - return rows - return None - except Exception as exp: - print((traceback.format_exc())) - print((str(exp))) - return None - - def _insert_many_graph_coordinates(self, values): - """ - Create many graph coordinates - :param conn: - :param details: - :return: - """ - try: - if self.conn: - # exp_id = self._get_id_db() - # conn = create_connection(DB_FILE_AS_TIMES) - # creation_date = datetime.today().strftime('%Y-%m-%d-%H:%M:%S') - sql = ''' INSERT INTO experiment_graph_draw(job_name, x, y) VALUES(?,?,?) ''' - # print(row_content) - cur = self.conn.cursor() - cur.executemany(sql, values) - # print(cur) - self.conn.commit() - return cur.lastrowid - except Exception as exp: - print((traceback.format_exc())) - Log.warning( - "Error on Insert many graph drawing : {}".format(str(exp))) - return None - - def _clear_graph_database(self): - """ - Clear all content from graph drawing database - """ - try: - if self.conn: - # conn = create_connection(DB_FILE_AS_TIMES) - # modified_date = datetime.today().strftime('%Y-%m-%d-%H:%M:%S') - sql = ''' DELETE FROM experiment_graph_draw ''' - cur = self.conn.cursor() - cur.execute(sql, ) - self.conn.commit() - return True - return False - except Exception as exp: - print((traceback.format_exc())) - print(("Error on Database clear: {}".format(str(exp)))) - return False - -class JobDataStructure(MainDataBase): - - def __init__(self, expid: str, basic_config: APIBasicConfig): - """Initializes the object based on the unique identifier of the experiment. - - Args: - expid (str): Experiment identifier - """ - MainDataBase.__init__(self, expid) - # BasicConfig.read() - # self.expid = expid - self.folder_path = basic_config.JOBDATA_DIR - exp_paths = ExperimentPaths(expid) - self.database_path = exp_paths.job_data_db - # self.conn = None - self.db_version = None - # self.jobdata_list = JobDataList(self.expid) - self.create_index_query = textwrap.dedent(''' - CREATE INDEX IF NOT EXISTS ID_JOB_NAME ON job_data(job_name); - ''') - if not os.path.exists(self.database_path): - self.conn = None - else: - self.conn = self.create_connection(self.database_path) - self.db_version = self._select_pragma_version() - # self.query_job_historic = None - # Historic only working on DB 12 now - self.query_job_historic = "SELECT id, counter, job_name, created, modified, submit, start, finish, status, rowtype, ncpus, wallclock, qos, energy, date, section, member, chunk, last, platform, job_id, extra_data, nnodes, run_id FROM job_data WHERE job_name=? ORDER BY counter DESC" - - if self.db_version < DB_VERSION_SCHEMA_CHANGES: - try: - self.create_index() - except Exception as exp: - print(exp) - pass - - def __str__(self): - return '{} {}'.format("Data structure. 
Version:", self.db_version) - - def get_max_id_experiment_run(self): - """ - Get last (max) experiment run object. - :return: ExperimentRun data - :rtype: ExperimentRun object - """ - try: - # expe = list() - if not os.path.exists(self.database_path): - raise Exception("Job data folder not found {0} or the database version is outdated.".format(str(self.database_path))) - if self.db_version < DB_VERSION_SCHEMA_CHANGES: - print(("Job database version {0} outdated.".format(str(self.db_version)))) - if os.path.exists(self.database_path) and self.db_version >= DB_VERSION_SCHEMA_CHANGES: - modified_time = int(os.stat(self.database_path).st_mtime) - current_experiment_run = self._get_max_id_experiment_run() - if current_experiment_run: - exprun_item = ExperimentRunItem_14( - *current_experiment_run) if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else ExperimentRunItem(*current_experiment_run) - return ExperimentRun(exprun_item.run_id, exprun_item.created, exprun_item.start, exprun_item.finish, exprun_item.chunk_unit, exprun_item.chunk_size, exprun_item.completed, exprun_item.total, exprun_item.failed, exprun_item.queuing, exprun_item.running, exprun_item.submitted, exprun_item.suspended if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else 0, exprun_item.metadata if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else "", modified_time) - else: - return None - else: - raise Exception("Job data folder not found {0} or the database version is outdated.".format( - str(self.database_path))) - except Exception as exp: - print((str(exp))) - print((traceback.format_exc())) - return None - - def get_experiment_run_by_id(self, run_id): - """ - Get experiment run stored in database by run_id - """ - try: - # expe = list() - if os.path.exists(self.folder_path) and self.db_version >= DB_VERSION_SCHEMA_CHANGES: - result = None - current_experiment_run = self._get_experiment_run_by_id(run_id) - if current_experiment_run: - # for run in current_experiment_run: - exprun_item = ExperimentRunItem_14( - *current_experiment_run) if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else ExperimentRunItem(*current_experiment_run) - result = ExperimentRun(exprun_item.run_id, exprun_item.created, exprun_item.start, exprun_item.finish, exprun_item.chunk_unit, exprun_item.chunk_size, exprun_item.completed, exprun_item.total, exprun_item.failed, exprun_item.queuing, - exprun_item.running, exprun_item.submitted, exprun_item.suspended if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else 0, exprun_item.metadata if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES else "") - return result - else: - return None - else: - raise Exception("Job data folder not found {0} or the database version is outdated.".format( - str(self.database_path))) - except Exception as exp: - if _debug == True: - Log.info(traceback.format_exc()) - Log.debug(traceback.format_exc()) - Log.warning( - "Autosubmit couldn't retrieve experiment run. get_experiment_run_by_id. Exception {0}".format(str(exp))) - return None - - def get_current_job_data(self, run_id, all_states=False): - """ - Gets the job historical data for a run_id. 
- :param run_id: Run identifier - :type run_id: int - :param all_states: False if only last=1 should be included, otherwise all rows - :return: List of jobdata rows - :rtype: list() of JobData objects - """ - try: - current_collection = [] - if self.db_version < DB_VERSION_SCHEMA_CHANGES: - raise Exception("This function requieres a newer DB version.") - if os.path.exists(self.folder_path): - current_job_data = self._get_current_job_data( - run_id, all_states) - if current_job_data: - for job_data in current_job_data: - if self.db_version >= CURRENT_DB_VERSION: - jobitem = JobItem_15(*job_data) - current_collection.append(JobData(jobitem.id, jobitem.counter, jobitem.job_name, jobitem.created, jobitem.modified, jobitem.submit, jobitem.start, jobitem.finish, jobitem.status, jobitem.rowtype, jobitem.ncpus, - jobitem.wallclock, jobitem.qos, jobitem.energy, jobitem.date, jobitem.section, jobitem.member, jobitem.chunk, jobitem.last, jobitem.platform, jobitem.job_id, jobitem.extra_data, jobitem.nnodes, jobitem.run_id, jobitem.MaxRSS, jobitem.AveRSS, jobitem.out, jobitem.err, jobitem.rowstatus)) - else: - jobitem = JobItem_12(*job_data) - current_collection.append(JobData(jobitem.id, jobitem.counter, jobitem.job_name, jobitem.created, jobitem.modified, jobitem.submit, jobitem.start, jobitem.finish, jobitem.status, jobitem.rowtype, jobitem.ncpus, - jobitem.wallclock, jobitem.qos, jobitem.energy, jobitem.date, jobitem.section, jobitem.member, jobitem.chunk, jobitem.last, jobitem.platform, jobitem.job_id, jobitem.extra_data, jobitem.nnodes, jobitem.run_id)) - return current_collection - return None - except Exception as exp: - print((traceback.format_exc())) - print(( - "Error on returning current job data. run_id {0}".format(run_id))) - return None - - def _get_experiment_run_by_id(self, run_id): - """ - :param run_id: Run Identifier - :type run_id: int - :return: First row that matches the run_id - :rtype: Row as Tuple - """ - try: - if self.conn: - self.conn.text_factory = str - cur = self.conn.cursor() - if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES: - cur.execute( - "SELECT run_id,created,start,finish,chunk_unit,chunk_size,completed,total,failed,queuing,running,submitted,suspended, metadata FROM experiment_run WHERE run_id=? and total > 0 ORDER BY run_id DESC", (run_id,)) - else: - cur.execute( - "SELECT run_id,created,start,finish,chunk_unit,chunk_size,completed,total,failed,queuing,running,submitted FROM experiment_run WHERE run_id=? and total > 0 ORDER BY run_id DESC", (run_id,)) - rows = cur.fetchall() - if len(rows) > 0: - return rows[0] - else: - return None - else: - raise Exception("Not a valid connection.") - except sqlite3.Error as e: - if _debug == True: - print((traceback.format_exc())) - print(("Error while retrieving run {0} information. 
{1}".format( - run_id, "_get_experiment_run_by_id"))) - return None - - def _select_pragma_version(self): - """ Retrieves user_version from database - """ - try: - if self.conn: - self.conn.text_factory = str - cur = self.conn.cursor() - cur.execute("pragma user_version;") - rows = cur.fetchall() - # print("Result {0}".format(str(rows))) - if len(rows) > 0: - # print(rows) - # print("Row " + str(rows[0])) - result, = rows[0] - # print(result) - return int(result) if result >= 0 else None - else: - # Starting value - return None - except sqlite3.Error as e: - if _debug == True: - Log.info(traceback.format_exc()) - Log.debug(traceback.format_exc()) - Log.warning("Error while retrieving version: " + - str(type(e).__name__)) - return None - - def _get_max_id_experiment_run(self): - """Return the max id from experiment_run - - :return: max run_id, None - :rtype: int, None - """ - try: - if self.conn: - self.conn.text_factory = str - cur = self.conn.cursor() - if self.db_version >= DB_EXPERIMENT_HEADER_SCHEMA_CHANGES: - cur.execute( - "SELECT run_id,created,start,finish,chunk_unit,chunk_size,completed,total,failed,queuing,running,submitted,suspended, metadata from experiment_run ORDER BY run_id DESC LIMIT 0, 1") - else: - cur.execute( - "SELECT run_id,created,start,finish,chunk_unit,chunk_size,completed,total,failed,queuing,running,submitted from experiment_run ORDER BY run_id DESC LIMIT 0, 1") - rows = cur.fetchall() - if len(rows) > 0: - return rows[0] - else: - return None - return None - except sqlite3.Error as e: - if _debug == True: - Log.info(traceback.format_exc()) - Log.debug(traceback.format_exc()) - Log.warning("Error on select max run_id : " + - str(type(e).__name__)) - return None - - def _get_current_job_data(self, run_id, all_states=False): - """ - Get JobData by run_id. - :param run_id: Run Identifier - :type run_id: int - :param all_states: False if only last=1, True all - :type all_states: bool - """ - try: - if self.conn: - # print("Run {0} states {1} db {2}".format( - # run_id, all_states, self.db_version)) - self.conn.text_factory = str - cur = self.conn.cursor() - request_string = "" - if all_states == False: - if self.db_version >= CURRENT_DB_VERSION: - request_string = "SELECT id, counter, job_name, created, modified, submit, start, finish, status, rowtype, ncpus, wallclock, qos, energy, date, section, member, chunk, last, platform, job_id, extra_data, nnodes, run_id, MaxRSS, AveRSS, out, err, rowstatus from job_data WHERE run_id=? and last=1 and finish > 0 and rowtype >= 2 ORDER BY id" - else: - request_string = "SELECT id, counter, job_name, created, modified, submit, start, finish, status, rowtype, ncpus, wallclock, qos, energy, date, section, member, chunk, last, platform, job_id, extra_data, nnodes, run_id from job_data WHERE run_id=? and last=1 and finish > 0 and rowtype >= 2 ORDER BY id" - - else: - if self.db_version >= CURRENT_DB_VERSION: - request_string = "SELECT id, counter, job_name, created, modified, submit, start, finish, status, rowtype, ncpus, wallclock, qos, energy, date, section, member, chunk, last, platform, job_id, extra_data, nnodes, run_id, MaxRSS, AveRSS, out, err, rowstatus from job_data WHERE run_id=? and rowtype >= 2 ORDER BY id" - else: - request_string = "SELECT id, counter, job_name, created, modified, submit, start, finish, status, rowtype, ncpus, wallclock, qos, energy, date, section, member, chunk, last, platform, job_id, extra_data, nnodes, run_id from job_data WHERE run_id=? 
and rowtype >= 2 ORDER BY id" - - cur.execute(request_string, (run_id,)) - rows = cur.fetchall() - # print(rows) - if len(rows) > 0: - return rows - else: - return None - except sqlite3.Error as e: - if _debug == True: - print((traceback.format_exc())) - print(("Error on select job data: {0}".format( - str(type(e).__name__)))) - return None - - -def parse_output_number(string_number): - """ - Parses number in format 1.0K 1.0M 1.0G - - :param string_number: String representation of number - :type string_number: str - :return: number in float format - :rtype: float - """ - number = 0.0 - if (string_number): - if string_number == "NA": - return 0.0 - last_letter = string_number.strip()[-1] - multiplier = 1.0 - if last_letter == "G": - multiplier = 1000000000.0 - number = string_number[:-1] - elif last_letter == "M": - multiplier = 1000000.0 - number = string_number[:-1] - elif last_letter == "K": - multiplier = 1000.0 - number = string_number[:-1] - else: - number = string_number - try: - number = float(number) * multiplier - except Exception as exp: - number = 0.0 - pass - return number diff --git a/autosubmit_api/database/db_manager.py b/autosubmit_api/database/db_manager.py deleted file mode 100644 index ad46394e8d1f6954d6cc00b4b99fa48ed7b35eec..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/db_manager.py +++ /dev/null @@ -1,272 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015 Earth Sciences Department, BSC-CNS - -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . - -import sqlite3 -import os -from typing import List - -class DbManager(object): - """ - Class to manage an SQLite database. 
- """ - - def __init__(self, root_path: str, db_name: str, db_version: int = 1): - self.root_path = root_path - self.db_name = db_name - self.db_version = db_version - # is_new = not - if os.path.exists(self._get_db_filepath()): - self.connection = sqlite3.connect(self._get_db_filepath()) - elif os.path.exists(self._get_db_filepath() + ".db"): - self.connection = sqlite3.connect(self._get_db_filepath() + ".db") - else: - self.connection = None - # if is_new: - # self._initialize_database() - - def disconnect(self): - """ - Closes the manager connection - """ - if self.connection: - self.connection.close() - - def create_table(self, table_name: str, fields: List[str]): - """ - Creates a new table with the given fields - :param table_name: str - :param fields: List[str] - """ - if self.connection: - cursor = self.connection.cursor() - create_command = self.generate_create_table_command( - table_name, fields) - # print(create_command) - cursor.execute(create_command) - self.connection.commit() - - def create_view(self, view_name: str, statement: str): - """ - Creates a new view with the given statement - - Parameters - ---------- - view_name : str - Name of the view to create - statement : str - SQL statement - """ - if self.connection: - cursor = self.connection.cursor() - create_command = self.generate_create_view_command(view_name, statement) - # print(create_command) - cursor.execute(create_command) - self.connection.commit() - - def drop_table(self, table_name): - """ - Drops the given table - :param table_name: str - - """ - if self.connection: - cursor = self.connection.cursor() - drop_command = self.generate_drop_table_command(table_name) - cursor.execute(drop_command) - self.connection.commit() - - def insert(self, table_name, columns, values): - """ - Inserts a new row on the given table - :param table_name: str - :param columns: [str] - :param values: [str] - - """ - if self.connection: - cursor = self.connection.cursor() - insert_command = self.generate_insert_command( - table_name, columns[:], values[:]) - cursor.execute(insert_command) - self.connection.commit() - - def insertMany(self, table_name, data): - """ - Inserts multiple new rows on the given table - :param table_name: str - :param data: [()] - - """ - if self.connection: - cursor = self.connection.cursor() - insert_many_command = self.generate_insert_many_command( - table_name, len(data[0])) - cursor.executemany(insert_many_command, data) - self.connection.commit() - - def select_first(self, table_name): - """ - Returns the first row of the given table - :param table_name: str - :return row: [] - """ - if self.connection: - cursor = self._select_with_all_fields(table_name) - return cursor.fetchone() - - def select_first_where(self, table_name, where): - """ - Returns the first row of the given table that matches the given where conditions - :param table_name: str - :param where: [str] - :return row: [] - """ - if self.connection: - cursor = self._select_with_all_fields(table_name, where) - return cursor.fetchone() - - def select_all(self, table_name): - """ - Returns all the rows of the given table - :param table_name: str - :return rows: [[]] - """ - if self.connection: - cursor = self._select_with_all_fields(table_name) - return cursor.fetchall() - - def select_all_where(self, table_name, where): - """ - Returns all the rows of the given table that matches the given where conditions - :param table_name: str - :param where: [str] - :return rows: [[]] - """ - if self.connection: - cursor = 
self._select_with_all_fields(table_name, where) - return cursor.fetchall() - - def count(self, table_name): - """ - Returns the number of rows of the given table - :param table_name: str - :return int - """ - if self.connection: - cursor = self.connection.cursor() - count_command = self.generate_count_command(table_name) - cursor.execute(count_command) - return cursor.fetchone()[0] - - def drop(self): - """ - Drops the database (deletes the .db file) - - """ - if self.connection: - self.connection.close() - if os.path.exists(self._get_db_filepath()): - os.remove(self._get_db_filepath()) - - def _get_db_filepath(self) -> str: - """ - Returns the path of the .db file - """ - return os.path.join(self.root_path, self.db_name) - - def _initialize_database(self): - """ - Initialize the database with an options table - with the name and the version of the DB - - """ - if self.connection: - options_table_name = 'db_options' - columns = ['option_name', 'option_value'] - self.create_table(options_table_name, columns) - self.insert(options_table_name, columns, ['name', self.db_name]) - self.insert(options_table_name, columns, - ['version', self.db_version]) - - def _select_with_all_fields(self, table_name, where=[]): - """ - Returns the cursor of the select command with the given parameters - :param table_name: str - :param where: [str] - :return cursor: Cursor - """ - if self.connection: - cursor = self.connection.cursor() - count_command = self.generate_select_command(table_name, where[:]) - cursor.execute(count_command) - return cursor - - """ - Static methods that generates the SQLite commands to make the queries - """ - - @staticmethod - def generate_create_table_command(table_name: str, fields: List[str]) -> str: - create_command = f'CREATE TABLE IF NOT EXISTS {table_name} ( {", ".join(fields)} )' - return create_command - - @staticmethod - def generate_create_view_command(view_name: str, statement: str) -> str: - create_command = f'CREATE VIEW IF NOT EXISTS {view_name} as {statement}' - return create_command - - @staticmethod - def generate_drop_table_command(table_name: str): - drop_command = f'DROP TABLE IF EXISTS {table_name}' - return drop_command - - @staticmethod - def generate_insert_command(table_name, columns, values): - insert_command = 'INSERT INTO ' + table_name + '(' + columns.pop(0) - for column in columns: - insert_command += (', ' + column) - insert_command += (') VALUES ("' + str(values.pop(0)) + '"') - for value in values: - insert_command += (', "' + str(value) + '"') - insert_command += ')' - return insert_command - - @staticmethod - def generate_insert_many_command(table_name, num_of_values): - insert_command = 'INSERT INTO ' + table_name + ' VALUES (?' - num_of_values -= 1 - while num_of_values > 0: - insert_command += ',?' 
- num_of_values -= 1 - insert_command += ')' - return insert_command - - @staticmethod - def generate_count_command(table_name): - count_command = 'SELECT count(*) FROM ' + table_name - return count_command - - @staticmethod - def generate_select_command(table_name, where=[]): - basic_select = 'SELECT * FROM ' + table_name - select_command = basic_select if len( - where) == 0 else basic_select + ' WHERE ' + where.pop(0) - for condition in where: - select_command += ' AND ' + condition - return select_command diff --git a/autosubmit_api/database/db_structure.py b/autosubmit_api/database/db_structure.py deleted file mode 100644 index 06ad129852a3e498e5904472343cd3f6f5752866..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/db_structure.py +++ /dev/null @@ -1,104 +0,0 @@ -import os -import textwrap -import traceback -import sqlite3 - -from autosubmit_api.persistance.experiment import ExperimentPaths - -def get_structure(expid, structures_path): - """ - Creates file of database and table of experiment structure if it does not exist. - Returns current structure as a Dictionary Job Name -> Children's Names - - :return: Map from job to children - :rtype: Dictionary Key: String, Value: List(of String) - """ - try: - exp_paths = ExperimentPaths(expid) - db_structure_path = exp_paths.structure_db - #pkl_path = os.path.join(exp_path, expid, "pkl") - if os.path.exists(db_structure_path): - # Create file - os.umask(0) - if not os.path.exists(db_structure_path): - os.open(db_structure_path, os.O_WRONLY | os.O_CREAT, 0o777) - # open(db_structure_path, "w") - # print(db_structure_path) - create_table_query = textwrap.dedent( - '''CREATE TABLE - IF NOT EXISTS experiment_structure ( - e_from text NOT NULL, - e_to text NOT NULL, - UNIQUE(e_from,e_to) - );''') - with create_connection(db_structure_path) as conn: - create_table(conn, create_table_query) - current_table = _get_exp_structure(db_structure_path) - # print("Current table: ") - # print(current_table) - current_table_structure = dict() - for item in current_table: - _from, _to = item - current_table_structure.setdefault(_from, []).append(_to) - current_table_structure.setdefault(_to, []) - # if _from not in current_table_structure.keys(): - # current_table_structure[_from] = list() - # if _to not in current_table_structure.keys(): - # current_table_structure[_to] = list() - # current_table_structure[_from].append(_to) - if (len(list(current_table_structure.keys())) > 0): - # print("Return structure") - return current_table_structure - else: - return dict() - else: - # pkl folder not found - raise Exception("structures db not found " + - str(db_structure_path)) - except Exception as exp: - print((traceback.format_exc())) - - -def create_connection(db_file): - """ - Create a database connection to the SQLite database specified by db_file. 
- :param db_file: database file name - :return: Connection object or None - """ - try: - conn = sqlite3.connect(db_file) - return conn - except: - return None - - -def create_table(conn: sqlite3.Connection, create_table_sql): - """ create a table from the create_table_sql statement - :param conn: Connection object - :param create_table_sql: a CREATE TABLE statement - :return: - """ - try: - c = conn.cursor() - c.execute(create_table_sql) - except Exception as e: - print(e) - - -def _get_exp_structure(path): - """ - Get all registers from experiment_status.\n - :return: row content: exp_id, name, status, seconds_diff - :rtype: 4-tuple (int, str, str, int) - """ - try: - with create_connection(path) as conn: - conn.text_factory = str - cur = conn.cursor() - cur.execute( - "SELECT e_from, e_to FROM experiment_structure") - rows = cur.fetchall() - return rows - except Exception as exp: - print((traceback.format_exc())) - return dict() diff --git a/autosubmit_api/database/queries.py b/autosubmit_api/database/queries.py deleted file mode 100644 index 88c8acd8a58eec5594cab8a7984911e9f47b5da9..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/queries.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import Optional -from pyparsing import Any -from sqlalchemy import Column, select, or_ -from autosubmit_api.database import tables - - -def generate_query_listexp_extended( - query: str = None, - only_active: bool = False, - owner: str = None, - exp_type: str = None, - autosubmit_version: str = None, - order_by: str = None, - order_desc: bool = False, -): - """ - Query listexp without accessing the view with status and total/completed jobs. - """ - - statement = ( - select( - tables.experiment_table, - tables.details_table, - tables.experiment_status_table.c.exp_id, - tables.experiment_status_table.c.status, - ) - .join( - tables.details_table, - tables.experiment_table.c.id == tables.details_table.c.exp_id, - isouter=True, - ) - .join( - tables.experiment_status_table, - tables.experiment_table.c.id == tables.experiment_status_table.c.exp_id, - isouter=True, - ) - ) - - # Build filters - filter_stmts = [] - - if query: - filter_stmts.append( - or_( - tables.experiment_table.c.name.like(f"{query}%"), - tables.experiment_table.c.description.like(f"%{query}%"), - tables.details_table.c.user.like(f"%{query}%"), - ) - ) - - if only_active: - filter_stmts.append(tables.experiment_status_table.c.status == "RUNNING") - - if owner: - filter_stmts.append(tables.details_table.c.user == owner) - - if exp_type == "test": - filter_stmts.append(tables.experiment_table.c.name.like(f"t%")) - elif exp_type == "operational": - filter_stmts.append(tables.experiment_table.c.name.like(f"o%")) - elif exp_type == "experiment": - filter_stmts.append(tables.experiment_table.c.name.not_like(f"t%")) - filter_stmts.append(tables.experiment_table.c.name.not_like(f"o%")) - - if autosubmit_version: - filter_stmts.append( - tables.experiment_table.c.autosubmit_version == autosubmit_version - ) - - statement = statement.where(*filter_stmts) - - # Order by - ORDER_OPTIONS = { - "expid": tables.experiment_table.c.name, - "created": tables.details_table.c.created, - "description": tables.experiment_table.c.description, - } - order_col: Optional[Column[Any]] = None - if order_by: - order_col = ORDER_OPTIONS.get(order_by, None) - - if isinstance(order_col, Column): - if order_desc: - order_col = order_col.desc() - statement = statement.order_by(order_col) - - return statement diff --git 
a/autosubmit_api/database/repositories/__init__.py b/autosubmit_api/database/repositories/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8e772bb61fbad1f95e6244ff7badebb8b62a2c09 --- /dev/null +++ b/autosubmit_api/database/repositories/__init__.py @@ -0,0 +1,44 @@ +""" +This module contains the repositories for the database tables. + +The repositories are used to interact with the database tables delegating the SQL statements generation and execution order. + +Other modules can use the repositories to interact with the database tables without the need to know the SQL syntax. +""" + +from autosubmit_api.database.repositories.experiment import ExperimentDbRepository +from autosubmit_api.database.repositories.experiment_details import ( + ExperimentDetailsDbRepository, +) +from autosubmit_api.database.repositories.experiment_status import ( + ExperimentStatusDbRepository, +) +from autosubmit_api.database.repositories.experiment_structure import ( + ExperimentStructureDbRepository, +) +from autosubmit_api.database.repositories.graph_draw import ExpGraphDrawDBRepository +from autosubmit_api.database.repositories.join.experiment_join import ( + ExperimentJoinDbRepository, +) +from autosubmit_api.database.repositories.job_packages import ( + JobPackagesDbRepository, + WrapperJobPackagesDbRepository, +) +from autosubmit_api.database.repositories.experiment_run import ( + ExperimentRunDbRepository, +) +from autosubmit_api.database.repositories.job_data import JobDataDbRepository + + +__all__ = [ + "ExperimentDbRepository", + "ExperimentDetailsDbRepository", + "ExperimentStatusDbRepository", + "ExperimentStructureDbRepository", + "ExperimentRunDbRepository", + "JobDataDbRepository", + "ExpGraphDrawDBRepository", + "ExperimentJoinDbRepository", + "JobPackagesDbRepository", + "WrapperJobPackagesDbRepository", +] diff --git a/autosubmit_api/database/repositories/experiment.py b/autosubmit_api/database/repositories/experiment.py new file mode 100644 index 0000000000000000000000000000000000000000..a55aaca7a816c995fb3f914d57287599ae814a75 --- /dev/null +++ b/autosubmit_api/database/repositories/experiment.py @@ -0,0 +1,44 @@ +from typing import Any, Dict, List +from autosubmit_api.database.table_manager import create_db_table_manager + +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import tables + + +class ExperimentDbRepository: + def __init__(self): + self.table_manager = create_db_table_manager( + table=tables.ExperimentTable, + db_filepath=APIBasicConfig.DB_PATH, + ) + + def create_table(self): + """ + Create the experiment table. + """ + with self.table_manager.get_connection() as conn: + self.table_manager.create_table(conn) + + def get_all(self) -> List[Dict[str, Any]]: + """ + Return all experiments. + """ + with self.table_manager.get_connection() as conn: + rows = self.table_manager.select_all(conn) + return [row._asdict() for row in rows] + + def get_by_expid(self, expid: str) -> Dict[str, Any]: + """ + Get experiment by expid. + + :param expid: Experiment ID. + :raises: sqlalchemy.orm.exc.NoResultFound if no experiment is found. + :raises: sqlalchemy.orm.exc.MultipleResultsFound if more than one experiment is found. 
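A minimal usage sketch (illustrative only; "a000" is a hypothetical expid and error handling beyond the documented exception is omitted):

    from autosubmit_api.database.repositories import ExperimentDbRepository
    from sqlalchemy.orm.exc import NoResultFound

    try:
        # Look up one experiment row as a plain dict
        experiment = ExperimentDbRepository().get_by_expid("a000")
        print(experiment["name"], experiment.get("autosubmit_version"))
    except NoResultFound:
        print("Experiment a000 is not registered")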
+ """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + self.table_manager.table.select().where( + tables.ExperimentTable.c.name == expid + ) + ).one() + return row._asdict() diff --git a/autosubmit_api/database/repositories/experiment_details.py b/autosubmit_api/database/repositories/experiment_details.py new file mode 100644 index 0000000000000000000000000000000000000000..950d7f76a0c53e756c22e9f2fdd7b27ee077ceb8 --- /dev/null +++ b/autosubmit_api/database/repositories/experiment_details.py @@ -0,0 +1,48 @@ +from typing import Any, Dict, List +from autosubmit_api.database.table_manager import create_db_table_manager +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import tables + + +class ExperimentDetailsDbRepository: + def __init__(self) -> None: + APIBasicConfig.read() + self.table_manager = create_db_table_manager( + table=tables.DetailsTable, + db_filepath=APIBasicConfig.DB_PATH, + ) + + def create_table(self): + """ + Create the details table. + """ + with self.table_manager.get_connection() as conn: + self.table_manager.create_table(conn) + + def delete_all(self) -> int: + """ + Clear the details table. + """ + with self.table_manager.get_connection() as conn: + rowcount = self.table_manager.delete_all(conn) + return rowcount + + def insert_many(self, values: List[Dict[str, Any]]) -> int: + """ + Insert many rows into the details table. + """ + with self.table_manager.get_connection() as conn: + rowcount = self.table_manager.insert_many(conn, values) + return rowcount + + def get_by_exp_id(self, exp_id: int) -> Dict[str, Any]: + """ + Get experiment details by the numerical exp_id. + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + self.table_manager.table.select().where( + tables.DetailsTable.c.exp_id == exp_id + ) + ).one() + return row._asdict() diff --git a/autosubmit_api/database/repositories/experiment_run.py b/autosubmit_api/database/repositories/experiment_run.py new file mode 100644 index 0000000000000000000000000000000000000000..1ff12b72ac0c136c22221831e3cd975c43ab4681 --- /dev/null +++ b/autosubmit_api/database/repositories/experiment_run.py @@ -0,0 +1,52 @@ +from typing import Any, Dict, List, Optional +from autosubmit_api.database.table_manager import create_db_table_manager +from sqlalchemy import select + +from autosubmit_api.database import tables +from autosubmit_api.persistance.experiment import ExperimentPaths + + +class ExperimentRunDbRepository: + def __init__(self, expid: str) -> None: + self.expid = expid + self.table_manager = create_db_table_manager( + table=tables.ExperimentRunTable, + db_filepath=ExperimentPaths(expid).job_data_db, + schema=expid, + ) + self.table = self.table_manager.table + + def get_all(self) -> List[Dict[str, Any]]: + """ + Gets all runs of the experiment + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute(select(self.table)).all() + + return [row._asdict() for row in rows] + + def get_last_run(self) -> Optional[Dict[str, Any]]: + """ + Gets last run of the experiment + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + select(self.table) + .order_by(self.table.c.run_id.desc()) + .limit(1) + ).one_or_none() + + return row._asdict() if row else None + + def get_run_by_id(self, run_id: int) -> Optional[Dict[str, Any]]: + """ + Gets run by id + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + select(self.table) + .where(self.table.c.run_id == run_id) + 
).one_or_none() + + return row._asdict() if row else None + diff --git a/autosubmit_api/database/repositories/experiment_status.py b/autosubmit_api/database/repositories/experiment_status.py new file mode 100644 index 0000000000000000000000000000000000000000..4d22d6521c8c57d0dee8e40f5cd5f35ce536218a --- /dev/null +++ b/autosubmit_api/database/repositories/experiment_status.py @@ -0,0 +1,79 @@ +from datetime import datetime +import os +from typing import Dict, List +from autosubmit_api.database.table_manager import create_db_table_manager +from sqlalchemy import delete, insert, select +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import tables + + +class ExperimentStatusDbRepository: + def __init__(self) -> None: + APIBasicConfig.read() + self.table_manager = create_db_table_manager( + table=tables.ExperimentStatusTable, + db_filepath=os.path.join(APIBasicConfig.DB_DIR, APIBasicConfig.AS_TIMES_DB), + ) + + def create_table(self): + """ + Create the experiment_status table. + """ + with self.table_manager.get_connection() as conn: + self.table_manager.create_table(conn) + + def get_all_dict(self) -> Dict[str, str]: + """ + Gets table experiment_status as dictionary {expid: status} + """ + result = dict() + with self.table_manager.get_connection() as conn: + cursor = conn.execute(select(self.table_manager.table)) + for row in cursor: + result[row.name] = row.status + return result + + def get_only_running_expids(self) -> List[str]: + """ + Gets list of running experiments + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute( + select(self.table_manager.table).where( + self.table_manager.table.c.status == "RUNNING" + ) + ).all() + return [row.name for row in rows] + + def get_status(self, expid: str) -> str: + """ + Gets the current status of one experiment + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + select(self.table_manager.table).where( + self.table_manager.table.c.name == expid + ) + ).one_or_none() + return row.status if row else "NOT RUNNING" + + def upsert_status(self, exp_id: int, expid: str, status: str): + """ + Upsert (Delete/Insert) the status of one experiment + """ + with self.table_manager.get_connection() as conn: + del_stmnt = delete(tables.ExperimentStatusTable).where( + tables.ExperimentStatusTable.c.name == expid + ) + ins_stmnt = insert(tables.ExperimentStatusTable).values( + exp_id=exp_id, + name=expid, + status=status, + seconds_diff=0, + modified=datetime.now().isoformat(sep="-", timespec="seconds"), + ) + conn.execute(del_stmnt) + result = conn.execute(ins_stmnt) + conn.commit() + + return result.rowcount diff --git a/autosubmit_api/database/repositories/experiment_structure.py b/autosubmit_api/database/repositories/experiment_structure.py new file mode 100644 index 0000000000000000000000000000000000000000..a587861ee7d4dd063592f9dcfb14ff898634a1cc --- /dev/null +++ b/autosubmit_api/database/repositories/experiment_structure.py @@ -0,0 +1,37 @@ +from typing import Dict, List +from sqlalchemy import select +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import tables +from autosubmit_api.database.table_manager import create_db_table_manager +from autosubmit_api.persistance.experiment import ExperimentPaths + + + +class ExperimentStructureDbRepository: + def __init__(self, expid: str): + APIBasicConfig.read() + self.table_manager = create_db_table_manager( + table=tables.ExperimentStructureTable, + 
db_filepath=ExperimentPaths(expid).structure_db, + schema=expid, + ) + # with self.table_manager.get_connection() as conn: + # self.table_manager.create_table(conn) + + def get_structure(self): + structure: Dict[str, List[str]] = {} + + with self.table_manager.get_connection() as conn: + rows = conn.execute( + select(self.table_manager.table) + ).all() + + for row in rows: + edge = row._asdict() + _from, _to = edge.get("e_from"), edge.get("e_to") + + structure.setdefault(_from, []).append(_to) + structure.setdefault(_to, []) + + return structure + diff --git a/autosubmit_api/database/repositories/graph_draw.py b/autosubmit_api/database/repositories/graph_draw.py new file mode 100644 index 0000000000000000000000000000000000000000..a1b396a83811d1e34a448115269ebacdbaed42fb --- /dev/null +++ b/autosubmit_api/database/repositories/graph_draw.py @@ -0,0 +1,36 @@ +from autosubmit_api.database.table_manager import create_db_table_manager +from autosubmit_api.database import tables +from autosubmit_api.persistance.experiment import ExperimentPaths +from typing import Any, Dict, List + + +class ExpGraphDrawDBRepository: + def __init__(self, expid: str) -> None: + self.expid = expid + self.table_manager = create_db_table_manager( + table=tables.GraphDataTable, + db_filepath=ExperimentPaths(expid).graph_data_db, + schema=expid, + ) + + def create_table(self): + """ + Create the graph data table. + """ + with self.table_manager.get_connection() as conn: + self.table_manager.create_table(conn) + + def get_all(self) -> List[Dict[str, Any]]: + with self.table_manager.get_connection() as conn: + result = self.table_manager.select_all(conn) + return [x._asdict() for x in result] + + def delete_all(self) -> int: + with self.table_manager.get_connection() as conn: + rowcount = self.table_manager.delete_all(conn) + return rowcount + + def insert_many(self, values: List[Dict[str, Any]]) -> int: + with self.table_manager.get_connection() as conn: + rowcount = self.table_manager.insert_many(conn, values) + return rowcount diff --git a/autosubmit_api/database/repositories/job_data.py b/autosubmit_api/database/repositories/job_data.py new file mode 100644 index 0000000000000000000000000000000000000000..babcf4a89a33c3ebc60a29380c3b6ab65b6d7325 --- /dev/null +++ b/autosubmit_api/database/repositories/job_data.py @@ -0,0 +1,111 @@ +from typing import Any, Dict, List +from autosubmit_api.database.table_manager import create_db_table_manager +from sqlalchemy import or_, select + +from autosubmit_api.database import tables +from autosubmit_api.persistance.experiment import ExperimentPaths + + +class JobDataDbRepository: + def __init__(self, expid: str) -> None: + self.expid = expid + self.table_manager = create_db_table_manager( + table=tables.JobDataTable, + db_filepath=ExperimentPaths(expid).job_data_db, + schema=expid, + ) + self.table = self.table_manager.table + with self.table_manager.get_connection() as conn: + self.table_manager.create_table(conn) + + def get_last_job_data_by_run_id(self, run_id: int) -> List[Dict[str, Any]]: + """ + Gets last job data of an specific run id + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + select(self.table) + .where( + (self.table.c.run_id == run_id), + (self.table.c.rowtype == 2), + ) + .order_by(self.table.c.id.desc()) + ).all() + + return [row._asdict() for row in row] + + def get_last_job_data(self) -> List[Dict[str, Any]]: + """ + Gets last job data + """ + with self.table_manager.get_connection() as conn: + row = conn.execute( + 
select(self.table).where( + (self.table.c.last == 1), + (self.table.c.rowtype >= 2), + ) + ).all() + return [row._asdict() for row in row] + + def get_jobs_by_name(self, job_name: str) -> List[Dict[str, Any]]: + """ + Gets job data by name + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute( + select(self.table) + .where(self.table.c.job_name == job_name) + .order_by(self.table.c.counter.desc()) + ).all() + + return [row._asdict() for row in rows] + + def get_all(self) -> List[Dict[str, Any]]: + """ + Gets all job data + """ + with self.table_manager.get_connection() as conn: + statement = ( + select(self.table) + .where(self.table.c.id > 0) + .order_by(self.table.c.id) + ) + rows = conn.execute(statement).all() + + return [row._asdict() for row in rows] + + def get_job_data_COMPLETED_by_rowtype_run_id(self, rowtype: int, run_id: int) -> List[Dict[str, Any]]: + """ + Gets job data by rowtype and run id + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute( + select(self.table) + .where( + (self.table.c.run_id == run_id), + (self.table.c.rowtype == rowtype), + (self.table.c.status == "COMPLETED"), + ) + .order_by(self.table.c.id) + ).all() + + return [row._asdict() for row in rows] + + def get_job_data_COMPLETD_by_section(self, section: str)-> List[Dict[str, Any]]: + """ + Gets job data by section + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute( + select(self.table) + .where( + (self.table.c.status == "COMPLETED"), + or_( + (self.table.c.section == section), + (self.table.c.member == section) + ) + ) + .order_by(self.table.c.id) + ).all() + + return [row._asdict() for row in rows] \ No newline at end of file diff --git a/autosubmit_api/database/repositories/job_packages.py b/autosubmit_api/database/repositories/job_packages.py new file mode 100644 index 0000000000000000000000000000000000000000..02d343b86b7095e22c7a69dae3701215b3197e05 --- /dev/null +++ b/autosubmit_api/database/repositories/job_packages.py @@ -0,0 +1,42 @@ +from typing import Dict, List +from autosubmit_api.database.table_manager import create_db_table_manager +from sqlalchemy import select + +from autosubmit_api.database import tables +from autosubmit_api.persistance.experiment import ExperimentPaths + + +class JobPackagesDbRepository: + def __init__(self, expid: str) -> None: + self.expid = expid + self.table_manager = create_db_table_manager( + table=tables.JobPackageTable, + db_filepath=ExperimentPaths(expid).job_packages_db, + schema=expid, + ) + + def get_all(self) -> List[Dict[str, str]]: + """ + Get all job packages. + """ + with self.table_manager.get_connection() as conn: + rows = conn.execute(select(self.table_manager.table)).all() + return [row._asdict() for row in rows] + + +class WrapperJobPackagesDbRepository: + def __init__(self, expid: str) -> None: + self.expid = expid + self.table_manager = create_db_table_manager( + table=tables.WrapperJobPackageTable, + db_filepath=ExperimentPaths(expid).job_packages_db, + schema=expid, + ) + + def get_all(self) -> List[Dict[str, str]]: + """ + Get all job packages. 
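A minimal sketch of how the two package repositories could be combined to map each job to its wrapper package (illustrative; "a000" is a hypothetical expid, and falling back to the wrapper table when the main table is empty is an assumption, not something this diff prescribes):

    from autosubmit_api.database.repositories import (
        JobPackagesDbRepository,
        WrapperJobPackagesDbRepository,
    )

    rows = JobPackagesDbRepository("a000").get_all()
    if not rows:
        # Assumed fallback: preview packages written when Autosubmit is run with -cw
        rows = WrapperJobPackagesDbRepository("a000").get_all()
    job_to_package = {row["job_name"]: row["package_name"] for row in rows}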
+ """ + with self.table_manager.get_connection() as conn: + rows = conn.execute(select(self.table_manager.table)).all() + return [row._asdict() for row in rows] diff --git a/autosubmit_api/database/repositories/join/__init__.py b/autosubmit_api/database/repositories/join/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/autosubmit_api/database/repositories/join/experiment_join.py b/autosubmit_api/database/repositories/join/experiment_join.py new file mode 100644 index 0000000000000000000000000000000000000000..3b68ad826e7758b3b89066f297b5d7a74e98e985 --- /dev/null +++ b/autosubmit_api/database/repositories/join/experiment_join.py @@ -0,0 +1,146 @@ +from typing import Optional +from pyparsing import Any +from sqlalchemy import Column, or_, select +from autosubmit_api.database import tables +from autosubmit_api.database.common import ( + create_main_db_conn, + execute_with_limit_offset, +) + + +def generate_query_listexp_extended( + query: str = None, + only_active: bool = False, + owner: str = None, + exp_type: str = None, + autosubmit_version: str = None, + order_by: str = None, + order_desc: bool = False, +): + """ + Query listexp without accessing the view with status and total/completed jobs. + """ + + statement = ( + select( + tables.ExperimentTable, + tables.DetailsTable, + tables.ExperimentStatusTable.c.exp_id, + tables.ExperimentStatusTable.c.status, + ) + .join( + tables.DetailsTable, + tables.ExperimentTable.c.id == tables.DetailsTable.c.exp_id, + isouter=True, + ) + .join( + tables.ExperimentStatusTable, + tables.ExperimentTable.c.id == tables.ExperimentStatusTable.c.exp_id, + isouter=True, + ) + ) + + # Build filters + filter_stmts = [] + + if query: + filter_stmts.append( + or_( + tables.ExperimentTable.c.name.like(f"{query}%"), + tables.ExperimentTable.c.description.like(f"%{query}%"), + tables.DetailsTable.c.user.like(f"%{query}%"), + ) + ) + + if only_active: + filter_stmts.append(tables.ExperimentStatusTable.c.status == "RUNNING") + + if owner: + filter_stmts.append(tables.DetailsTable.c.user == owner) + + if exp_type == "test": + filter_stmts.append(tables.ExperimentTable.c.name.like("t%")) + elif exp_type == "operational": + filter_stmts.append(tables.ExperimentTable.c.name.like("o%")) + elif exp_type == "experiment": + filter_stmts.append(tables.ExperimentTable.c.name.not_like("t%")) + filter_stmts.append(tables.ExperimentTable.c.name.not_like("o%")) + + if autosubmit_version: + filter_stmts.append( + tables.ExperimentTable.c.autosubmit_version == autosubmit_version + ) + + statement = statement.where(*filter_stmts) + + # Order by + ORDER_OPTIONS = { + "expid": tables.ExperimentTable.c.name, + "created": tables.DetailsTable.c.created, + "description": tables.ExperimentTable.c.description, + } + order_col: Optional[Column[Any]] = None + if order_by: + order_col = ORDER_OPTIONS.get(order_by, None) + + if isinstance(order_col, Column): + if order_desc: + order_col = order_col.desc() + statement = statement.order_by(order_col) + + return statement + + +class ExperimentJoinDbRepository: + """ + View experiments using Experiment, ExperimentStatus and ExperimentDetails tables. 
+ """ + + def _get_connection(self): + return create_main_db_conn() + + def drop_status_from_deleted_experiments(self) -> int: + with self._get_connection() as conn: + del_stmnt = tables.ExperimentStatusTable.delete().where( + tables.ExperimentStatusTable.c.exp_id.not_in( + select(tables.ExperimentTable.c.id) + ) + ) + result = conn.execute(del_stmnt) + conn.commit() + + return result.rowcount + + def search( + self, + query: str = None, + only_active: bool = False, + owner: str = None, + exp_type: str = None, + autosubmit_version: str = None, + order_by: str = None, + order_desc: bool = False, + limit: int = None, + offset: int = None, + ): + """ + Search experiments with extended information. + """ + statement = generate_query_listexp_extended( + query=query, + only_active=only_active, + owner=owner, + exp_type=exp_type, + autosubmit_version=autosubmit_version, + order_by=order_by, + order_desc=order_desc, + ) + with self._get_connection() as conn: + query_result, total_rows = execute_with_limit_offset( + statement=statement, + conn=conn, + limit=limit, + offset=offset, + ) + + return [row._asdict() for row in query_result], total_rows diff --git a/autosubmit_api/database/session.py b/autosubmit_api/database/session.py new file mode 100644 index 0000000000000000000000000000000000000000..ac389220d1bc973a14bf636705e834e80db1bb7f --- /dev/null +++ b/autosubmit_api/database/session.py @@ -0,0 +1,21 @@ +import os +from sqlalchemy import Engine, NullPool, create_engine +from sqlalchemy.orm import sessionmaker, scoped_session +from autosubmit_api.config.basicConfig import APIBasicConfig + + +def create_sqlite_engine(path: str = "") -> Engine: + if path: + return create_engine(f"sqlite:///{os.path.abspath(path)}", poolclass=NullPool) + # Else return memory database + return create_engine("sqlite://", poolclass=NullPool) + + +APIBasicConfig.read() +if APIBasicConfig.DATABASE_BACKEND == "postgres": + engine = create_engine(APIBasicConfig.DATABASE_CONN_URL) +else: + engine = create_sqlite_engine() # Placeholder sqlite engine + +session_factory = sessionmaker(bind=engine) +Session = scoped_session(session_factory) diff --git a/autosubmit_api/database/table_manager.py b/autosubmit_api/database/table_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..8c245cf0333b251af22d645b94a93c505bf1eb8f --- /dev/null +++ b/autosubmit_api/database/table_manager.py @@ -0,0 +1,123 @@ +from abc import ABC, abstractmethod +import os +from typing import Any, Dict, List, Optional, Type, Union +from sqlalchemy import Connection, Engine, Table, delete, insert, select +from sqlalchemy.schema import CreateTable, CreateSchema, DropTable +from sqlalchemy.orm import DeclarativeBase +from autosubmit_api.database import tables, session + +from autosubmit_api.config.basicConfig import APIBasicConfig + + +class DbTableManager(ABC): + engine: Engine + table: Table + + @abstractmethod + def __init__( + self, + table: Union[Type[DeclarativeBase], Table], + db_filepath: str = None, + schema: Optional[str] = None, + ) -> None: + """ + Class to manage a database table with common methods + :param table: SQLAlchemy Table + :param db_filepath: File path location in case of SQLite is used as database backend + :param schema: Almost always same as expid. Postgres database schema in case this is a distributed table. 
+ """ + self.schema = schema + self.db_filepath = db_filepath + if isinstance(table, type) and issubclass(table, DeclarativeBase): + self.table = table.__table__ + else: + self.table = table + + def get_connection(self) -> Connection: + return self.engine.connect() + + def create_table(self, conn: Connection): + """ + Create table + """ + conn.execute(CreateTable(self.table, if_not_exists=True)) + conn.commit() + + def drop_table(self, conn: Connection): + """ + Drops the table + """ + conn.execute(DropTable(self.table, if_exists=True)) + conn.commit() + + def insert_many(self, conn: Connection, values: List[Dict[str, Any]]) -> int: + """ + Insert many values + """ + result = conn.execute(insert(self.table), values) + conn.commit() + return result.rowcount + + def select_all(self, conn: Connection): + rows = conn.execute(select(self.table)).all() + return rows + + def delete_all(self, conn: Connection) -> int: + """ + Deletes all the rows of the table + """ + result = conn.execute(delete(self.table)) + conn.commit() + return result.rowcount + + +class SQLiteDbTableManager(DbTableManager): + def __init__( + self, + table: Union[Type[DeclarativeBase], Table], + db_filepath: str = None, + schema: Optional[str] = None, + ) -> None: + super().__init__(table, db_filepath, schema) + self.engine = session.create_sqlite_engine(self.db_filepath) + + +class PostgresDbTableManager(DbTableManager): + def __init__( + self, + table: Union[Type[DeclarativeBase], Table], + db_filepath: str = None, + schema: Optional[str] = None, + ) -> None: + super().__init__(table, db_filepath, schema) + self.engine = session.Session().bind + if schema: + self.table = tables.table_change_schema(schema, table) + + def create_table(self, conn: Connection): + """ + Create table and the schema (if applicable) + """ + if self.schema: + conn.execute(CreateSchema(self.schema, if_not_exists=True)) + super().create_table(conn) + + +def create_db_table_manager( + table: Union[Type[DeclarativeBase], Table], + db_filepath: str = None, + schema: Optional[str] = None, +) -> DbTableManager: + """ + Creates a Postgres or SQLite DbTableManager depending on the Autosubmit configuration + :param table: SQLAlchemy Table + :param db_filepath: File path location in case of SQLite is used as database backend + :param schema: Almost always same as expid. Postgres database schema in case this is a distributed table. + """ + APIBasicConfig.read() + if APIBasicConfig.DATABASE_BACKEND == "postgres": + return PostgresDbTableManager(table, db_filepath, schema) + elif APIBasicConfig.DATABASE_BACKEND == "sqlite": + return SQLiteDbTableManager(table, db_filepath, schema) + else: + raise Exception("Invalid DATABASE_BACKEND") diff --git a/autosubmit_api/database/tables.py b/autosubmit_api/database/tables.py index a1fd39dccff3eea56bb0ffd0134829239ac5a450..1513cd7efcd919b47a834064cc4dacc599cbb831 100644 --- a/autosubmit_api/database/tables.py +++ b/autosubmit_api/database/tables.py @@ -1,110 +1,76 @@ -from sqlalchemy import MetaData, Integer, String, Text, Table -from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped - - -metadata_obj = MetaData() - - -## SQLAlchemy ORM tables -class BaseTable(DeclarativeBase): - metadata = metadata_obj - - -class ExperimentTable(BaseTable): - """ - Is the main table, populated by Autosubmit. Should be read-only by the API. 
- """ - - __tablename__ = "experiment" - - id: Mapped[int] = mapped_column(Integer, nullable=False, primary_key=True) - name: Mapped[str] = mapped_column(String, nullable=False) - description: Mapped[str] = mapped_column(String, nullable=False) - autosubmit_version: Mapped[str] = mapped_column(String) - - -class DetailsTable(BaseTable): - """ - Stores extra information. It is populated by the API. - """ - - __tablename__ = "details" - - exp_id: Mapped[int] = mapped_column(Integer, primary_key=True) - user: Mapped[str] = mapped_column(Text, nullable=False) - created: Mapped[str] = mapped_column(Text, nullable=False) - model: Mapped[str] = mapped_column(Text, nullable=False) - branch: Mapped[str] = mapped_column(Text, nullable=False) - hpc: Mapped[str] = mapped_column(Text, nullable=False) - - -class ExperimentStatusTable(BaseTable): - """ - Stores the status of the experiments - """ - - __tablename__ = "experiment_status" - - exp_id: Mapped[int] = mapped_column(Integer, primary_key=True) - name: Mapped[str] = mapped_column(Text, nullable=False) - status: Mapped[str] = mapped_column(Text, nullable=False) - seconds_diff: Mapped[int] = mapped_column(Integer, nullable=False) - modified: Mapped[str] = mapped_column(Text, nullable=False) - - -class GraphDataTable(BaseTable): - """ - Stores the coordinates and it is used exclusively to speed up the process - of generating the graph layout - """ - - __tablename__ = "experiment_graph_draw" - - id: Mapped[int] = mapped_column(Integer, primary_key=True) - job_name: Mapped[str] = mapped_column(Text, nullable=False) - x: Mapped[int] = mapped_column(Integer, nullable=False) - y: Mapped[int] = mapped_column(Integer, nullable=False) - - -class JobPackageTable(BaseTable): - """ - Stores a mapping between the wrapper name and the actual job in slurm - """ - - __tablename__ = "job_package" - - exp_id: Mapped[str] = mapped_column(Text) - package_name: Mapped[str] = mapped_column(Text, primary_key=True) - job_name: Mapped[str] = mapped_column(Text, primary_key=True) - - -class WrapperJobPackageTable(BaseTable): +from typing import Type, Union +from sqlalchemy import Column, Integer, MetaData, Text, Table +from sqlalchemy.orm import DeclarativeBase +from autosubmit.database.tables import ( + metadata_obj, + ExperimentTable, + ExperimentStructureTable, + ExperimentStatusTable, + JobPackageTable, + WrapperJobPackageTable, + ExperimentRunTable, + JobDataTable, +) + + +def table_change_schema( + schema: str, source: Union[Type[DeclarativeBase], Table] +) -> Table: """ - It is a replication. 
It is only created/used when using inspectand create or monitor - with flag -cw in Autosubmit.\n - This replication is used to not interfere with the current autosubmit run of that experiment - since wrapper_job_package will contain a preview, not the real wrapper packages + Copy the source table and change the schema of that SQLAlchemy table into a new table instance """ - - __tablename__ = "wrapper_job_package" - - exp_id: Mapped[str] = mapped_column(Text) - package_name: Mapped[str] = mapped_column(Text, primary_key=True) - job_name: Mapped[str] = mapped_column(Text, primary_key=True) - - -## SQLAlchemy Core tables - -# MAIN_DB TABLES -experiment_table: Table = ExperimentTable.__table__ -details_table: Table = DetailsTable.__table__ - -# AS_TIMES TABLES -experiment_status_table: Table = ExperimentStatusTable.__table__ - -# Graph Data TABLES -graph_data_table: Table = GraphDataTable.__table__ - -# Job package TABLES -job_package_table: Table = JobPackageTable.__table__ -wrapper_job_package_table: Table = WrapperJobPackageTable.__table__ \ No newline at end of file + if isinstance(source, type) and issubclass(source, DeclarativeBase): + _source_table: Table = source.__table__ + elif isinstance(source, Table): + _source_table = source + else: + raise RuntimeError("Invalid source type on table schema change") + + metadata = MetaData(schema=schema) + dest_table = Table(_source_table.name, metadata) + + for col in _source_table.columns: + dest_table.append_column(col.copy()) + + return dest_table + + +## API extended SQLAlchemy Core tables + +DetailsTable = Table( + "details", + metadata_obj, + Column("exp_id", Integer, primary_key=True), + Column("user", Text, nullable=False), + Column("created", Text, nullable=False), + Column("model", Text, nullable=False), + Column("branch", Text, nullable=False), + Column("hpc", Text, nullable=False), +) +"""Stores extra information. It is populated by the API.""" + + +GraphDataTable = Table( + "experiment_graph_draw", + metadata_obj, + Column("id", Integer, primary_key=True), + Column("job_name", Text, nullable=False), + Column("x", Integer, nullable=False), + Column("y", Integer, nullable=False), +) +"""Stores the coordinates and it is used exclusively +to speed up the process of generating the graph layout""" + +# Module exports +__all__ = [ + "table_change_schema", + "ExperimentTable", + "ExperimentStructureTable", + "ExperimentStatusTable", + "JobPackageTable", + "WrapperJobPackageTable", + "ExperimentRunTable", + "JobDataTable", + "DetailsTable", + "GraphDataTable", +] diff --git a/autosubmit_api/database/utils.py b/autosubmit_api/database/utils.py deleted file mode 100644 index 0c46773025c0c7dd41d50721aec55925e91fc9db..0000000000000000000000000000000000000000 --- a/autosubmit_api/database/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -from sqlite3 import Cursor -from typing import List, Any - - -def get_headers_sqlite(cursor: Cursor): - """ - Get headers in position of a sqlite query cursor - """ - return list(map(lambda attr: attr[0], cursor.description)) - - -def map_row_result_to_dict_sqlite(row: List[Any], headers: List[str]): - """ - Return a dict of the rows as values with keys as their respective header. 
- """ - return {header: row[i] for i, header in enumerate(headers)} diff --git a/autosubmit_api/experiment/common_db_requests.py b/autosubmit_api/experiment/common_db_requests.py index e8aa22fa4e2add6edad197839a2becf0a0287401..3b256c2f9681c0ed9af4cc712d81c119164f7119 100644 --- a/autosubmit_api/experiment/common_db_requests.py +++ b/autosubmit_api/experiment/common_db_requests.py @@ -4,8 +4,6 @@ import sqlite3 from datetime import datetime from autosubmit_api.logger import logger from autosubmit_api.config.basicConfig import APIBasicConfig -from autosubmit_api.database import tables -from autosubmit_api.database.common import create_as_times_db_engine APIBasicConfig.read() DB_FILES_STATUS = os.path.join( @@ -31,7 +29,6 @@ def create_connection(db_file): def insert_archive_status(status, alatency, abandwidth, clatency, cbandwidth, rtime): - try: with create_connection(DB_FILES_STATUS) as conn: sql = """ INSERT INTO archive_status(status, avg_latency, avg_bandwidth, current_latency, current_bandwidth, response_time, modified ) VALUES(?,?,?,?,?,?,?)""" @@ -73,47 +70,3 @@ def get_last_read_archive_status(): print((traceback.format_exc())) print(("Error on Get Last : " + str(exp))) return (False, None, None, None, None, None, None) - - -# SELECTS - - -def get_experiment_status(): - """ - Gets table experiment_status as dictionary - conn is expected to reference as_times.db - """ - experiment_status = dict() - try: - with create_as_times_db_engine().connect() as conn: - cursor = conn.execute(tables.experiment_status_table.select()) - for row in cursor: - experiment_status[row.name] = row.status - except Exception as exc: - logger.error(f"Exception while reading experiment_status: {exc}") - logger.error(traceback.format_exc()) - return experiment_status - - -def get_specific_experiment_status(expid): - """ - Gets the current status from database.\n - :param expid: Experiment name - :type expid: str - :return: name of experiment and status - :rtype: 2-tuple (name, status) - """ - try: - with create_as_times_db_engine().connect() as conn: - row = conn.execute( - tables.experiment_status_table.select().where( - tables.experiment_status_table.c.name == expid - ) - ).one_or_none() - if row: - return (row.name, row.status) - except Exception as exc: - logger.error(f"Exception while reading experiment_status for {expid}: {exc}") - logger.error(traceback.format_exc()) - - return (expid, "NOT RUNNING") diff --git a/autosubmit_api/experiment/common_requests.py b/autosubmit_api/experiment/common_requests.py index 82be53fc00d8a32e79e3d2057778563f233d540f..5d11f759911ed523762c61f20963bd952f28253a 100644 --- a/autosubmit_api/experiment/common_requests.py +++ b/autosubmit_api/experiment/common_requests.py @@ -29,12 +29,15 @@ import multiprocessing import subprocess from collections import deque + from autosubmit_api.components.experiment.pkl_organizer import PklOrganizer from autosubmit_api.components.jobs.job_factory import SimpleJob from autosubmit_api.config.confConfigStrategy import confConfigStrategy -from autosubmit_api.database import db_common as db_common +from autosubmit_api.database.repositories.experiment import ExperimentDbRepository +from autosubmit_api.database.repositories.experiment_status import ExperimentStatusDbRepository +from autosubmit_api.database.repositories import ExperimentJoinDbRepository from autosubmit_api.experiment import common_db_requests as DbRequests -from autosubmit_api.database import db_jobdata as JobData +from autosubmit_api.components import jobdata as JobData from 
autosubmit_api.common import utils as common_utils from autosubmit_api.components.jobs import utils as JUtils @@ -151,7 +154,7 @@ def get_experiment_data(expid): try: autosubmit_config_facade = ConfigurationFacadeDirector(AutosubmitConfigurationFacadeBuilder(expid)).build_autosubmit_configuration_facade() try: - _, experiment_status = DbRequests.get_specific_experiment_status(expid) + experiment_status = ExperimentStatusDbRepository().get_status(expid) result["running"] = (experiment_status == "RUNNING") except Exception as exc: logger.warning((traceback.format_exc())) @@ -163,7 +166,10 @@ def get_experiment_data(expid): result["owner"] = autosubmit_config_facade.get_owner_name() result["time_last_access"] = autosubmit_config_facade.get_experiment_last_access_time_as_datetime() result["time_last_mod"] = autosubmit_config_facade.get_experiment_last_modified_time_as_datetime() - result["description"] = db_common.get_experiment_by_id(expid)["description"] + try: + result["description"] = ExperimentDbRepository().get_by_expid(expid).get("description", "NA") + except Exception: + result["description"] = "NA" result["version"] = autosubmit_config_facade.get_autosubmit_version() result["model"] = autosubmit_config_facade.get_model() result["branch"] = autosubmit_config_facade.get_branch() @@ -463,7 +469,7 @@ def quick_test_run(expid): error_message = "" try: - name, status = DbRequests.get_specific_experiment_status(expid) + status = ExperimentStatusDbRepository().get_status(expid) if status != "RUNNING": running = False except Exception as exp: @@ -759,8 +765,8 @@ def get_experiment_tree_structured(expid, log): try: APIBasicConfig.read() - # TODO: Encapsulate this following 2 lines or move to the parent function in app.py - curr_exp_as_version = db_common.get_autosubmit_version(expid, log) + # TODO: Encapsulate this following 2 lines or move to the parent function in app. + curr_exp_as_version: str = ExperimentDbRepository().get_by_expid(expid).get("autosubmit_version") main, secondary = common_utils.parse_version_number(curr_exp_as_version) if main and main >= 4: as_conf = Autosubmit4Config(expid) @@ -1256,3 +1262,181 @@ def enforceLocal(log): except Exception: log.info("Locale C.utf8 is not found, using '{0}' as fallback".format("C")) locale.setlocale(locale.LC_ALL, 'C') + + +def search_experiment_by_id( + query: str, exp_type: str = None, only_active: bool = None, owner: str = None +): + """ + Search experiments using provided data. Main query searches in the view listexp of ec_earth.db. + + :param searchString: string used to match columns in the table + :type searchString: str + :param typeExp: Assumes values "test" (only experiments starting with 't') or "experiment" (not experiment starting with 't') or "all" (indistinct). 
+ :type typeExp: str + :param onlyActive: Assumes "active" (only active experiments) or "" (indistinct) + :type onlyActive: str + :param owner: return only experiment that match the provided owner of the experiment + :type owner: str + :return: list of experiments that match the search + :rtype: JSON + """ + result = list() + query_result, _ = ExperimentJoinDbRepository().search( + query=query, exp_type=exp_type, only_active=only_active, owner=owner + ) + + for row in query_result: + expid = str(row["name"]) + completed = "NA" + total = "NA" + submitted = 0 + queuing = 0 + running = 0 + failed = 0 + suspended = 0 + version = "Unknown" + wrapper = None + # last_modified_timestamp = None + last_modified_pkl_datetime = None + hpc = row["hpc"] + try: + autosubmit_config_facade = ConfigurationFacadeDirector( + AutosubmitConfigurationFacadeBuilder(expid) + ).build_autosubmit_configuration_facade() + version = autosubmit_config_facade.get_autosubmit_version() + wrapper = autosubmit_config_facade.get_wrapper_type() + last_modified_pkl_datetime = ( + autosubmit_config_facade.get_pkl_last_modified_time_as_datetime() + ) + hpc = autosubmit_config_facade.get_main_platform() + except Exception: + last_modified_pkl_datetime = None + pass + + total, completed = ("NA", "NA") + + # Getting run data from historical database + try: + current_run = ( + ExperimentHistoryDirector(ExperimentHistoryBuilder(expid)) + .build_reader_experiment_history() + .manager.get_experiment_run_dc_with_max_id() + ) + if current_run and current_run.total > 0: + completed = current_run.completed + total = current_run.total + submitted = current_run.submitted + queuing = current_run.queuing + running = current_run.running + failed = current_run.failed + suspended = current_run.suspended + # last_modified_timestamp = current_run.modified_timestamp + except Exception as exp: + print(("Exception on search_experiment_by_id : {}".format(exp))) + pass + + result.append( + { + "id": row["id"], + "name": row["name"], + "user": row["user"], + "description": row["description"], + "hpc": hpc, + "status": row["status"], + "completed": completed, + "total": total, + "version": version, + "wrapper": wrapper, + "submitted": submitted, + "queuing": queuing, + "running": running, + "failed": failed, + "suspended": suspended, + "modified": last_modified_pkl_datetime, + } + ) + return {"experiment": result} + + +def get_current_running_exp(): + """ + Simple query that gets the list of experiments currently running + + :rtype: list of users + """ + result = list() + query_result, _ = ExperimentJoinDbRepository().search(only_active=True) + + for row in query_result: + expid = str(row["name"]) + status = "NOT RUNNING" + completed = "NA" + total = "NA" + submitted = 0 + queuing = 0 + running = 0 + failed = 0 + suspended = 0 + user = str(row["user"]) + version = "Unknown" + wrapper = None + # last_modified_timestamp = None + last_modified_pkl_datetime = None + status = str(row["status"]) + if status == "RUNNING": + try: + autosubmit_config_facade = ConfigurationFacadeDirector( + AutosubmitConfigurationFacadeBuilder(expid) + ).build_autosubmit_configuration_facade() + version = autosubmit_config_facade.get_autosubmit_version() + wrapper = autosubmit_config_facade.get_wrapper_type() + last_modified_pkl_datetime = ( + autosubmit_config_facade.get_pkl_last_modified_time_as_datetime() + ) + hpc = autosubmit_config_facade.get_main_platform() + except Exception: + # last_modified_pkl_datetime = None + pass + + # Try to retrieve experiment_run data + try: + 
current_run = ( + ExperimentHistoryDirector(ExperimentHistoryBuilder(expid)) + .build_reader_experiment_history() + .manager.get_experiment_run_dc_with_max_id() + ) + if current_run and current_run.total > 0: + completed = current_run.completed + total = current_run.total + submitted = current_run.submitted + queuing = current_run.queuing + running = current_run.running + failed = current_run.failed + suspended = current_run.suspended + # last_modified_timestamp = current_run.modified_timestamp + except Exception as exp: + print(("Exception on get_current_running_exp : {}".format(exp))) + + # Append to result + result.append( + { + "id": row["id"], + "name": row["name"], + "user": user, + "description": row["description"], + "hpc": hpc, + "status": status, + "completed": completed, + "total": total, + "version": version, + "wrapper": wrapper, + "submitted": submitted, + "queuing": queuing, + "running": running, + "failed": failed, + "suspended": suspended, + "modified": last_modified_pkl_datetime, + } + ) + return {"experiment": result} diff --git a/autosubmit_api/history/data_classes/experiment_run.py b/autosubmit_api/history/data_classes/experiment_run.py index ef3459f67a46f15bdd02e758ab96b72cd58d0ae0..d8860564710f0da1f83a5b0f52e0c73e1154968a 100644 --- a/autosubmit_api/history/data_classes/experiment_run.py +++ b/autosubmit_api/history/data_classes/experiment_run.py @@ -137,10 +137,9 @@ class ExperimentRun(object): raise dbexception @classmethod - def from_model(cls, row): + def from_model(cls, row_dict: dict): """ Build ExperimentRun from ExperimentRunRow """ try: - row_dict = row._asdict() experiment_run = cls(0) experiment_run.run_id = row_dict.get('run_id', 0) experiment_run.created = get_current_datetime_if_none(row_dict.get('created', None)) diff --git a/autosubmit_api/history/data_classes/job_data.py b/autosubmit_api/history/data_classes/job_data.py index 739e0a5a5a8fb8b58268150259149c9642c8c066..6cb5a8e6ff797dce9717499f9437fd91d8f0ad42 100644 --- a/autosubmit_api/history/data_classes/job_data.py +++ b/autosubmit_api/history/data_classes/job_data.py @@ -78,9 +78,8 @@ class JobData(object): self.platform_output = platform_output # DB 17 @classmethod - def from_model(cls, row): + def from_model(cls, row_dict: dict): """ Build JobData from JobDataRow. """ - row_dict = row._asdict() job_data = cls(row_dict['id'], row_dict['counter'], row_dict['job_name'], diff --git a/autosubmit_api/history/database_managers/database_manager.py b/autosubmit_api/history/database_managers/database_manager.py deleted file mode 100644 index c7cb2d950dac36d35b963754000e8eeb2d8f1e12..0000000000000000000000000000000000000000 --- a/autosubmit_api/history/database_managers/database_manager.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2015-2020 Earth Sciences Department, BSC-CNS -# This file is part of Autosubmit. - -# Autosubmit is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Autosubmit is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with Autosubmit. If not, see . 
- -import sqlite3 -import os -from autosubmit_api.history import utils as HUtils -from autosubmit_api.history.database_managers import database_models as Models -from autosubmit_api.config.basicConfig import APIBasicConfig -from abc import ABCMeta - -DEFAULT_JOBDATA_DIR = os.path.join('/esarchive', 'autosubmit', 'as_metadata', 'data') -DEFAULT_HISTORICAL_LOGS_DIR = os.path.join('/esarchive', 'autosubmit', 'as_metadata', 'logs') -DEFAULT_LOCAL_ROOT_DIR = os.path.join('/esarchive', 'autosubmit') - -class DatabaseManager(metaclass=ABCMeta): - """ Simple database manager. Needs expid. """ - AS_TIMES_DB_NAME = "as_times.db" # default AS_TIMES location - ECEARTH_DB_NAME = "ecearth.db" # default EC_EARTH_DB_NAME location - def __init__(self, expid, basic_config): - # type: (str, APIBasicConfig) -> None - self.expid = expid - self.JOBDATA_DIR = basic_config.JOBDATA_DIR - self.LOCAL_ROOT_DIR = basic_config.LOCAL_ROOT_DIR - self.db_version = Models.DatabaseVersion.NO_DATABASE.value - - def get_connection(self, path): - # type : (str) -> Sqlite3Connection - """ - Create a database connection to the SQLite database specified by path. - :param path: database file name - :return: Connection object or None - """ - if not os.path.exists(path): - self._create_database_file(path) - return sqlite3.connect(path) - - def _create_database_file(self, path): - # type : (str) -> None - """ creates a database files with full permissions """ - os.umask(0) - os.open(path, os.O_WRONLY | os.O_CREAT, 0o776) - - def execute_statement_on_dbfile(self, path, statement): - # type : (str, str) -> None - """ Executes a statement on a database file specified by path. """ - conn = self.get_connection(path) - cursor = conn.cursor() - cursor.execute(statement) - conn.commit() - conn.close() - - def execute_statement_with_arguments_on_dbfile(self, path, statement, arguments): - # type : (str, str, Tuple) -> None - """ Executes an statement with arguments on a database file specified by path. """ - conn = self.get_connection(path) - cursor = conn.cursor() - cursor.execute(statement, arguments) - conn.commit() - conn.close() - - def execute_many_statement_with_arguments_on_dbfile(self, path, statement, arguments_list): - # type : (str, str, List[Tuple]) -> None - """ Executes many statements from a list of arguments specified by a path. """ - conn = self.get_connection(path) - cursor = conn.cursor() - cursor.executemany(statement, arguments_list) - conn.commit() - conn.close() - - def execute_many_statements_on_dbfile(self, path, statements): - # type : (str, List[str]) -> None - """ - Updates the table schema using a **small** list of statements. No Exception raised. - Should be used to execute a list of schema updates that might have been already applied. 
- """ - for statement in statements: - try: - self.execute_statement_on_dbfile(path, statement) - except Exception as exp: - pass - - def get_from_statement(self, path, statement): - # type : (str, str) -> List[Tuple] - """ Get the rows from a statement with no arguments """ - conn = self.get_connection(path) - conn.text_factory = str - cursor = conn.cursor() - cursor.execute(statement) - statement_rows = cursor.fetchall() - conn.close() - return statement_rows - - def get_from_statement_with_arguments(self, path, statement, arguments): - # type : (str, str, Tuple) -> List[Tuple] - """ Get the rows from a statement with arguments """ - conn = self.get_connection(path) - conn.text_factory = str - cursor = conn.cursor() - cursor.execute(statement, arguments) - statement_rows = cursor.fetchall() - conn.close() - return statement_rows - - def insert_statement(self, path, statement): - # type : (str, str) -> int - """ Insert statement into path """ - conn = self.get_connection(path) - conn.text_factory = str - cursor = conn.cursor() - cursor.execute(statement) - lastrow_id = cursor.lastrowid - conn.commit() - conn.close() - return lastrow_id - - def insert_statement_with_arguments(self, path, statement, arguments): - # type : (str, str, Tuple) -> int - """ Insert statement with arguments into path """ - conn = self.get_connection(path) - conn.text_factory = str - cursor = conn.cursor() - cursor.execute(statement, arguments) - lastrow_id = cursor.lastrowid - conn.commit() - conn.close() - return lastrow_id - - def get_built_select_statement(self, table_name, conditions=None): - # type : (str, namedtuple, str) -> str - """ Build and return a SELECT statement with the same fields as the model. Requires that the table is associated with a model (namedtuple). """ - model = Models.get_correct_model_for_table_and_version(table_name, self.db_version) # Models.table_name_to_model[table_name] - if conditions: - return "SELECT {0} FROM {1} WHERE {2}".format(HUtils.get_fields_as_comma_str(model), table_name, conditions) - else: - return "SELECT {0} FROM {1}".format(HUtils.get_fields_as_comma_str(model), table_name) diff --git a/autosubmit_api/history/database_managers/experiment_history_db_manager.py b/autosubmit_api/history/database_managers/experiment_history_db_manager.py index 1f35a92154782623839b84dbe7c055e34abb4b45..e72731d22cf6e005708703117f4038f1f0150b75 100644 --- a/autosubmit_api/history/database_managers/experiment_history_db_manager.py +++ b/autosubmit_api/history/database_managers/experiment_history_db_manager.py @@ -16,450 +16,114 @@ # You should have received a copy of the GNU General Public License # along with Autosubmit. If not, see . import os -import textwrap +from typing import Any, Dict, List, Optional +from autosubmit_api.database.repositories import ExperimentRunDbRepository, JobDataDbRepository from autosubmit_api.persistance.experiment import ExperimentPaths -from autosubmit_api.history import utils as HUtils from autosubmit_api.history.database_managers import database_models as Models from autosubmit_api.history.data_classes.job_data import JobData from autosubmit_api.history.data_classes.experiment_run import ExperimentRun -from autosubmit_api.config.basicConfig import APIBasicConfig -from autosubmit_api.history.database_managers.database_manager import DatabaseManager -from typing import List -from collections import namedtuple -DEFAULT_MAX_COUNTER = 0 -class ExperimentHistoryDbManager(DatabaseManager): - """ Manages actions directly on the database. 
- """ - def __init__(self, expid, basic_config): - # type: (str, APIBasicConfig) -> None - """ Requires expid and jobdata_dir_path. """ - super(ExperimentHistoryDbManager, self).__init__(expid, basic_config) - self._set_schema_changes() - self._set_table_queries() - exp_paths = ExperimentPaths(expid) - self.historicaldb_file_path = exp_paths.job_data_db - if self.my_database_exists(): - self.set_db_version_models() - - def initialize(self): - """ Check if database exists. Updates to current version if necessary. """ - if self.my_database_exists(): - if not self.is_current_version(): - self.update_historical_database() - else: - self.create_historical_database() - self.set_db_version_models() - - def set_db_version_models(self): - self.db_version = self._get_pragma_version() - self.experiment_run_row_model = Models.get_experiment_row_model(self.db_version) - self.job_data_row_model = Models.get_job_data_row_model(self.db_version) - - def my_database_exists(self): - return os.path.exists(self.historicaldb_file_path) - - def is_header_ready_db_version(self): - if self.my_database_exists(): - return self._get_pragma_version() >= Models.DatabaseVersion.EXPERIMENT_HEADER_SCHEMA_CHANGES.value - return False - - def is_current_version(self): - if self.my_database_exists(): - return self._get_pragma_version() == Models.DatabaseVersion.CURRENT_DB_VERSION.value - return False - - def _set_table_queries(self): - """ Sets basic table queries. """ - self.create_table_header_query = textwrap.dedent( - '''CREATE TABLE - IF NOT EXISTS experiment_run ( - run_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, - created TEXT NOT NULL, - modified TEXT NOT NULL, - start INTEGER NOT NULL, - finish INTEGER, - chunk_unit TEXT NOT NULL, - chunk_size INTEGER NOT NULL, - completed INTEGER NOT NULL, - total INTEGER NOT NULL, - failed INTEGER NOT NULL, - queuing INTEGER NOT NULL, - running INTEGER NOT NULL, - submitted INTEGER NOT NULL, - suspended INTEGER NOT NULL DEFAULT 0, - metadata TEXT - ); - ''') - self.create_table_query = textwrap.dedent( - '''CREATE TABLE - IF NOT EXISTS job_data ( - id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, - counter INTEGER NOT NULL, - job_name TEXT NOT NULL, - created TEXT NOT NULL, - modified TEXT NOT NULL, - submit INTEGER NOT NULL, - start INTEGER NOT NULL, - finish INTEGER NOT NULL, - status TEXT NOT NULL, - rowtype INTEGER NOT NULL, - ncpus INTEGER NOT NULL, - wallclock TEXT NOT NULL, - qos TEXT NOT NULL, - energy INTEGER NOT NULL, - date TEXT NOT NULL, - section TEXT NOT NULL, - member TEXT NOT NULL, - chunk INTEGER NOT NULL, - last INTEGER NOT NULL, - platform TEXT NOT NULL, - job_id INTEGER NOT NULL, - extra_data TEXT NOT NULL, - nnodes INTEGER NOT NULL DEFAULT 0, - run_id INTEGER, - MaxRSS REAL NOT NULL DEFAULT 0.0, - AveRSS REAL NOT NULL DEFAULT 0.0, - out TEXT NOT NULL, - err TEXT NOT NULL, - rowstatus INTEGER NOT NULL DEFAULT 0, - children TEXT, - platform_output TEXT, - UNIQUE(counter,job_name) - ); - ''') - self.create_index_query = textwrap.dedent(''' - CREATE INDEX IF NOT EXISTS ID_JOB_NAME ON job_data(job_name); - ''') - - def _set_schema_changes(self): - # type : () -> None - """ Creates the list of schema changes""" - self.version_schema_changes = [ - "ALTER TABLE job_data ADD COLUMN nnodes INTEGER NOT NULL DEFAULT 0", - "ALTER TABLE job_data ADD COLUMN run_id INTEGER" - ] - # Version 15 - self.version_schema_changes.extend([ - "ALTER TABLE job_data ADD COLUMN MaxRSS REAL NOT NULL DEFAULT 0.0", - "ALTER TABLE job_data ADD COLUMN AveRSS REAL NOT NULL DEFAULT 0.0", - 
"ALTER TABLE job_data ADD COLUMN out TEXT NOT NULL DEFAULT ''", - "ALTER TABLE job_data ADD COLUMN err TEXT NOT NULL DEFAULT ''", - "ALTER TABLE job_data ADD COLUMN rowstatus INTEGER NOT NULL DEFAULT 0", - "ALTER TABLE experiment_run ADD COLUMN suspended INTEGER NOT NULL DEFAULT 0", - "ALTER TABLE experiment_run ADD COLUMN metadata TEXT" - ]) - # Version 16 - self.version_schema_changes.extend([ - "ALTER TABLE experiment_run ADD COLUMN modified TEXT" - ]) - # Version 17 - self.version_schema_changes.extend([ - "ALTER TABLE job_data ADD COLUMN children TEXT", - "ALTER TABLE job_data ADD COLUMN platform_output TEXT" - ]) - - def create_historical_database(self): - """ Creates the historical database with the latest changes. """ - self.execute_statement_on_dbfile(self.historicaldb_file_path, self.create_table_header_query) - self.execute_statement_on_dbfile(self.historicaldb_file_path, self.create_table_query) - self.execute_statement_on_dbfile(self.historicaldb_file_path, self.create_index_query) - self._set_historical_pragma_version(Models.DatabaseVersion.CURRENT_DB_VERSION.value) - - def update_historical_database(self): - """ Updates the historical database with the latest changes IF necessary. """ - self.execute_many_statements_on_dbfile(self.historicaldb_file_path, self.version_schema_changes) - self.execute_statement_on_dbfile(self.historicaldb_file_path, self.create_index_query) - self.execute_statement_on_dbfile(self.historicaldb_file_path, self.create_table_header_query) - self._set_historical_pragma_version(Models.DatabaseVersion.CURRENT_DB_VERSION.value) - - def get_experiment_run_dc_with_max_id(self): - """ Get Current (latest) ExperimentRun data class. """ - return ExperimentRun.from_model(self._get_experiment_run_with_max_id()) - - def register_experiment_run_dc(self, experiment_run_dc): - self._insert_experiment_run(experiment_run_dc) - return ExperimentRun.from_model(self._get_experiment_run_with_max_id()) - - def update_experiment_run_dc_by_id(self, experiment_run_dc): - """ Requires ExperimentRun data class. """ - self._update_experiment_run(experiment_run_dc) - return ExperimentRun.from_model(self._get_experiment_run_with_max_id()) - - def _get_experiment_run_with_max_id(self): - """ Get Models.ExperimentRunRow for the maximum id run. 
""" - statement = self.get_built_select_statement("experiment_run", "run_id > 0 ORDER BY run_id DESC LIMIT 0, 1") - max_experiment_run = self.get_from_statement(self.historicaldb_file_path, statement) - if len(max_experiment_run) == 0: - raise Exception("No Experiment Runs registered.") - return self.experiment_run_row_model(*max_experiment_run[0]) - - def get_experiment_run_by_id(self, run_id): - # type: (int) -> ExperimentRun | None - if run_id: - return ExperimentRun.from_model(self._get_experiment_run_by_id(run_id)) - return None - - def _get_experiment_run_by_id(self, run_id): - # type: (int) -> namedtuple - statement = self.get_built_select_statement("experiment_run", "run_id=?") - arguments = (run_id,) - experiment_run = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - if len(experiment_run) == 0: - raise Exception("Experiment run {0} for experiment {1} does not exists.".format(run_id, self.expid)) - return self.experiment_run_row_model(*experiment_run[0]) - - def get_experiment_runs_dcs(self): - # type: () -> List[ExperimentRun] - experiment_run_rows = self._get_experiment_runs() - return [ExperimentRun.from_model(row) for row in experiment_run_rows] - - def _get_experiment_runs(self): - # type: () -> List[namedtuple] - statement = self.get_built_select_statement("experiment_run") - experiment_runs = self.get_from_statement(self.historicaldb_file_path, statement) - return [self.experiment_run_row_model(*row) for row in experiment_runs] - - def is_there_a_last_experiment_run(self): - statement = self.get_built_select_statement("experiment_run", "run_id > 0 ORDER BY run_id DESC LIMIT 0, 1") - max_experiment_run = self.get_from_statement(self.historicaldb_file_path, statement) - if len(max_experiment_run) > 0: - return True - return False - - def get_job_data_dcs_all(self): - # type: () -> List[JobData] - """ Gets all content from job_data ordered by id (from table). """ - return [JobData.from_model(row) for row in self.get_job_data_all()] - - def get_job_data_all(self): - """ Gets all content from job_data as list of Models.JobDataRow from database. """ - statement = self.get_built_select_statement("job_data", "id > 0 ORDER BY id") - job_data_rows = self.get_from_statement(self.historicaldb_file_path, statement) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def register_submitted_job_data_dc(self, job_data_dc): - """ Sets previous register to last=0 and inserts the new job_data_dc data class.""" - self._set_current_job_data_rows_last_to_zero_by_job_name(job_data_dc.job_name) - self._insert_job_data(job_data_dc) - return self.get_job_data_dc_unique_latest_by_job_name(job_data_dc.job_name) - - def _set_current_job_data_rows_last_to_zero_by_job_name(self, job_name): - """ Sets the column last = 0 for all job_rows by job_name and last = 1. """ - job_data_row_last = self._get_job_data_last_by_name(job_name) - job_data_dc_list = [JobData.from_model(row) for row in job_data_row_last] - for job_data_dc in job_data_dc_list: - job_data_dc.last = 0 - self._update_job_data_by_id(job_data_dc) - - def update_job_data_dc_by_id(self, job_data_dc): - """ Update JobData data class. Returns latest last=1 row from job_data by job_name. """ - self._update_job_data_by_id(job_data_dc) - return self.get_job_data_dc_unique_latest_by_job_name(job_data_dc.job_name) - - def update_list_job_data_dc_by_each_id(self, job_data_dcs): - """ Return length of updated list. 
""" - for job_data_dc in job_data_dcs: - self._update_job_data_by_id(job_data_dc) - return len(job_data_dcs) - - def get_job_data_dc_unique_latest_by_job_name(self, job_name): - """ Returns JobData data class for the latest job_data_row with last=1 by job_name. """ - job_data_row_last = self._get_job_data_last_by_name(job_name) - if len(job_data_row_last) > 0: - return JobData.from_model(job_data_row_last[0]) - return None - - def _get_job_data_last_by_name(self, job_name): - """ Get List of Models.JobDataRow for job_name and last=1 """ - statement = self.get_built_select_statement("job_data", "last=1 and job_name=? ORDER BY counter DESC") - arguments = (job_name,) - job_data_rows_last = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows_last] - - def get_job_data_dc_COMPLETED_by_wrapper_run_id(self, package_code, run_id): - # type: (int, int) -> List[JobData] - if not run_id or package_code <= Models.RowType.NORMAL: - return [] - job_data_rows = self._get_job_data_dc_COMPLETED_by_wrapper_run_id(package_code, run_id) - if len(job_data_rows) == 0: - return [] - return [JobData.from_model(row) for row in job_data_rows] - - def _get_job_data_dc_COMPLETED_by_wrapper_run_id(self, package_code, run_id): - # type: (int, int) -> List[namedtuple] - statement = self.get_built_select_statement("job_data", "run_id=? and rowtype=? and status=? ORDER BY id") - arguments = (run_id, package_code, "COMPLETED") - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def get_job_data_dcs_last_by_run_id(self, run_id): - job_data_rows = self._get_job_data_last_by_run_id(run_id) - return [JobData.from_model(row) for row in job_data_rows] - - def _get_job_data_last_by_run_id(self, run_id): - """ Get List of Models.JobDataRow for last=1 and run_id """ - statement = self.get_built_select_statement("job_data", "run_id=? and last=1 and rowtype >= 2 ORDER BY id") - arguments = (run_id,) - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def get_job_data_dcs_last_by_wrapper_code(self, wrapper_code): - if wrapper_code and wrapper_code > 2: - return [JobData.from_model(row) for row in self._get_job_data_last_by_wrapper_code(wrapper_code)] - else: - return [] - - def get_job_data_dcs_COMPLETED_by_section(self, section): - # type: (str) -> List[JobData] - arguments = {"status": "COMPLETED", "section": section} - job_data_rows = self._get_job_data_COMPLETD_by_section(section) - return [JobData.from_model(row) for row in job_data_rows] - - def _get_job_data_last_by_wrapper_code(self, wrapper_code): - """ Get List of Models.JobDataRow for last=1 and rowtype=wrapper_code """ - statement = self.get_built_select_statement("job_data", "rowtype = ? and last=1 ORDER BY id") - arguments = (wrapper_code,) - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def _get_job_data_COMPLETD_by_section(self, section): - statement = self.get_built_select_statement("job_data", "status=? and (section=? or member=?) 
ORDER BY id") - arguments = ("COMPLETED", section, section) - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - - def get_all_last_job_data_dcs(self): - """ Gets JobData data classes in job_data for last=1. """ - job_data_rows = self._get_all_last_job_data_rows() - return [JobData.from_model(row) for row in job_data_rows] - - def _get_all_last_job_data_rows(self): - """ Get List of Models.JobDataRow for last=1. """ - statement = self.get_built_select_statement("job_data", "last=1 and rowtype >= 2") - job_data_rows = self.get_from_statement(self.historicaldb_file_path, statement) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def get_job_data_dcs_by_name(self, job_name): - # type: (str) -> List[JobData] - job_data_rows = self._get_job_data_by_name(job_name) - return [JobData.from_model(row) for row in job_data_rows] - - def _get_job_data_by_name(self, job_name): - # type: (str) -> List[namedtuple] - """ Get List of Models.JobDataRow for job_name """ - statement = self.get_built_select_statement("job_data", "job_name=? ORDER BY counter DESC") - arguments = (job_name,) - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def _insert_job_data(self, job_data): - # type : (JobData) -> int - """ Insert data class JobData into job_data table. """ - statement = ''' INSERT INTO job_data(counter, job_name, created, modified, - submit, start, finish, status, rowtype, ncpus, - wallclock, qos, energy, date, section, member, chunk, last, - platform, job_id, extra_data, nnodes, run_id, MaxRSS, AveRSS, - out, err, rowstatus, children, platform_output) - VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) ''' - arguments = (job_data.counter, job_data.job_name, HUtils.get_current_datetime(), HUtils.get_current_datetime(), - job_data.submit, job_data.start, job_data.finish, job_data.status, job_data.rowtype, job_data.ncpus, - job_data.wallclock, job_data.qos, job_data.energy, job_data.date, job_data.section, job_data.member, job_data.chunk, job_data.last, - job_data.platform, job_data.job_id, job_data.extra_data, job_data.nnodes, job_data.run_id, job_data.MaxRSS, job_data.AveRSS, - job_data.out, job_data.err, job_data.rowstatus, job_data.children, job_data.platform_output) - return self.insert_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - - def _insert_experiment_run(self, experiment_run): - """ Insert data class ExperimentRun into database """ - statement = ''' INSERT INTO experiment_run(created, modified, start, finish, - chunk_unit, chunk_size, completed, total, - failed, queuing, running, - submitted, suspended, metadata) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) ''' - arguments = (HUtils.get_current_datetime(), HUtils.get_current_datetime(), experiment_run.start, experiment_run.finish, - experiment_run.chunk_unit, experiment_run.chunk_size, experiment_run.completed, experiment_run.total, - experiment_run.failed, experiment_run.queuing, experiment_run.running, - experiment_run.submitted, experiment_run.suspended, experiment_run.metadata) - return self.insert_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - - def update_many_job_data_change_status(self, changes): - # type : (List[Tuple]) -> None - """ - Update many job_data rows in bulk. 
Requires a changes list of argument tuples. - Only updates finish, modified, status, and rowstatus by id. - """ - statement = ''' UPDATE job_data SET modified=?, status=?, rowstatus=? WHERE id=? ''' - self.execute_many_statement_with_arguments_on_dbfile(self.historicaldb_file_path, statement, changes) - - def _update_job_data_by_id(self, job_data_dc): - """ - Update job_data table with data class JobData. - Update last, submit, start, finish, modified, job_id, status, energy, extra_data, nnodes, ncpus, rowstatus, out, err by id. - """ - statement = ''' UPDATE job_data SET last=?, submit=?, start=?, finish=?, modified=?, - job_id=?, status=?, energy=?, extra_data=?, - nnodes=?, ncpus=?, rowstatus=?, out=?, err=?, - children=?, platform_output=?, rowtype=? WHERE id=? ''' - arguments = (job_data_dc.last, job_data_dc.submit, job_data_dc.start, job_data_dc.finish, HUtils.get_current_datetime(), - job_data_dc.job_id, job_data_dc.status, job_data_dc.energy, job_data_dc.extra_data, - job_data_dc.nnodes, job_data_dc.ncpus, job_data_dc.rowstatus, job_data_dc.out, job_data_dc.err, - job_data_dc.children, job_data_dc.platform_output, job_data_dc.rowtype, job_data_dc._id) - self.execute_statement_with_arguments_on_dbfile(self.historicaldb_file_path, statement, arguments) - - def _update_experiment_run(self, experiment_run_dc): - """ - Update experiment_run table with data class ExperimentRun. - Updates by run_id (finish, chunk_unit, chunk_size, completed, total, failed, queuing, running, submitted, suspended) - """ - statement = ''' UPDATE experiment_run SET finish=?, chunk_unit=?, chunk_size=?, completed=?, total=?, - failed=?, queuing=?, running=?, submitted=?, - suspended=?, modified=? WHERE run_id=? ''' - arguments = (experiment_run_dc.finish, experiment_run_dc.chunk_unit, experiment_run_dc.chunk_size, experiment_run_dc.completed, experiment_run_dc.total, - experiment_run_dc.failed, experiment_run_dc.queuing, experiment_run_dc.running, experiment_run_dc.submitted, - experiment_run_dc.suspended, HUtils.get_current_datetime(), experiment_run_dc.run_id) - self.execute_statement_with_arguments_on_dbfile(self.historicaldb_file_path, statement, arguments) - - def _get_job_data_last_by_run_id_and_finished(self, run_id): - """ Get List of Models.JobDataRow for last=1, finished > 0 and run_id """ - statement = self.get_built_select_statement("job_data", "run_id=? and last=1 and finish > 0 and rowtype >= 2 ORDER BY id") - arguments = (run_id,) - job_data_rows = self.get_from_statement_with_arguments(self.historicaldb_file_path, statement, arguments) - return [self.job_data_row_model(*row) for row in job_data_rows] - - def get_job_data_max_counter(self): - """ The max counter is the maximum count value for the count column in job_data. """ - statement = "SELECT MAX(counter) as maxcounter FROM job_data" - counter_result = self.get_from_statement(self.historicaldb_file_path, statement) - if len(counter_result) <= 0: - return DEFAULT_MAX_COUNTER - else: - max_counter = Models.MaxCounterRow(*counter_result[0]).maxcounter - return max_counter if max_counter else DEFAULT_MAX_COUNTER - - def delete_job_data(self, id): - """ Deletes row from job_data by id. Useful for testing. """ - statement = ''' DELETE FROM job_data WHERE id=? ''' - arguments = (id, ) - self.execute_statement_with_arguments_on_dbfile(self.historicaldb_file_path, statement, arguments) - - def delete_experiment_run(self, run_id): - """ Deletes row in experiment_run by run_id. Useful for testing. 
""" - statement = ''' DELETE FROM experiment_run where run_id=? ''' - arguments = (run_id,) - self.execute_statement_with_arguments_on_dbfile(self.historicaldb_file_path, statement, arguments) - - def _set_historical_pragma_version(self, version=10): - """ Sets the pragma version. """ - statement = "pragma user_version={v:d};".format(v=version) - self.execute_statement_on_dbfile(self.historicaldb_file_path, statement) - - def _get_pragma_version(self): - # type: () -> int - """ Gets current pragma version as int. """ - statement = "pragma user_version;" - pragma_result = self.get_from_statement(self.historicaldb_file_path, statement) - if len(pragma_result) <= 0: - raise Exception("Error while getting the pragma version. This might be a signal of a deeper problem. Review previous errors.") - return int(Models.PragmaVersion(*pragma_result[0]).version) +class ExperimentHistoryDbManager: + """Manages history DDBB actions directly on the database.""" + + def __init__(self, expid: str): + """Requires expid""" + self.expid = expid + self.run_db = ExperimentRunDbRepository(expid) + self.job_data_db = JobDataDbRepository(expid) + self.historicaldb_file_path = ExperimentPaths(expid).job_data_db + + def my_database_exists(self) -> bool: + return os.path.exists(self.historicaldb_file_path) + + def get_experiment_run_dc_with_max_id(self) -> ExperimentRun: + """Get Current (latest) ExperimentRun data class.""" + return ExperimentRun.from_model(self._get_experiment_run_with_max_id()) + + def _get_experiment_run_with_max_id(self) -> Dict[str, Any]: + """Get Models.ExperimentRunRow for the maximum id run.""" + max_experiment_run = self.run_db.get_last_run() + if not max_experiment_run: + raise Exception("No Experiment Runs registered.") + return max_experiment_run + + def get_experiment_run_by_id(self, run_id: int) -> Optional[ExperimentRun]: + if run_id: + return ExperimentRun.from_model(self._get_experiment_run_by_id(run_id)) + return None + + def _get_experiment_run_by_id(self, run_id: int) -> Dict[str, Any]: + experiment_run = self.run_db.get_run_by_id(run_id) + if not experiment_run: + raise Exception( + "Experiment run {0} for experiment {1} does not exists.".format( + run_id, self.expid + ) + ) + return experiment_run + + def get_experiment_runs_dcs(self) -> List[ExperimentRun]: + experiment_run_rows = self._get_experiment_runs() + return [ExperimentRun.from_model(row) for row in experiment_run_rows] + + def _get_experiment_runs(self): + experiment_runs = self.run_db.get_all() + return experiment_runs + + def get_job_data_dcs_all(self) -> List[JobData]: + """Gets all content from job_data ordered by id (from table).""" + return [JobData.from_model(row) for row in self.get_job_data_all()] + + def get_job_data_all(self): + """Gets all content from job_data as list of Models.JobDataRow from database.""" + job_data_rows = self.job_data_db.get_all() + return job_data_rows + + def get_job_data_dc_COMPLETED_by_wrapper_run_id( + self, package_code: int, run_id: int + ) -> List[JobData]: + if not run_id or package_code <= Models.RowType.NORMAL: + return [] + job_data_rows = self._get_job_data_dc_COMPLETED_by_wrapper_run_id( + package_code, run_id + ) + if len(job_data_rows) == 0: + return [] + return [JobData.from_model(row) for row in job_data_rows] + + def _get_job_data_dc_COMPLETED_by_wrapper_run_id( + self, package_code: int, run_id: int + ) -> List[Dict[str, Any]]: + job_data_rows = self.job_data_db.get_job_data_COMPLETED_by_rowtype_run_id( + rowtype=package_code, run_id=run_id + ) + return 
job_data_rows + + def get_job_data_dcs_COMPLETED_by_section(self, section: str) -> List[JobData]: + job_data_rows = self._get_job_data_COMPLETD_by_section(section) + return [JobData.from_model(row) for row in job_data_rows] + + def _get_job_data_COMPLETD_by_section(self, section: str) -> List[Dict[str, Any]]: + job_data_rows = self.job_data_db.get_job_data_COMPLETD_by_section(section) + return job_data_rows + + def get_all_last_job_data_dcs(self): + """Gets JobData data classes in job_data for last=1.""" + job_data_rows = self._get_all_last_job_data_rows() + return [JobData.from_model(row) for row in job_data_rows] + + def _get_all_last_job_data_rows(self): + """Get List of Models.JobDataRow for last=1.""" + job_data_rows = self.job_data_db.get_last_job_data() + return job_data_rows + + def get_job_data_dcs_by_name(self, job_name: str) -> List[JobData]: + job_data_rows = self._get_job_data_by_name(job_name) + return [JobData.from_model(row) for row in job_data_rows] + + def _get_job_data_by_name(self, job_name: str) -> List[Dict[str, Any]]: + """Get List of Models.JobDataRow for job_name""" + job_data_rows = self.job_data_db.get_jobs_by_name(job_name) + return job_data_rows diff --git a/autosubmit_api/history/experiment_history.py b/autosubmit_api/history/experiment_history.py index f9ae4231d0196aaabc39f9c1b28ada0fe70a2a69..a392571c07931aee33b58fc03427cc5188c4b8cd 100644 --- a/autosubmit_api/history/experiment_history.py +++ b/autosubmit_api/history/experiment_history.py @@ -28,8 +28,7 @@ from typing import List, Dict, Tuple, Any SECONDS_WAIT_PLATFORM = 60 class ExperimentHistory(): - def __init__(self, expid, basic_config, experiment_history_db_manager, logger): - # type: (str, APIBasicConfig, ExperimentHistoryDbManager, Logging) -> None + def __init__(self, expid: str, basic_config: APIBasicConfig, experiment_history_db_manager: ExperimentHistoryDbManager, logger: Logging): self.expid = expid self._log = logger self.basic_config = basic_config @@ -41,11 +40,6 @@ class ExperimentHistory(): self._log.log(str(exp), traceback.format_exc()) self.manager = None - def is_header_ready(self): - if self.manager: - return self.manager.is_header_ready_db_version() - return False - def get_historic_job_data(self, job_name): # type: (str) -> List[Dict[str, Any]] result = [] diff --git a/autosubmit_api/persistance/job_package_reader.py b/autosubmit_api/persistance/job_package_reader.py index 2dbe4ba749073f0625ac4e56b5c75d7790ee99b7..4846cfa980d87ec4d4ee2efd9538948930cc7609 100644 --- a/autosubmit_api/persistance/job_package_reader.py +++ b/autosubmit_api/persistance/job_package_reader.py @@ -1,13 +1,12 @@ from typing import Dict, List -from sqlalchemy import select +from autosubmit_api.database.repositories import ( + JobPackagesDbRepository, + WrapperJobPackagesDbRepository, +) from autosubmit_api.logger import logger -from autosubmit_api.database import tables -from autosubmit_api.database.common import AttachedDatabaseConnBuilder -from autosubmit_api.persistance.experiment import ExperimentPaths class JobPackageReader: - def __init__(self, expid: str) -> None: self.expid = expid self._content: List[Dict] = [] @@ -17,23 +16,13 @@ class JobPackageReader: self._package_to_symbol: Dict[str, str] = {} def read(self): - conn_builder = AttachedDatabaseConnBuilder() - conn_builder.attach_db( - ExperimentPaths(self.expid).job_packages_db, "job_packages" - ) - - with conn_builder.product as conn: - try: - statement = select(tables.JobPackageTable) - self._content = [x._mapping for x in 
conn.execute(statement).all()] - if len(self._content) == 0: - raise Warning( - "job_packages table empty, trying wrapper_job_packages" - ) - except Exception as exc: - logger.warning(exc) - statement = select(tables.WrapperJobPackageTable) - self._content = [x._mapping for x in conn.execute(statement).all()] + try: + self._content = JobPackagesDbRepository(self.expid).get_all() + if len(self._content) == 0: + raise Warning("job_packages table empty, trying wrapper_job_packages") + except Exception as exc: + logger.warning(exc) + self._content = WrapperJobPackagesDbRepository(self.expid).get_all() self._build_job_to_package() self._build_package_to_jobs() diff --git a/autosubmit_api/views/v3.py b/autosubmit_api/views/v3.py index 23c9d621894c8f2ef6c093f0edd498fd2396dfbe..a7e091cb1a5df1bcad9de225cea2ee1dd4c92960 100644 --- a/autosubmit_api/views/v3.py +++ b/autosubmit_api/views/v3.py @@ -6,15 +6,14 @@ import requests from flask_cors import cross_origin from flask import request, session, redirect from autosubmit_api.auth import ProtectionLevels, with_auth_token -from autosubmit_api.database.db_common import ( +from autosubmit_api.experiment.common_requests import ( get_current_running_exp, - update_experiment_description_owner, ) from autosubmit_api.experiment import common_requests as CommonRequests from autosubmit_api.experiment import utils as Utiles from autosubmit_api.logger import logger, with_log_run_times from autosubmit_api.performance.performance_metrics import PerformanceMetrics -from autosubmit_api.database.db_common import search_experiment_by_id +from autosubmit_api.experiment.common_requests import search_experiment_by_id from autosubmit_api.config.basicConfig import APIBasicConfig from autosubmit_api.builders.joblist_helper_builder import ( JobListHelperBuilder, @@ -113,16 +112,17 @@ def update_description(user_id: Optional[str] = None): """ Updates the description of an experiment. Requires authenticated user. 
""" - expid = None - new_description = None - if request.is_json: - body_data = request.json - expid = body_data.get("expid", None) - new_description = body_data.get("description", None) - return ( - update_experiment_description_owner(expid, new_description, user_id), - HTTPStatus.OK if user_id else HTTPStatus.UNAUTHORIZED, - ) + raise NotImplementedError + # expid = None + # new_description = None + # if request.is_json: + # body_data = request.json + # expid = body_data.get("expid", None) + # new_description = body_data.get("description", None) + # return ( + # update_experiment_description_owner(expid, new_description, user_id), + # HTTPStatus.OK if user_id else HTTPStatus.UNAUTHORIZED, + # ) @cross_origin(expose_headers="Authorization") diff --git a/autosubmit_api/views/v4.py b/autosubmit_api/views/v4.py index d7101ce372cba4e65599e60652f25e5f1f4f4bc2..a3f4842c23ee04addb327f6e9bc66d6e32ff931a 100644 --- a/autosubmit_api/views/v4.py +++ b/autosubmit_api/views/v4.py @@ -18,11 +18,7 @@ from autosubmit_api.builders.experiment_history_builder import ( ) from autosubmit_api.common.utils import Status from autosubmit_api.database import tables -from autosubmit_api.database.common import ( - create_main_db_conn, - execute_with_limit_offset, -) -from autosubmit_api.database.queries import generate_query_listexp_extended +from autosubmit_api.database.repositories import ExperimentJoinDbRepository from autosubmit_api.logger import logger, with_log_run_times from cas import CASClient from autosubmit_api import config @@ -231,11 +227,11 @@ class ExperimentView(MethodView): else: page_size = None offset = None - except: + except Exception: return {"error": {"message": "Invalid params"}}, HTTPStatus.BAD_REQUEST # Query - statement = generate_query_listexp_extended( + query_result, total_rows = ExperimentJoinDbRepository().search( query=query, only_active=only_active, owner=owner, @@ -243,20 +239,15 @@ class ExperimentView(MethodView): autosubmit_version=autosubmit_version, order_by=order_by, order_desc=order_desc, + limit=page_size, + offset=offset, ) - with create_main_db_conn() as conn: - query_result, total_rows = execute_with_limit_offset( - statement=statement, - conn=conn, - limit=page_size, - offset=offset, - ) # Process experiments experiments = [] for raw_exp in query_result: exp_builder = ExperimentBuilder() - exp_builder.produce_base_from_dict(raw_exp._mapping) + exp_builder.produce_base_from_dict(raw_exp) # Get additional data from config files try: @@ -270,7 +261,7 @@ class ExperimentView(MethodView): exp = exp_builder.product # Get current run data from history - last_modified_timestamp = exp.created + # last_modified_timestamp = exp.created completed = 0 total = 0 submitted = 0 @@ -292,7 +283,7 @@ class ExperimentView(MethodView): running = current_run.running failed = current_run.failed suspended = current_run.suspended - last_modified_timestamp = current_run.modified_timestamp + # last_modified_timestamp = current_run.modified_timestamp except Exception as exc: logger.warning((f"Exception getting the current run on search: {exc}")) logger.warning(traceback.format_exc()) @@ -343,7 +334,7 @@ class ExperimentDetailView(MethodView): """ exp_builder = ExperimentBuilder() exp_builder.produce_base(expid) - return exp_builder.product.model_dump(include=tables.experiment_table.c.keys()) + return exp_builder.product.model_dump(include=tables.ExperimentTable.c.keys()) class ExperimentJobsViewOptEnum(str, Enum): @@ -412,7 +403,6 @@ class ExperimentWrappersView(MethodView): decorators = 
[with_auth_token(), with_log_run_times(logger, "WRAPPERS")] def get(self, expid: str, user_id: Optional[str] = None): - job_package_reader = JobPackageReader(expid) job_package_reader.read() diff --git a/autosubmit_api/workers/business/process_graph_drawings.py b/autosubmit_api/workers/business/process_graph_drawings.py index fc3517e53cfef378ca561af1f0b13fe01a8defa4..c284963d686e3a1e73150fef96c09c77b5fe1aec 100644 --- a/autosubmit_api/workers/business/process_graph_drawings.py +++ b/autosubmit_api/workers/business/process_graph_drawings.py @@ -1,9 +1,8 @@ import time import traceback -from autosubmit_api.database import tables -from autosubmit_api.database.common import create_as_times_db_engine +from autosubmit_api.database.repositories import ExperimentStatusDbRepository from autosubmit_api.common import utils as common_utils -from autosubmit_api.database.db_jobdata import ExperimentGraphDrawing +from autosubmit_api.components.experiment.graph_drawer import ExperimentGraphDrawing from autosubmit_api.builders.configuration_facade_builder import ( ConfigurationFacadeDirector, AutosubmitConfigurationFacadeBuilder, @@ -12,7 +11,7 @@ from autosubmit_api.builders.joblist_loader_builder import ( JobListLoaderBuilder, JobListLoaderDirector, ) -from typing import List, Any +from typing import List, Any, Optional def process_active_graphs(): @@ -20,14 +19,7 @@ def process_active_graphs(): Process the list of active experiments to generate the positioning of their graphs """ try: - with create_as_times_db_engine().connect() as conn: - query_result = conn.execute( - tables.experiment_status_table.select().where( - tables.experiment_status_table.c.status == "RUNNING" - ) - ).all() - - active_experiments: List[str] = [exp.name for exp in query_result] + active_experiments = ExperimentStatusDbRepository().get_only_running_expids() for expid in active_experiments: try: @@ -38,7 +30,7 @@ def process_active_graphs(): autosubmit_configuration_facade.get_autosubmit_version() ): _process_graph(expid, autosubmit_configuration_facade.chunk_size) - except Exception as exp: + except Exception: print((traceback.format_exc())) print(("Error while processing: {}".format(expid))) @@ -47,8 +39,7 @@ def process_active_graphs(): print(("Error while processing graph drawing: {}".format(exp))) -def _process_graph(expid, chunk_size): - # type: (str, int) -> List[Any] | None +def _process_graph(expid: str, chunk_size: int) -> Optional[List[Any]]: result = None experimentGraphDrawing = ExperimentGraphDrawing(expid) locked = experimentGraphDrawing.locked diff --git a/autosubmit_api/workers/populate_details/populate.py b/autosubmit_api/workers/populate_details/populate.py index 175b21b81f9c17f8adacb5b6592e948f78422054..49304ef25f28c547f200a02594554fa7b2d2222f 100644 --- a/autosubmit_api/workers/populate_details/populate.py +++ b/autosubmit_api/workers/populate_details/populate.py @@ -1,10 +1,8 @@ -import textwrap - -from sqlalchemy import text +from autosubmit_api.database.repositories import ( + ExperimentDetailsDbRepository, + ExperimentDbRepository, +) from autosubmit_api.logger import logger -from autosubmit_api.database import tables - -from autosubmit_api.database.common import create_autosubmit_db_engine from autosubmit_api.builders.configuration_facade_builder import ( ConfigurationFacadeDirector, AutosubmitConfigurationFacadeBuilder, @@ -23,22 +21,21 @@ Experiment = namedtuple("Experiment", ["id", "name"]) class DetailsProcessor: def __init__(self, basic_config: APIBasicConfig): self.basic_config = basic_config - 
self.main_db_engine = create_autosubmit_db_engine() + self.experiment_db = ExperimentDbRepository() + self.details_db = ExperimentDetailsDbRepository() def process(self): new_details = self._get_all_details() - self.create_details_table_if_not_exists() self._clean_table() return self._insert_many_into_details_table(new_details) def _get_experiments(self) -> List[Experiment]: experiments = [] - with self.main_db_engine.connect() as conn: - query_result = conn.execute(tables.experiment_table.select()).all() + query_result = self.experiment_db.get_all() for exp in query_result: experiments.append( - Experiment(exp._mapping.get("id"), exp._mapping.get("name")) + Experiment(exp.get("id"), exp.get("name")) ) return experiments @@ -81,35 +78,9 @@ class DetailsProcessor: return result def _insert_many_into_details_table(self, values: List[dict]) -> int: - with self.main_db_engine.connect() as conn: - result = conn.execute( - tables.details_table.insert(), values - ) # Executemany style https://docs.sqlalchemy.org/en/20/tutorial/data_insert.html#insert-usually-generates-the-values-clause-automatically - conn.commit() - return result.rowcount - - def create_details_table_if_not_exists(self): - create_table_query = textwrap.dedent( - """ - CREATE TABLE - IF NOT EXISTS details ( - exp_id integer PRIMARY KEY, - user text NOT NULL, - created text NOT NULL, - model text NOT NULL, - branch text NOT NULL, - hpc text NOT NULL, - FOREIGN KEY (exp_id) REFERENCES experiment (id) - ); - """ - ) - with self.main_db_engine.connect() as conn: - conn.execute(text(create_table_query)) - conn.commit() + rowcount = self.details_db.insert_many(values) + return rowcount - def _clean_table(self): - # type: () -> None - with self.main_db_engine.connect() as conn: - with conn.execution_options(isolation_level="AUTOCOMMIT"): - conn.execute(tables.details_table.delete()) - conn.execute(text("VACUUM;")) + def _clean_table(self) -> int: + rowcount = self.details_db.delete_all() + return rowcount diff --git a/setup.py b/setup.py index 7ab919bf27719a5240b7eae838d56af1b083314b..c1a17221fecfa637981369ca151f3e06cf3172ee 100644 --- a/setup.py +++ b/setup.py @@ -31,12 +31,13 @@ install_requires = [ "scipy~=1.11.4", "python-dotenv~=1.0.1", "autosubmitconfigparser>=1.0.65", - "autosubmit>=3.13", + "autosubmit>=4.2.0", "Flask-APScheduler~=1.13.1", "gunicorn~=22.0.0", "pydantic~=2.5.2", "SQLAlchemy~=2.0.23", - "python-cas~=1.6.0" + "python-cas>=1.6.0", + "psycopg2>=2.9.9" ] # Test dependencies diff --git a/tests/bgtasks/test_status_updater.py b/tests/bgtasks/test_status_updater.py index c6975a9b5f8fb5e4a72886e4e025b9c40a7db729..56a225cc93bd133272e1bc95c7a1b3d38db1d815 100644 --- a/tests/bgtasks/test_status_updater.py +++ b/tests/bgtasks/test_status_updater.py @@ -12,15 +12,15 @@ class TestStatusUpdater: prepare_db() with create_as_times_db_engine().connect() as conn: - exps_status = conn.execute(tables.experiment_status_table.delete()) + exps_status = conn.execute(tables.ExperimentStatusTable.delete()) StatusUpdater.run() with create_autosubmit_db_engine().connect() as conn: - experiments = conn.execute(tables.experiment_table.select()).all() + experiments = conn.execute(tables.ExperimentTable.select()).all() with create_as_times_db_engine().connect() as conn: - exps_status = conn.execute(tables.experiment_status_table.select()).all() + exps_status = conn.execute(tables.ExperimentStatusTable.select()).all() assert len(experiments) == len(exps_status) assert set([x.id for x in experiments]) == set([x.exp_id for x in exps_status]) diff 
--git a/tests/conftest.py b/tests/conftest.py index be699e00c6651b16daf191f6618656cbe26787b3..84b7f3e79d73290ddc726a1c6e7442a3bedf6ae3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,31 +2,58 @@ # Reference: https://docs.pytest.org/en/latest/reference/fixtures.html#conftest-py-sharing-fixtures-across-multiple-files import os +import tempfile +from typing import Tuple from flask import Flask import pytest -from autosubmitconfigparser.config.basicconfig import BasicConfig from autosubmit_api.app import create_app from autosubmit_api.config.basicConfig import APIBasicConfig from autosubmit_api import config -from tests.custom_utils import custom_return_value +from autosubmit_api.database import session +from tests import utils +from sqlalchemy import Engine, create_engine +from sqlalchemy.orm import scoped_session, sessionmaker FAKE_EXP_DIR = "./tests/experiments/" +DEFAULT_DATABASE_CONN_URL = ( + "postgresql://postgres:mysecretpassword@localhost:5432/autosubmit_test" +) + + +# FIXTURES #### + +# Config fixtures -#### FIXTURES #### @pytest.fixture(autouse=True) def fixture_disable_protection(monkeypatch: pytest.MonkeyPatch): + """ + This fixture disables the protection level for all the tests. + + Autouse is set, so, no need to put this fixture in the test function. + """ monkeypatch.setattr(config, "PROTECTION_LEVEL", "NONE") monkeypatch.setenv("PROTECTION_LEVEL", "NONE") -@pytest.fixture -def fixture_mock_basic_config(monkeypatch: pytest.MonkeyPatch): - # Get APIBasicConfig from file - monkeypatch.setenv("AUTOSUBMIT_CONFIGURATION", os.path.join(FAKE_EXP_DIR, ".autosubmitrc")) +@pytest.fixture( + params=[ + pytest.param("fixture_sqlite", marks=pytest.mark.sqlite), + pytest.param("fixture_pg", marks=pytest.mark.pg), + ] +) +def fixture_mock_basic_config(request: pytest.FixtureRequest): + """ + Sets a mock basic config for the tests. 
+    """
+    request.getfixturevalue(request.param)
+
     APIBasicConfig.read()
     yield APIBasicConfig
 
+# Flask app fixtures
+
+
 @pytest.fixture
 def fixture_app(fixture_mock_basic_config):
     app = create_app()
@@ -46,3 +73,172 @@ def fixture_client(fixture_app: Flask):
 @pytest.fixture
 def fixture_runner(fixture_app: Flask):
     return fixture_app.test_cli_runner()
+
+
+# Fixtures sqlite
+
+
+@pytest.fixture(scope="session")
+def fixture_temp_dir_copy():
+    """
+    Fixture that copies the contents of the FAKE_EXP_DIR to a temporary directory with rsync
+    """
+    with tempfile.TemporaryDirectory() as tempdir:
+        # Copy all files recursively
+        os.system(f"rsync -r {FAKE_EXP_DIR} {tempdir}")
+        yield tempdir
+
+
+@pytest.fixture(scope="session")
+def fixture_gen_rc_sqlite(fixture_temp_dir_copy: str):
+    """
+    Fixture that generates a .autosubmitrc file in the temporary directory
+    """
+    rc_file = os.path.join(fixture_temp_dir_copy, ".autosubmitrc")
+    with open(rc_file, "w") as f:
+        f.write(
+            "\n".join(
+                [
+                    "[database]",
+                    f"path = {fixture_temp_dir_copy}",
+                    "filename = autosubmit.db",
+                    "backend = sqlite",
+                    "[local]",
+                    f"path = {fixture_temp_dir_copy}",
+                    "[globallogs]",
+                    f"path = {fixture_temp_dir_copy}/logs",
+                    "[historicdb]",
+                    f"path = {fixture_temp_dir_copy}/metadata/data",
+                    "[structures]",
+                    f"path = {fixture_temp_dir_copy}/metadata/structures",
+                    "[historiclog]",
+                    f"path = {fixture_temp_dir_copy}/metadata/logs",
+                    "[graph]",
+                    f"path = {fixture_temp_dir_copy}/metadata/graph",
+                ]
+            )
+        )
+    yield fixture_temp_dir_copy
+
+
+@pytest.fixture
+def fixture_sqlite(fixture_gen_rc_sqlite: str, monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setenv(
+        "AUTOSUBMIT_CONFIGURATION", os.path.join(fixture_gen_rc_sqlite, ".autosubmitrc")
+    )
+    yield fixture_gen_rc_sqlite
+
+
+# Fixtures Postgres
+
+
+@pytest.fixture(scope="session")
+def fixture_temp_dir_copy_exclude_db():
+    """
+    Fixture that copies the contents of the FAKE_EXP_DIR to a temporary directory with rsync
+    and excludes .db files
+    """
+    with tempfile.TemporaryDirectory() as tempdir:
+        # Copy all files recursively excluding .db files
+        os.system(f"rsync -r --exclude '*.db' {FAKE_EXP_DIR} {tempdir}")
+        yield tempdir
+
+
+@pytest.fixture(scope="session")
+def fixture_gen_rc_pg(fixture_temp_dir_copy_exclude_db: str):
+    """
+    Fixture that generates a .autosubmitrc file in the temporary directory
+    """
+    rc_file = os.path.join(fixture_temp_dir_copy_exclude_db, ".autosubmitrc")
+    conn_url = os.environ.get("PYTEST_DATABASE_CONN_URL", DEFAULT_DATABASE_CONN_URL)
+    with open(rc_file, "w") as f:
+        f.write(
+            "\n".join(
+                [
+                    "[database]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}",
+                    "backend = postgres",
+                    f"connection_url = {conn_url}",
+                    "[local]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}",
+                    "[globallogs]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}/logs",
+                    "[historicdb]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}/metadata/data",
+                    "[structures]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}/metadata/structures",
+                    "[historiclog]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}/metadata/logs",
+                    "[graph]",
+                    f"path = {fixture_temp_dir_copy_exclude_db}/metadata/graph",
+                ]
+            )
+        )
+    yield fixture_temp_dir_copy_exclude_db
+
+
+@pytest.fixture(scope="session")
+def fixture_pg_db(fixture_gen_rc_pg: str):
+    """
+    This fixture cleans and sets up a PostgreSQL database for testing purposes. 
+    """
+    conn_url = os.environ.get("PYTEST_DATABASE_CONN_URL", DEFAULT_DATABASE_CONN_URL)
+    engine = create_engine(conn_url)
+
+    with engine.connect() as conn:
+        utils.setup_pg_db(conn)
+        conn.commit()
+
+    yield (fixture_gen_rc_pg, engine)
+
+    # with engine.connect() as conn:
+    #     utils.setup_pg_db(conn)
+    #     conn.commit()
+
+
+@pytest.fixture(scope="session")
+def fixture_pg_db_copy_all(fixture_pg_db: Tuple[str, Engine]):
+    """
+    This fixture recursively searches all the .db files in the FAKE_EXP_DIR and copies them to the test database
+    """
+    engine = fixture_pg_db[1]
+    # Get .db files absolute paths from the FAKE_EXP_DIR recursively
+    all_files = []
+    for root, dirs, files in os.walk(FAKE_EXP_DIR):
+        for filepath in files:
+            if filepath.endswith(".db"):
+                all_files.append(os.path.join(root, filepath))
+
+    for filepath in all_files:
+        # Infer which type of DB this is
+        if "metadata/structures" in filepath:
+            utils.copy_structure_db(filepath, engine)
+        elif "metadata/data" in filepath:
+            utils.copy_job_data_db(filepath, engine)
+        elif "metadata/graph" in filepath:
+            utils.copy_graph_data_db(filepath, engine)
+        elif "autosubmit.db" in filepath:
+            utils.copy_autosubmit_db(filepath, engine)
+        elif "as_times.db" in filepath:
+            utils.copy_as_times_db(filepath, engine)
+        elif "pkl/job_packages" in filepath:
+            utils.copy_job_packages_db(filepath, engine)
+
+    yield fixture_pg_db
+
+
+@pytest.fixture
+def fixture_pg(
+    fixture_pg_db_copy_all: Tuple[str, Engine], monkeypatch: pytest.MonkeyPatch
+):
+    """
+    This fixture points AUTOSUBMIT_CONFIGURATION at the PostgreSQL .autosubmitrc and binds the session to the test database.
+    """
+    monkeypatch.setenv(
+        "AUTOSUBMIT_CONFIGURATION",
+        os.path.join(fixture_pg_db_copy_all[0], ".autosubmitrc"),
+    )
+    # Mock the session because it is initialized before the fixture
+    mock_session = scoped_session(sessionmaker(bind=fixture_pg_db_copy_all[1]))
+    monkeypatch.setattr(session, "Session", mock_session)
+    yield fixture_pg_db_copy_all[0]
diff --git a/tests/custom_utils.py b/tests/custom_utils.py
deleted file mode 100644
index 9148a98a887c26f71c409f8ef94e2dd10ee99ab3..0000000000000000000000000000000000000000
--- a/tests/custom_utils.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from http import HTTPStatus
-
-
-def dummy_response(*args, **kwargs):
-    return "Hello World!", HTTPStatus.OK
-
-
-def custom_return_value(value=None):
-    def blank_func(*args, **kwargs):
-        return value
-
-    return blank_func
diff --git a/tests/experiments/.autosubmitrc b/tests/experiments/.autosubmitrc
deleted file mode 100644
index da7bbe7c1b55fa565084366bcb41f5026c2cb561..0000000000000000000000000000000000000000
--- a/tests/experiments/.autosubmitrc
+++ /dev/null
@@ -1,21 +0,0 @@
-[database]
-path = ./tests/experiments/
-filename = autosubmit.db
-
-[local]
-path = ./tests/experiments/
-
-[globallogs]
-path = ./tests/experiments/logs
-
-[historicdb]
-path = ./tests/experiments/metadata/data
-
-[structures]
-path = ./tests/experiments/metadata/structures
-
-[historiclog]
-path = ./tests/experiments/metadata/logs
-
-[graph]
-path = ./tests/experiments/metadata/graph
\ No newline at end of file
diff --git a/tests/experiments/autosubmit.db b/tests/experiments/autosubmit.db
index 472787336d33e91b529d921869dcc82dea543426..9e09467e75b138708f6697d30642b0281b119884 100644
Binary files a/tests/experiments/autosubmit.db and b/tests/experiments/autosubmit.db differ
diff --git a/tests/experiments/metadata/data/job_data_a007.db b/tests/experiments/metadata/data/job_data_a007.db
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7fcd6487e0efd128f66e1889e268c268079f8b1b 
100755 Binary files a/tests/experiments/metadata/data/job_data_a007.db and b/tests/experiments/metadata/data/job_data_a007.db differ diff --git a/tests/experiments/metadata/graph/graph_data_a003.db b/tests/experiments/metadata/graph/graph_data_a003.db index 1862073cccbab88a43d010644c5c6316b4202aa1..736e381f49520ad0d6caa607f0a964faeffa80ce 100755 Binary files a/tests/experiments/metadata/graph/graph_data_a003.db and b/tests/experiments/metadata/graph/graph_data_a003.db differ diff --git a/tests/test_auth.py b/tests/test_auth.py index 5fef2066abbd3e6810b817ed6a9bf6ee296f49a3..9bfc5eed2d9b506b3e007a1b31cd881c7398fbb2 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -6,7 +6,7 @@ from autosubmit_api import auth from autosubmit_api.auth.utils import validate_client from autosubmit_api.config.basicConfig import APIBasicConfig from autosubmit_api import config -from tests.custom_utils import custom_return_value, dummy_response +from tests.utils import custom_return_value, dummy_response class TestCommonAuth: diff --git a/tests/test_bg_tasks.py b/tests/test_bg_tasks.py index f8d5ab205a980753c29235b6bbfa1f94e9641774..2c5a0737d35f6074aaa49580dcdda7847467b563 100644 --- a/tests/test_bg_tasks.py +++ b/tests/test_bg_tasks.py @@ -9,12 +9,12 @@ class TestDetailsPopulate: def test_process(self,fixture_mock_basic_config: APIBasicConfig): with create_autosubmit_db_engine().connect() as conn: - conn.execute(tables.details_table.delete()) + conn.execute(tables.DetailsTable.delete()) conn.commit() count = DetailsProcessor(fixture_mock_basic_config).process() - rows = conn.execute(tables.details_table.select()).all() + rows = conn.execute(tables.DetailsTable.select()).all() assert len(rows) > 0 assert len(rows) == count \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index 64b12455f13e1ce7742ece32dff1b4cbfeb3023c..748da2bee66cc2970cbd066f525db58d89e86e8e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,39 +6,10 @@ from autosubmit_api.config.basicConfig import APIBasicConfig from autosubmit_api.config.config_common import AutosubmitConfigResolver from autosubmit_api.config.ymlConfigStrategy import ymlConfigStrategy -from tests.conftest import FAKE_EXP_DIR -from tests.custom_utils import custom_return_value +from tests.utils import custom_return_value -class TestBasicConfig: - def test_api_basic_config(self, fixture_mock_basic_config): - APIBasicConfig.read() - - assert os.getenv("AUTOSUBMIT_CONFIGURATION") == os.path.join( - FAKE_EXP_DIR, ".autosubmitrc" - ) - assert APIBasicConfig.LOCAL_ROOT_DIR == FAKE_EXP_DIR - assert APIBasicConfig.DB_FILE == "autosubmit.db" - assert APIBasicConfig.DB_PATH == os.path.join( - FAKE_EXP_DIR, APIBasicConfig.DB_FILE - ) - assert APIBasicConfig.AS_TIMES_DB == "as_times.db" - assert APIBasicConfig.JOBDATA_DIR == os.path.join( - FAKE_EXP_DIR, "metadata", "data" - ) - assert APIBasicConfig.GLOBAL_LOG_DIR == os.path.join(FAKE_EXP_DIR, "logs") - assert APIBasicConfig.STRUCTURES_DIR == os.path.join( - FAKE_EXP_DIR, "metadata", "structures" - ) - assert APIBasicConfig.HISTORICAL_LOG_DIR == os.path.join( - FAKE_EXP_DIR, "metadata", "logs" - ) - - assert APIBasicConfig.GRAPHDATA_DIR == os.path.join( - FAKE_EXP_DIR, "metadata", "graph" - ) - class TestConfigResolver: def test_simple_init(self, monkeypatch: pytest.MonkeyPatch): # Conf test decision @@ -61,7 +32,7 @@ class TestConfigResolver: class TestYMLConfigStrategy: def test_exclusive(self, fixture_mock_basic_config): wrapper = ymlConfigStrategy("a007", 
fixture_mock_basic_config) - assert True == wrapper.get_exclusive(JobSection.SIM) + assert True is wrapper.get_exclusive(JobSection.SIM) wrapper = ymlConfigStrategy("a003", fixture_mock_basic_config) - assert False == wrapper.get_exclusive(JobSection.SIM) + assert False is wrapper.get_exclusive(JobSection.SIM) diff --git a/tests/test_database.py b/tests/test_database.py index 518523b3705220b6a9d8bcb41fe485f215c7b72b..632c05337d37cac13ce12aafd8c4c6cf121bd48a 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -13,7 +13,7 @@ def count_pid_lsof(pid): class TestDatabase: - def test_open_files(self, fixture_mock_basic_config): + def test_open_files(self, fixture_sqlite): current_pid = os.getpid() counter = count_pid_lsof(current_pid) diff --git a/tests/test_db_repositories.py b/tests/test_db_repositories.py new file mode 100644 index 0000000000000000000000000000000000000000..ff6eb70abe67d1b6c6cd87233ba994f957a26e41 --- /dev/null +++ b/tests/test_db_repositories.py @@ -0,0 +1,82 @@ +from autosubmit_api.database.repositories.experiment import ExperimentDbRepository +from autosubmit_api.database.repositories.experiment_structure import ( + ExperimentStructureDbRepository, +) +from autosubmit_api.database.repositories.graph_draw import ExpGraphDrawDBRepository +from autosubmit_api.database.repositories.join.experiment_join import ( + ExperimentJoinDbRepository, +) + + +class TestExperimentDbRepository: + def test_operations(self, fixture_mock_basic_config): + experiment_db = ExperimentDbRepository() + + # Check get_all + rows = experiment_db.get_all() + assert len(rows) >= 4 + for expid in ["a003", "a007", "a3tb", "a6zj"]: + assert expid in [row.get("name") for row in rows] + + # Check get_by_expid + row = experiment_db.get_by_expid("a003") + assert row["name"] == "a003" + + +class TestExpGraphDrawDBRepository: + def test_operations(self, fixture_mock_basic_config): + expid = "g001" + graph_draw_db = ExpGraphDrawDBRepository(expid) + + # Create table + graph_draw_db.create_table() + + # Table exists and is empty + assert graph_draw_db.get_all() == [] + + # Insert data + data = [ + {"id": 1, "job_name": "job1", "x": 1, "y": 2}, + {"id": 2, "job_name": "job2", "x": 2, "y": 3}, + ] + assert graph_draw_db.insert_many(data) == len(data) + + # Get data + assert graph_draw_db.get_all() == data + + # Delete data + assert graph_draw_db.delete_all() == len(data) + + # Table is empty + assert graph_draw_db.get_all() == [] + + +class TestExperimentJoinDbRepository: + def test_search(self, fixture_mock_basic_config): + experiment_join_db = ExperimentJoinDbRepository() + + # Check search + rows, total = experiment_join_db.search(limit=3) + assert len(rows) == 3 + assert total >= 4 + + for row in rows: + assert row.get("status") + + +class TestExperimentStructureDbRepository: + def test_get(self, fixture_mock_basic_config): + structure_db = ExperimentStructureDbRepository("a007") + + # Check get_structure + structure = structure_db.get_structure() + assert sorted(structure) == sorted({ + "a007_20000101_fc0_1_SIM": ["a007_20000101_fc0_2_SIM"], + "a007_20000101_fc0_2_SIM": ["a007_POST"], + "a007_20000101_fc0_INI": ["a007_20000101_fc0_1_SIM"], + "a007_20000101_fc0_TRANSFER": [], + "a007_CLEAN": ["a007_20000101_fc0_TRANSFER"], + "a007_LOCAL_SETUP": ["a007_REMOTE_SETUP"], + "a007_POST": ["a007_CLEAN"], + "a007_REMOTE_SETUP": ["a007_20000101_fc0_INI"], + }) diff --git a/tests/test_endpoints_v3.py b/tests/test_endpoints_v3.py index 
b5d0113c6406ee9f9e958a7a39b9289813f21820..85056ec6094de21c0db457b0d9f0f764c2bc7025 100644 --- a/tests/test_endpoints_v3.py +++ b/tests/test_endpoints_v3.py @@ -14,7 +14,6 @@ class TestLogin: def test_not_allowed_client( self, fixture_client: FlaskClient, - fixture_mock_basic_config: APIBasicConfig, monkeypatch: pytest.MonkeyPatch, ): monkeypatch.setattr(APIBasicConfig, "ALLOWED_CLIENTS", []) @@ -28,7 +27,6 @@ class TestLogin: def test_redirect( self, fixture_client: FlaskClient, - fixture_mock_basic_config: APIBasicConfig, monkeypatch: pytest.MonkeyPatch, ): random_referer = str(f"https://${str(uuid4())}/") diff --git a/tests/test_endpoints_v4.py b/tests/test_endpoints_v4.py index dc83894f9c90849e7358a6ae11efc414c0f1695f..6e7fb38d1d36c6b412ca8224f60353592b24c334 100644 --- a/tests/test_endpoints_v4.py +++ b/tests/test_endpoints_v4.py @@ -7,7 +7,7 @@ import jwt import pytest from autosubmit_api import config from autosubmit_api.views.v4 import PAGINATION_LIMIT_DEFAULT, ExperimentJobsViewOptEnum -from tests.custom_utils import custom_return_value +from tests.utils import custom_return_value class TestCASV2Login: diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py new file mode 100644 index 0000000000000000000000000000000000000000..e2d9b2bdb78a5fa2330c7893baf265819dfdd11d --- /dev/null +++ b/tests/test_fixtures.py @@ -0,0 +1,113 @@ +import os +from typing import Tuple +import pytest +from sqlalchemy import Engine, select +from autosubmit_api.config.basicConfig import APIBasicConfig +from autosubmit_api.database import tables +from tests.utils import get_schema_names + + +def test_mock_basic_config(fixture_mock_basic_config: APIBasicConfig): + assert "AUTOSUBMIT_CONFIGURATION" in os.environ and os.path.exists( + os.environ["AUTOSUBMIT_CONFIGURATION"] + ) + + # Reading the configuration file + APIBasicConfig.read() + + assert APIBasicConfig.DATABASE_BACKEND in ["sqlite", "postgres"] + assert "tmp" in APIBasicConfig.LOCAL_ROOT_DIR + assert APIBasicConfig.LOCAL_ROOT_DIR in os.environ["AUTOSUBMIT_CONFIGURATION"] + + if APIBasicConfig.DATABASE_BACKEND == "sqlite": + assert APIBasicConfig.DB_FILE == "autosubmit.db" + + elif APIBasicConfig.DATABASE_BACKEND == "postgres": + assert APIBasicConfig.DATABASE_CONN_URL + + +class TestSQLiteFixtures: + def test_fixture_temp_dir_copy(self, fixture_temp_dir_copy: str): + """ + Test if all the files are copied from FAKEDIR to the temporary directory + """ + FILES_SHOULD_EXIST = [ + "a003/conf/minimal.yml", + "metadata/data/job_data_a007.db", + ] + for file in FILES_SHOULD_EXIST: + assert os.path.exists(os.path.join(fixture_temp_dir_copy, file)) + + def test_fixture_gen_rc_sqlite(self, fixture_gen_rc_sqlite: str): + """ + Test if the .autosubmitrc file is generated and the environment variable is set + """ + rc_file = os.path.join(fixture_gen_rc_sqlite, ".autosubmitrc") + + # File should exist + assert os.path.exists(rc_file) + + with open(rc_file, "r") as f: + content = f.read() + assert "[database]" in content + assert f"path = {fixture_gen_rc_sqlite}" in content + assert "filename = autosubmit.db" in content + assert "backend = sqlite" in content + + +@pytest.mark.pg +class TestPostgresFixtures: + def test_fixture_temp_dir_copy_exclude_db( + self, fixture_temp_dir_copy_exclude_db: str + ): + """ + Test if all the files are copied from FAKEDIR to the temporary directory except .db files + """ + FILES_SHOULD_EXIST = [ + "a003/conf/minimal.yml", + ] + FILES_SHOULD_EXCLUDED = ["metadata/data/job_data_a007.db"] + for file in FILES_SHOULD_EXIST: + assert 
os.path.exists(os.path.join(fixture_temp_dir_copy_exclude_db, file)) + + for file in FILES_SHOULD_EXCLUDED: + assert not os.path.exists( + os.path.join(fixture_temp_dir_copy_exclude_db, file) + ) + + def test_fixture_gen_rc_postgres(self, fixture_gen_rc_pg: str): + """ + Test if the .autosubmitrc file is generated and the environment variable is set + """ + rc_file = os.path.join(fixture_gen_rc_pg, ".autosubmitrc") + + # File should exist + assert os.path.exists(rc_file) + + with open(rc_file, "r") as f: + content = f.read() + assert "[database]" in content + assert "backend = postgres" in content + assert "postgresql://" in content + assert fixture_gen_rc_pg in content + + def test_fixture_pg_db(self, fixture_pg_db: Tuple[str, Engine]): + engine = fixture_pg_db[1] + + # Check if the public schema exists + with engine.connect() as conn: + schema_names = get_schema_names(conn) + assert "public" in schema_names + + def test_fixture_pg_db_copy_all(self, fixture_pg_db_copy_all: Tuple[str, Engine]): + engine = fixture_pg_db_copy_all[1] + + # Check if the experiment and details tables are copied + with engine.connect() as conn: + exp_rows = conn.execute(select(tables.ExperimentTable)).all() + details_rows = conn.execute(select(tables.DetailsTable)).all() + + assert len(exp_rows) > 0 + assert len(details_rows) > 0 + + # TODO: Check if the other tables are copied diff --git a/tests/test_graph.py b/tests/test_graph.py index 0dc5beda06a9f91b2944b9fd402bf0ce305f8f7a..55f185f1185e38f36e281ba771d4f73965fbde85 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,6 +1,4 @@ -import os -from sqlalchemy import create_engine from autosubmit_api.builders.configuration_facade_builder import ( AutosubmitConfigurationFacadeBuilder, ConfigurationFacadeDirector, @@ -9,14 +7,12 @@ from autosubmit_api.builders.joblist_loader_builder import ( JobListLoaderBuilder, JobListLoaderDirector, ) -from autosubmit_api.database import tables -from autosubmit_api.database.db_jobdata import ExperimentGraphDrawing +from autosubmit_api.components.experiment.graph_drawer import ExperimentGraphDrawing +from autosubmit_api.database.repositories.graph_draw import ExpGraphDrawDBRepository from autosubmit_api.monitor.monitor import Monitor -from autosubmit_api.persistance.experiment import ExperimentPaths class TestPopulateDB: - def test_monitor_dot(self, fixture_mock_basic_config): expid = "a003" job_list_loader = JobListLoaderDirector( @@ -48,28 +44,27 @@ class TestPopulateDB: AutosubmitConfigurationFacadeBuilder(expid) ).build_autosubmit_configuration_facade() - exp_paths = ExperimentPaths(expid) - with create_engine( - f"sqlite:///{ os.path.abspath(exp_paths.graph_data_db)}" - ).connect() as conn: - conn.execute(tables.graph_data_table.delete()) - conn.commit() + # Create repository handler + graph_draw_db = ExpGraphDrawDBRepository(expid) + + # Delete content of table + graph_draw_db.delete_all() - experimentGraphDrawing.calculate_drawing( - allJobs=job_list_loader.jobs, - independent=False, - num_chunks=autosubmit_configuration_facade.chunk_size, - job_dictionary=job_list_loader.job_dictionary, - ) + experimentGraphDrawing.calculate_drawing( + allJobs=job_list_loader.jobs, + independent=False, + num_chunks=autosubmit_configuration_facade.chunk_size, + job_dictionary=job_list_loader.job_dictionary, + ) - assert ( - experimentGraphDrawing.coordinates - and len(experimentGraphDrawing.coordinates) == 8 - ) + assert ( + experimentGraphDrawing.coordinates + and len(experimentGraphDrawing.coordinates) == 8 + ) - rows = 
conn.execute(tables.graph_data_table.select()).all() + rows = graph_draw_db.get_all() - assert len(rows) == 8 - for job in rows: - job_name: str = job.job_name - assert job_name.startswith(expid) + assert len(rows) == 8 + for job in rows: + job_name: str = job.get("job_name") + assert job_name.startswith(expid) \ No newline at end of file diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e5b7f212ad297f6e29fe7c0c4577183f354a91c1 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,193 @@ +from http import HTTPStatus +import re +from typing import List +from sqlalchemy import Connection, Engine, create_engine, insert, select, text + +from autosubmit_api.database import tables +from sqlalchemy.schema import CreateSchema, CreateTable + + +def dummy_response(*args, **kwargs): + return "Hello World!", HTTPStatus.OK + + +def custom_return_value(value=None): + def blank_func(*args, **kwargs): + return value + + return blank_func + + +def get_schema_names(conn: Connection) -> List[str]: + """ + Get all schema names that are not from the system + """ + results = conn.execute( + text( + "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT LIKE 'pg_%' AND schema_name != 'information_schema'" + ) + ).all() + return [res[0] for res in results] + + +def setup_pg_db(conn: Connection): + """ + Resets database by dropping all schemas except the system ones and restoring the public schema + """ + # Get all schema names that are not from the system + schema_names = get_schema_names(conn) + + # Drop all schemas + for schema_name in schema_names: + conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema_name}" CASCADE')) + + # Restore default public schema + conn.execute(text("CREATE SCHEMA public")) + conn.execute(text("GRANT ALL ON SCHEMA public TO public")) + conn.execute(text("GRANT ALL ON SCHEMA public TO postgres")) + + +def copy_structure_db(filepath: str, engine: Engine): + """ + This function copies the content of the FAKE_EXP_DIR/metadata/structures to the Postgres database + """ + # Get the xxxx from structure_xxxx.db with regex + match = re.search(r"structure_(\w+)\.db", filepath) + expid = match.group(1) + + # Get SQLite source data + source_as_db = create_engine(f"sqlite:///{filepath}") + with source_as_db.connect() as source_conn: + structures_rows = source_conn.execute( + select(tables.ExperimentStructureTable) + ).all() + + # Copy data to the Postgres database + with engine.connect() as conn: + conn.execute(CreateSchema(expid, if_not_exists=True)) + target_table = tables.table_change_schema( + expid, tables.ExperimentStructureTable + ) + conn.execute(CreateTable(target_table, if_not_exists=True)) + if len(structures_rows) > 0: + conn.execute( + insert(target_table), [row._asdict() for row in structures_rows] + ) + conn.commit() + + +def copy_job_data_db(filepath: str, engine: Engine): + """ + This function copies the content of the FAKE_EXP_DIR/metadata/data to the Postgres database + """ + # Get the xxxx from job_data_xxxx.db with regex + match = re.search(r"job_data_(\w+)\.db", filepath) + expid = match.group(1) + # Get SQLite source data + source_as_db = create_engine(f"sqlite:///{filepath}") + with source_as_db.connect() as source_conn: + job_data_rows = source_conn.execute(select(tables.JobDataTable)).all() + exprun_rows = source_conn.execute(select(tables.ExperimentRunTable)).all() + + # Copy data to the Postgres database + with engine.connect() as conn: + conn.execute(CreateSchema(expid, if_not_exists=True)) + # 
Job data
+        target_table = tables.table_change_schema(expid, tables.JobDataTable)
+        conn.execute(CreateTable(target_table, if_not_exists=True))
+        if len(job_data_rows) > 0:
+            conn.execute(insert(target_table), [row._asdict() for row in job_data_rows])
+        # Experiment run
+        target_table = tables.table_change_schema(expid, tables.ExperimentRunTable)
+        conn.execute(CreateTable(target_table, if_not_exists=True))
+        if len(exprun_rows) > 0:
+            conn.execute(insert(target_table), [row._asdict() for row in exprun_rows])
+        conn.commit()
+
+
+def copy_graph_data_db(filepath: str, engine: Engine):
+    """
+    This function copies the content of the FAKE_EXP_DIR/metadata/graph to the Postgres database
+    """
+    # Get the xxxx from graph_data_xxxx.db with regex
+    match = re.search(r"graph_data_(\w+)\.db", filepath)
+    expid = match.group(1)
+
+    # Get SQLite source data
+    source_as_db = create_engine(f"sqlite:///{filepath}")
+    with source_as_db.connect() as source_conn:
+        graph_rows = source_conn.execute(select(tables.GraphDataTable)).all()
+
+    # Copy data to the Postgres database
+    with engine.connect() as conn:
+        conn.execute(CreateSchema(expid, if_not_exists=True))
+        target_table = tables.table_change_schema(expid, tables.GraphDataTable)
+        conn.execute(CreateTable(target_table, if_not_exists=True))
+        if len(graph_rows) > 0:
+            conn.execute(insert(target_table), [row._asdict() for row in graph_rows])
+        conn.commit()
+
+
+def copy_autosubmit_db(filepath: str, engine: Engine):
+    """
+    This function copies the content of the FAKE_EXP_DIR/autosubmit.db to the Postgres database
+    """
+    # Get SQLite source data
+    source_as_db = create_engine(f"sqlite:///{filepath}")
+    with source_as_db.connect() as source_conn:
+        exp_rows = source_conn.execute(select(tables.ExperimentTable)).all()
+        details_rows = source_conn.execute(select(tables.DetailsTable)).all()
+
+    # Copy data to the Postgres database
+    with engine.connect() as conn:
+        conn.execute(CreateTable(tables.ExperimentTable, if_not_exists=True))
+        conn.execute(insert(tables.ExperimentTable), [row._asdict() for row in exp_rows])
+        conn.execute(CreateTable(tables.DetailsTable, if_not_exists=True))
+        conn.execute(insert(tables.DetailsTable), [row._asdict() for row in details_rows])
+        conn.commit()
+
+
+def copy_as_times_db(filepath: str, engine: Engine):
+    """
+    This function copies the content of the FAKE_EXP_DIR/as_times.db to the Postgres database
+    """
+    # Get SQLite source data
+    source_as_db = create_engine(f"sqlite:///{filepath}")
+    with source_as_db.connect() as source_conn:
+        as_times_rows = source_conn.execute(select(tables.ExperimentStatusTable)).all()
+
+    # Copy data to the Postgres database
+    with engine.connect() as conn:
+        conn.execute(CreateTable(tables.ExperimentStatusTable, if_not_exists=True))
+        conn.execute(insert(tables.ExperimentStatusTable), [row._asdict() for row in as_times_rows])
+        conn.commit()
+
+
+def copy_job_packages_db(filepath: str, engine: Engine):
+    """
+    This function copies the content of the FAKE_EXP_DIR/pkl/job_packages to the Postgres database
+    """
+    # Get the xxxx from job_packages_xxxx.db with regex
+    match = re.search(r"job_packages_(\w+)\.db", filepath)
+    expid = match.group(1)
+
+    # Get SQLite source data
+    source_as_db = create_engine(f"sqlite:///{filepath}")
+    with source_as_db.connect() as source_conn:
+        job_packages_rows = source_conn.execute(select(tables.JobPackageTable)).all()
+        wrapper_job_packages_rows = source_conn.execute(select(tables.WrapperJobPackageTable)).all()
+
+    # Copy data to the Postgres database
+    with engine.connect() as conn:
+        conn.execute(CreateSchema(expid, if_not_exists=True))
+        # Job packages
+        target_table = tables.table_change_schema(expid, tables.JobPackageTable)
+        conn.execute(CreateTable(target_table, if_not_exists=True))
+        if len(job_packages_rows) > 0:
+            conn.execute(insert(target_table), [row._asdict() for row in job_packages_rows])
+        # Wrapper job packages
+        target_table = tables.table_change_schema(expid, tables.WrapperJobPackageTable)
+        conn.execute(CreateTable(target_table, if_not_exists=True))
+        if len(wrapper_job_packages_rows) > 0:
+            conn.execute(insert(target_table), [row._asdict() for row in wrapper_job_packages_rows])
+        conn.commit()
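
The copy helpers above are the building blocks behind the Postgres test fixtures exercised earlier (`fixture_pg_db` and `fixture_pg_db_copy_all`). As a rough illustration of how they compose, the sketch below wires them into a pytest fixture that loads every SQLite fixture database into the Postgres test engine. This is only a sketch, not the fixture shipped in the repository's conftest: the `FAKE_EXP_DIR` constant, the directory layout, and the hard-coded expids are assumptions made for the example.

    # Illustrative sketch only; FAKE_EXP_DIR, the layout, and the expids are assumed.
    import os
    from typing import Tuple

    import pytest
    from sqlalchemy import Engine

    from tests.utils import (
        copy_as_times_db,
        copy_autosubmit_db,
        copy_graph_data_db,
        copy_job_data_db,
        copy_job_packages_db,
        copy_structure_db,
    )

    FAKE_EXP_DIR = "tests/experiments"  # assumed location of the SQLite fixture files
    FAKE_EXPIDS = ["a003", "a007"]      # assumed experiment ids present in the fixtures


    @pytest.fixture
    def fixture_pg_db_copy_all(fixture_pg_db: Tuple[str, Engine]) -> Tuple[str, Engine]:
        """Copy every SQLite fixture database into the Postgres test database."""
        temp_dir, engine = fixture_pg_db

        # Experiment-wide databases live at the root of the fake experiment dir
        copy_autosubmit_db(os.path.join(FAKE_EXP_DIR, "autosubmit.db"), engine)
        copy_as_times_db(os.path.join(FAKE_EXP_DIR, "as_times.db"), engine)

        # Per-experiment databases are keyed by expid in their filename and end up
        # in a Postgres schema named after the expid (via tables.table_change_schema)
        for expid in FAKE_EXPIDS:
            exp_dir = os.path.join(FAKE_EXP_DIR, expid)
            copy_structure_db(
                os.path.join(exp_dir, "metadata", "structures", f"structure_{expid}.db"),
                engine,
            )
            copy_job_data_db(
                os.path.join(exp_dir, "metadata", "data", f"job_data_{expid}.db"), engine
            )
            copy_graph_data_db(
                os.path.join(exp_dir, "metadata", "graph", f"graph_data_{expid}.db"), engine
            )
            copy_job_packages_db(
                os.path.join(exp_dir, "pkl", f"job_packages_{expid}.db"), engine
            )

        return temp_dir, engine

Keeping one Postgres schema per expid mirrors the per-experiment SQLite files of the legacy layout, which is exactly what `tables.table_change_schema` encodes in the helpers above.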