From dca69d82f74aafd33b79fc5b6509962e44cc2e7f Mon Sep 17 00:00:00 2001 From: Ivan Alsina <1050-ialsina@users.noreply.bsc.es> Date: Fri, 20 Jun 2025 16:09:35 +0200 Subject: [PATCH 1/3] Refactor: Add scripts as submodule --- destine_analytics/scripts/__init__.py | 1 + {scripts => destine_analytics/scripts}/create_ganttchart.py | 0 {scripts => destine_analytics/scripts}/draw_filetree.py | 0 3 files changed, 1 insertion(+) create mode 100644 destine_analytics/scripts/__init__.py rename {scripts => destine_analytics/scripts}/create_ganttchart.py (100%) rename {scripts => destine_analytics/scripts}/draw_filetree.py (100%) diff --git a/destine_analytics/scripts/__init__.py b/destine_analytics/scripts/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/destine_analytics/scripts/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/scripts/create_ganttchart.py b/destine_analytics/scripts/create_ganttchart.py similarity index 100% rename from scripts/create_ganttchart.py rename to destine_analytics/scripts/create_ganttchart.py diff --git a/scripts/draw_filetree.py b/destine_analytics/scripts/draw_filetree.py similarity index 100% rename from scripts/draw_filetree.py rename to destine_analytics/scripts/draw_filetree.py -- GitLab From 2a81f0992a4b58166c2089477ac3e5da2d7acba6 Mon Sep 17 00:00:00 2001 From: Ivan Alsina <1050-ialsina@users.noreply.bsc.es> Date: Fri, 20 Jun 2025 16:28:44 +0200 Subject: [PATCH 2/3] Add: Entrypoints to destine-analytics --- destine_analytics/scripts/utils.py | 46 ++++++++++++++++++++++++++++++ destine_analytics/utils.py | 6 ---- pyproject.toml | 4 +++ requirements.txt | 1 + 4 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 destine_analytics/scripts/utils.py delete mode 100644 destine_analytics/utils.py diff --git a/destine_analytics/scripts/utils.py b/destine_analytics/scripts/utils.py new file mode 100644 index 0000000..9cb60e3 --- /dev/null +++ b/destine_analytics/scripts/utils.py @@ -0,0 +1,46 @@ +import hashlib +import json +from pathlib import Path +import pickle +import os + +from destine_analytics.paths import LOG_DIR + +def get_available_expids() -> list[str]: + """Get all available experiment IDs in the given log directory.""" + return [p.name for p in LOG_DIR.glob("*") if p.is_dir()] + + +def json_serialize(obj): + if isinstance(obj, Path): + return str(obj) + raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable") + + +def get_cache_path(name, *args, **kwargs) -> Path: + key = json.dumps([name, *args, kwargs], default=json_serialize) + filename = hashlib.sha256(key.encode("utf-8")).hexdigest() + directory = os.path.basename(__file__) + return Path.home().resolve() / ".cache" / directory / filename + + +def cache_read(cache, name, /, *args, **kwargs): + if cache is False: + return + cpath = get_cache_path(name, *args, **kwargs) + if not cpath.exists(): + return None + print(f"Reading '{name}' from cache...") + with open(cpath, "rb") as fb: + return pickle.load(fb) + + +def cache_write(cache, obj, name, /, *args, **kwargs): + if cache is False: + return + print(f"Writing '{name}' to cache...") + cpath = get_cache_path(name, *args, **kwargs) + if not cpath.parent.exists(): + cpath.parent.mkdir(parents=True) + with open(cpath, "wb") as fb: + pickle.dump(obj, fb) diff --git a/destine_analytics/utils.py b/destine_analytics/utils.py deleted file mode 100644 index 0a480cc..0000000 --- a/destine_analytics/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -from .paths import LOG_DIR - - -def get_available_expids() -> list[str]: - """Get all available experiment IDs in the given log directory.""" - return [p.name for p in LOG_DIR.glob("*") if p.is_dir()] diff --git a/pyproject.toml b/pyproject.toml index 927eedf..2e152c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,10 @@ packages = ["destine_analytics"] [tool.setuptools.dynamic] dependencies = {file = ["requirements.txt"]} +[project.scripts] +draw-filetree = "destine_analytics.scripts.draw_filetree:main" +create-ganttchart = "destine_analytics.scripts.create_ganttchart:main" + [project.optional-dependencies] dev = [ "pytest>=7.0.0", diff --git a/requirements.txt b/requirements.txt index 370e8bf..c237246 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ matplotlib==3.* numpy==2.* pandas==2.* +plotly==5.* pytz==2025.2 PyYAML==6.* requests==2.* -- GitLab From 71be64ba4c26b9395d7605e3f3f1af59eb675079 Mon Sep 17 00:00:00 2001 From: Ivan Alsina <1050-ialsina@users.noreply.bsc.es> Date: Fri, 20 Jun 2025 16:29:17 +0200 Subject: [PATCH 3/3] Fix: draw_filetree script --- destine_analytics/scripts/draw_filetree.py | 107 +++++++++------------ 1 file changed, 48 insertions(+), 59 deletions(-) diff --git a/destine_analytics/scripts/draw_filetree.py b/destine_analytics/scripts/draw_filetree.py index 5a61a7d..4aa70bb 100644 --- a/destine_analytics/scripts/draw_filetree.py +++ b/destine_analytics/scripts/draw_filetree.py @@ -278,7 +278,7 @@ def makefigure( parents: Mapping[str, str], root: str, figure: str, - show: bool, + interactive: bool, output_dir: str, prefix: str, cache: bool, @@ -287,13 +287,13 @@ def makefigure( if figure == "all": return [ makefigure( - sizes, parents, root, "treemap", show, output_dir, prefix, cache, **kwargs + sizes, parents, root, "treemap", interactive, output_dir, prefix, cache, **kwargs ), makefigure( - sizes, parents, root, "sunburst", show, output_dir, prefix, cache, **kwargs + sizes, parents, root, "sunburst", interactive, output_dir, prefix, cache, **kwargs ), makefigure( - sizes, parents, root, "icicle", show, output_dir, prefix, cache, **kwargs + sizes, parents, root, "icicle", interactive, output_dir, prefix, cache, **kwargs ), ] @@ -306,7 +306,8 @@ def makefigure( output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) fig.write_image(output_path / f"{prefix}{figure}.png") - if show: + if interactive: + print("Showing figure...") fig.show() return fig @@ -378,6 +379,43 @@ def compute_directory_size(df, only_directories=False): return total_sizes, parents +def _get_dataframe(path_str, ssh_command, expid, cache): + if ssh_command: + find_cmd = f"find {shlex.quote(path_str)} -ls" + ssh_cmd_list = shlex.split(ssh_command) + [find_cmd] + try: + process = subprocess.run( + ssh_cmd_list, + capture_output=True, + text=True, + check=True, + encoding="utf-8", + ) + return read_filetree(StringIO(process.stdout), common=False) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + raise RuntimeError(f"Error executing remote command: {e}") from e + else: + path = Path(path_str) + if not path.exists(): + raise FileNotFoundError(f"Local path '{path}' does not exist.") + + if path.is_dir(): + print(f"Scanning directory '{path}'...") + try: + process = subprocess.run( + ["find", str(path), "-ls"], + capture_output=True, + text=True, + check=True, + encoding="utf-8", + ) + return read_filetree(StringIO(process.stdout), common=False) + except subprocess.CalledProcessError as e: + raise FileNotFoundError(f"Error executing 'find' on '{path}': {e}") from e + if path.is_file(): + return read_filetree(path, common=False) + raise FileNotFoundError(f"Local path '{path}' is not a directory or a file.") + def main(): global df @@ -388,70 +426,21 @@ def main(): root = args.root only_directories = args.only_directories figure = args.figure - show = args.show + interactive = args.interactive cache = args.cache output_dir = args.output_dir df = None + cache_key_args = { "path": path_str, "expid": expid, "ssh_command": ssh_command, } - df = _cache_read(cache, "df", **cache_key_args) if df is None: - if ssh_command: - print(f"Executing 'find -ls' on remote path '{path_str}' via ssh...") - find_cmd = f"find {shlex.quote(path_str)} -ls" - ssh_cmd_list = shlex.split(ssh_command) + [find_cmd] - try: - process = subprocess.run( - ssh_cmd_list, - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - df = read_filetree(StringIO(process.stdout), common=False) - except (subprocess.CalledProcessError, FileNotFoundError) as e: - print(f"Error executing remote command: {e}") - if isinstance(e, subprocess.CalledProcessError): - print(f"Stderr: {e.stderr}") - return - else: - path = Path(path_str) - if not path.exists(): - print(f"Error: Local path '{path}' does not exist.") - return - - if path.is_dir(): - print(f"Scanning directory '{path}'...") - try: - process = subprocess.run( - ["find", str(path), "-ls"], - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - df = read_filetree(StringIO(process.stdout), common=False) - except subprocess.CalledProcessError as e: - print(f"Error executing 'find' on '{path}': {e}") - print(f"Stderr: {e.stderr}") - return - elif path.is_file(): - df = read_filetree(path, common=False) - else: - print(f"Error: Local path '{path}' is not a directory or a file.") - return - - if df is not None: - _cache_write(cache, df, "df", **cache_key_args) - - if df is None: - print("Could not generate DataFrame.") - return + df = _get_dataframe(path_str, ssh_command, expid, cache) + _cache_write(cache, df, "df", **cache_key_args) df = _add_path_column(df) df = _get_subtree_df(df, root) @@ -474,7 +463,7 @@ def main(): sizes=sizes_parents[0], parents=sizes_parents[1], figure=figure, - show=show, + interactive=interactive, output_dir=output_dir, prefix=(f"{expid}_" if expid else ""), cache=cache, -- GitLab