From 71224dc3cde445f2400e7d7e05c9d362086b85d9 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Tue, 22 Aug 2023 15:35:11 +0200 Subject: [PATCH] Add first performance test case created using the profiler --- .gitignore | 4 + .../profiler-tests/profiler-tests.ipynb | 510 ++++++++++++++++++ .../profiler-tests/requirements.txt | 3 + 3 files changed, 517 insertions(+) create mode 100644 test/performance/profiler-tests/profiler-tests.ipynb create mode 100644 test/performance/profiler-tests/requirements.txt diff --git a/.gitignore b/.gitignore index 40e7cb3ec..c74035e8d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,7 @@ test/coverage.xml dockerfiles/id_rsa* dockerfiles/authorized_keys + +# Jupyter +.ipynb_checkpoints/ +test/performance/profiler-tests/scenario* diff --git a/test/performance/profiler-tests/profiler-tests.ipynb b/test/performance/profiler-tests/profiler-tests.ipynb new file mode 100644 index 000000000..3be8dcb4a --- /dev/null +++ b/test/performance/profiler-tests/profiler-tests.ipynb @@ -0,0 +1,510 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "55294041-dd6e-4e20-bd2f-596d3d34ac93", + "metadata": {}, + "source": [ + "# Autosubmit profiler tests\n", + "\n", + "This notebook contains the first tests performed with the new Autosubmit profiler.\n", + "The test scenarios listed here are described in [this issue][issue].\n", + "\n", + "[issue]: https://earth.bsc.es/gitlab/es/autosubmit/-/issues/1120" + ] + }, + { + "cell_type": "markdown", + "id": "80ed8592-a8d9-4a69-995e-a5a79c08a8e7", + "metadata": {}, + "source": [ + "## 1. 
A workflow configuration similar to what Dani used to profile memory issues\n", + "\n", + "The configuration used by Daniel Beltran to profile memory issues in Autosubmit\n", + "configuration is based on this:\n", + "\n", + "```yaml\n", + "EXPERIMENT:\n", + " DATELIST: '20000101 20000102'\n", + " MEMBERS: 'fc0 fc1'\n", + " CHUNKSIZEUNIT: month\n", + " CHUNKSIZE: '4'\n", + " NUMCHUNKS: '400000' # 13000 max cap 1000000 jobs...\n", + " CHUNKINI: ''\n", + " CALENDAR: standard\n", + " RUN_ONLY_MEMBERS: ''\n", + "```\n", + "\n", + "Then we can change the number of splits, or splits and chunks, or just chunks.\n", + "Dani does that depending on which part he is trying to optimize." + ] + }, + { + "cell_type": "markdown", + "id": "4798a1e6-0553-4170-ab86-dba4c913bf14", + "metadata": {}, + "source": [ + "### Steps to reproduce it\n", + "\n", + "```bash\n", + "$ autosubmit expid -H local -d test -min\n", + "```\n", + "\n", + "Open `~/autosubmit/<expid>/conf/minimal.yml`, set the project type to `LOCAL`,\n", + "and update the `LOCAL` project key accordingly:\n", + "\n", + "```yaml\n", + "CONFIG:\n", + " # Current version of Autosubmit.\n", + " AUTOSUBMIT_VERSION: \"4.0.87\"\n", + " # Total number of jobs in the workflow.\n", + " TOTALJOBS: 20\n", + " # Maximum number of jobs permitted in the waiting status.\n", + " MAXWAITINGJOBS: 20\n", + "DEFAULT:\n", + " # Job experiment ID.\n", + " EXPID: \"a003\"\n", + " # Default HPC platform name.\n", + " HPCARCH: \"local\"\n", + " #hint: use %PROJDIR% to point to the project folder (where the project is cloned)\n", + " # Custom configuration location.\n", + " CUSTOM_CONFIG: \"%PROJDIR%/\"\n", + "PROJECT:\n", + " # Type of the project.\n", + " PROJECT_TYPE: local\n", + " # Folder to hold the project sources.\n", + " PROJECT_DESTINATION: 'local_project'\n", + "PLATFORMS:\n", + " LOCAL:\n", + " TYPE: ps\n", + " HOST: localhost\n", + " PROJECT: \n", + " USER:\n", + " QUEUE: debug\n", + " SCRATCH_DIR: /tmp/\n", + " ADD_PROJECT_TO_HOST: false\n", + " 
MAX_WALLCLOCK: 48:00\n", + " TEMP_DIR: ''\n", + "EXPERIMENT:\n", + " DATELIST: '20000101 20000102'\n", + " MEMBERS: 'fc0 fc1'\n", + " CHUNKSIZEUNIT: month\n", + " CHUNKSIZE: '4'\n", + " NUMCHUNKS: '1' # 13000 max cap 1000000 jobs...\n", + " CHUNKINI: ''\n", + " CALENDAR: standard\n", + " RUN_ONLY_MEMBERS: ''\n", + "LOCAL:\n", + " PROJECT_PATH: '/tmp/project'\n", + "JOBS:\n", + " TEST:\n", + " FILE: TEMPLATE.sh\n", + " RUNNING: once\n", + "```\n", + "\n", + "Then run the profiler.\n", + "\n", + "```bash\n", + "$ autosubmit create --profile -np <expid>\n", + "```\n", + "\n", + "Then save the two files created in `~/autosubmit/<expid>/tmp/profile/`\n", + "to `./scenario1/`.\n", + "\n", + "Now modify `EXPERIMENT.NUMCHUNKS` to `1000` and run the profiler again,\n", + "saving the new files to `./scenario1/` as well. Repeat it to `10000`,\n", + "`100000`, `1000000`, and `10000000`." + ] + }, + { + "cell_type": "markdown", + "id": "954b8a92-8818-4b46-8c26-39e101033767", + "metadata": {}, + "source": [ + "### Memory" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "608d0759-1286-4b9f-a04a-7700031fe5c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 6 test cases...\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "scenario_path = Path('./scenario1/')\n", + "cases = [1, 1000, 10000, 100000, 1000000, 10000000]\n", + "result_files = sorted(result_file for result_file in scenario_path.iterdir() if result_file.is_file())\n", + "\n", + "# Each test case must contribute exactly one (prof, txt) pair; zip() would silently drop extras.\n", + "if len(result_files) != 2 * len(cases):\n", + "    raise ValueError(f'Incorrect number of result files in {scenario_path}: {len(result_files)}')\n", + "pairs = list(zip(result_files[0::2], result_files[1::2]))\n", + "pair_cases = [(case, *pair) for case, pair in zip(cases, pairs)]\n", + "\n", + "print(f'Processing {len(pair_cases)} test cases...')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": 
"8af096ae-4f62-43ad-9bb2-7f4c89541941", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib notebook\n", + "%matplotlib inline \n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "65df4b6d-7060-4a1b-a0c1-f98eee3fd635", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "MEM_REGEX = re.compile(r'MEMORY CONSUMPTION: (.*)')\n", + "\n", + "def get_memory(txt):\n", + "    \"\"\"Return the memory consumption (in MiB) reported in the profiler text file ``txt``.\"\"\"\n", + "    with open(txt) as txt_file:\n", + "        m = MEM_REGEX.search(txt_file.read())\n", + "    if m is None:\n", + "        raise ValueError(f'No MEMORY CONSUMPTION entry found in {txt}')\n", + "    return float(m.group(1).strip(' .').split(' ')[0])\n", + "\n", + "\n", + "memories = list(map(get_memory, [case[2] for case in pair_cases]))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "372f7ac2-67fa-4448-a621-49ac43568451", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 1000, 10000, 100000, 1000000, 10000000]\n", + "[2.015625, 2.08203125, 2.7890625, 9.71484375, 79.29296875, 774.8515625]\n" + ] + } + ], + "source": [ + "print(cases)\n", + "print(memories)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2fe175d1-09fa-4088-8bcd-940d6e0876e2", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Bar chart of the memory reported for each NUMCHUNKS value.\n", + "fig, ax = plt.subplots()\n", + "ax.bar([str(case) for case in cases], memories)\n", + "ax.set(title='Memory', xlabel='NUMCHUNKS', ylabel='MiB')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "3e7b9ea2-b731-40b9-b59e-76d931c88b89", + "metadata": {}, + "source": [ + "### cProfile" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "06749131-bf6e-4e0a-b902-41edfd6e658b", + "metadata": {}, + "outputs": [], + "source": [ + "import io\n", + "import pstats\n", + "from IPython.display import display, Markdown\n", + "\n", + "# Number of function rows to keep per report (pstats prints 7 lines before the functions listing).\n", + "N = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "99f6b137-3664-4aa6-9f2c-f2a6c616518f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=1\n", + "```bash\n", + "Tue Aug 22 13:34:57 2023 scenario1/a003_profile_20230822-133457.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.153 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 10 0.044 0.004 0.044 0.004 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.040 0.002 0.040 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.038 0.038 job_package_persistence.py:83(reset_table)\n", + " 2 0.000 0.000 0.029 0.015 job_list.py:178(generate)\n", + " 2 0.000 0.000 0.028 0.014 configcommon.py:1343(reload)\n", + " 2 0.000 0.000 0.028 0.014 job_list.py:2447(update_genealogy)\n", + " 1 0.000 0.000 0.026 0.026 db_structure.py:120(save_structure)\n", + " 4 0.000 0.000 0.024 0.006 constructor.py:116(get_single_data)\n", + " 2 0.000 0.000 0.023 0.011 configcommon.py:1391(load_last_run)\n", + " 1 0.000 0.000 0.022 0.022 experiment_history.py:191(process_status_changes)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, 
+ "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=1000\n", + "```bash\n", + "Tue Aug 22 13:42:43 2023 scenario1/a003_profile_20230822-134243.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.150 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 10 0.044 0.004 0.044 0.004 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.041 0.002 0.041 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.038 0.038 job_package_persistence.py:83(reset_table)\n", + " 2 0.000 0.000 0.029 0.015 job_list.py:178(generate)\n", + " 2 0.000 0.000 0.028 0.014 job_list.py:2447(update_genealogy)\n", + " 1 0.000 0.000 0.027 0.027 db_structure.py:120(save_structure)\n", + " 2 0.000 0.000 0.026 0.013 configcommon.py:1343(reload)\n", + " 4 0.000 0.000 0.023 0.006 constructor.py:116(get_single_data)\n", + " 1 0.000 0.000 0.022 0.022 experiment_history.py:191(process_status_changes)\n", + " 2 0.000 0.000 0.021 0.011 configcommon.py:1391(load_last_run)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=10000\n", + "```bash\n", + "Tue Aug 22 13:43:11 2023 scenario1/a003_profile_20230822-134311.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.155 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 10 0.049 0.005 0.049 0.005 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.041 0.002 0.041 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.038 0.038 job_package_persistence.py:83(reset_table)\n", + " 2 0.000 0.000 0.029 0.015 job_list.py:178(generate)\n", + " 2 0.000 0.000 0.028 0.014 job_list.py:2447(update_genealogy)\n", + " 1 0.000 0.000 0.027 0.027 
experiment_history.py:191(process_status_changes)\n", + " 1 0.000 0.000 0.026 0.026 db_structure.py:120(save_structure)\n", + " 2 0.000 0.000 0.026 0.013 configcommon.py:1343(reload)\n", + " 1 0.000 0.000 0.026 0.026 experiment_history.py:259(create_new_experiment_run)\n", + " 4 0.000 0.000 0.023 0.006 constructor.py:116(get_single_data)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=100000\n", + "```bash\n", + "Tue Aug 22 13:43:23 2023 scenario1/a003_profile_20230822-134323.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.149 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 10 0.043 0.004 0.043 0.004 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.036 0.002 0.036 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.035 0.035 job_package_persistence.py:83(reset_table)\n", + " 2 0.004 0.002 0.033 0.016 job_list.py:178(generate)\n", + " 2 0.000 0.000 0.030 0.015 configcommon.py:1343(reload)\n", + " 2 0.000 0.000 0.028 0.014 job_list.py:2447(update_genealogy)\n", + " 4 0.000 0.000 0.026 0.007 constructor.py:116(get_single_data)\n", + " 1 0.000 0.000 0.026 0.026 db_structure.py:120(save_structure)\n", + " 2 0.000 0.000 0.025 0.013 configcommon.py:1391(load_last_run)\n", + " 2 0.000 0.000 0.025 0.012 main.py:1059(load)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=1000000\n", + "```bash\n", + "Tue Aug 22 13:43:33 2023 scenario1/a003_profile_20230822-134333.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.177 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 2 0.035 
0.018 0.065 0.032 job_list.py:178(generate)\n", + " 10 0.043 0.004 0.043 0.004 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.037 0.002 0.037 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.035 0.035 job_package_persistence.py:83(reset_table)\n", + " 2 0.000 0.000 0.028 0.014 job_list.py:2447(update_genealogy)\n", + " 2 0.000 0.000 0.027 0.013 configcommon.py:1343(reload)\n", + " 1 0.000 0.000 0.026 0.026 db_structure.py:120(save_structure)\n", + " 1 0.000 0.000 0.024 0.024 autosubmit.py:5833(load_job_list)\n", + " 4 0.000 0.000 0.023 0.006 constructor.py:116(get_single_data)\n", + " 2 0.000 0.000 0.022 0.011 configcommon.py:1391(load_last_run)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## NUMCHUNKS=10000000\n", + "```bash\n", + "Tue Aug 22 13:44:03 2023 scenario1/a003_profile_20230822-134403.prof\n", + "\n", + " 107526 function calls (104387 primitive calls) in 0.545 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 2 0.366 0.183 0.397 0.198 job_list.py:178(generate)\n", + " 1 0.000 0.000 0.167 0.167 autosubmit.py:5833(load_job_list)\n", + " 10 0.050 0.005 0.050 0.005 {method 'commit' of 'sqlite3.Connection' objects}\n", + " 20 0.041 0.002 0.041 0.002 {method 'execute' of 'sqlite3.Cursor' objects}\n", + " 1 0.000 0.000 0.038 0.038 job_package_persistence.py:83(reset_table)\n", + " 2 0.000 0.000 0.037 0.019 subprocess.py:332(call)\n", + " 2 0.000 0.000 0.034 0.017 subprocess.py:732(__init__)\n", + " 1 0.000 0.000 0.034 0.034 autosubmit.py:4263(database_backup)\n", + " 2 0.000 0.000 0.034 0.017 subprocess.py:1550(_execute_child)\n", + " 2 0.000 0.000 0.029 0.014 job_list.py:2447(update_genealogy)\n", + "...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + 
"source": [ + "sort_by = pstats.SortKey.CUMULATIVE\n", + "\n", + "def process_cumtime(prof):\n", + "    \"\"\"Return the header and first N function rows of the cumulative-time report for ``prof``.\"\"\"\n", + "    buffer = io.StringIO()\n", + "    pstats.Stats(str(prof), stream=buffer).strip_dirs().sort_stats(sort_by).print_stats()\n", + "    report_lines = buffer.getvalue().split('\\n')\n", + "    return '\\n'.join(report_lines[:N + 7])\n", + "\n", + "# Render one truncated report per test case.\n", + "for numchunks, prof, _txt in pair_cases:\n", + "    header = f\"## NUMCHUNKS={numchunks}\"\n", + "    cprofile_truncated = process_cumtime(prof)\n", + "    text = f\"{header}\\n```bash\\n{cprofile_truncated}\\n...\\n```\"\n", + "    display(Markdown(text))" + ] + }, + { + "cell_type": "markdown", + "id": "98c03c99-261e-4b6b-9d42-75fcab9290b3", + "metadata": {}, + "source": [ + "### Notes\n", + "\n", + "WIP" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test/performance/profiler-tests/requirements.txt b/test/performance/profiler-tests/requirements.txt new file mode 100644 index 000000000..94bfe2e6c --- /dev/null +++ b/test/performance/profiler-tests/requirements.txt @@ -0,0 +1,3 @@ +jupyterlab==4.0.* +pandas==2.0.* +pandoc==2.3.* -- GitLab