From 5921112bc2d96e79d3d785cf0e84430dd21c33b4 Mon Sep 17 00:00:00 2001 From: plutopulp Date: Mon, 15 Dec 2025 13:48:22 +0100 Subject: [PATCH 1/4] Add pytest-benchmark dependency for performance benchmarking --- Makefile | 4 ++-- poetry.lock | 39 +++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 5b55c0f..0eabaae 100755 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -# Async Downloader - Makefile -CODE_PATHS := src tests examples scripts +# Rheo - Makefile +CODE_PATHS := src tests examples scripts benchmarks .PHONY: help clean format lint test test-cov test-quick type-check ci docs-cli examples diff --git a/poetry.lock b/poetry.lock index adf25c3..b9dd0b7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1142,6 +1142,19 @@ files = [ {file = "propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d"}, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +description = "Get CPU info with pure Python" +optional = true +python-versions = "*" +groups = ["main"] +markers = "extra == \"dev\"" +files = [ + {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, + {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, +] + [[package]] name = "pycodestyle" version = "2.14.0" @@ -1407,6 +1420,28 @@ typing-extensions = {version = ">=4.12", markers = "python_version < \"3.13\""} docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"] testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." 
+optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"dev\"" +files = [ + {file = "pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803"}, + {file = "pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779"}, +] + +[package.dependencies] +py-cpuinfo = "*" +pytest = ">=8.1" + +[package.extras] +aspect = ["aspectlib"] +elasticsearch = ["elasticsearch"] +histogram = ["pygal", "pygaljs", "setuptools"] + [[package]] name = "pytest-cov" version = "7.0.0" @@ -1729,9 +1764,9 @@ multidict = ">=4.0" propcache = ">=0.2.1" [extras] -dev = ["aioresponses", "black", "blockbuster", "flake8", "isort", "mypy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-mock", "types-aiofiles"] +dev = ["aioresponses", "black", "blockbuster", "flake8", "isort", "mypy", "pytest", "pytest-asyncio", "pytest-benchmark", "pytest-cov", "pytest-mock", "types-aiofiles"] [metadata] lock-version = "2.1" python-versions = ">=3.12,<4.0" -content-hash = "e4980dd23a13bfc87e60eca931d43088f70a84dcd93cc40135cda2868c85a244" +content-hash = "143821edeb3f9821b4cd59a7355fd82b36c801ff8eef01ec6e088537b5d41ea3" diff --git a/pyproject.toml b/pyproject.toml index 07dbc27..b2133b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ dev = [ "mypy>=1.11.0,<2.0.0", "blockbuster>=1.5.25,<2.0.0", "types-aiofiles>=25.1.0.20251011", + "pytest-benchmark>=5.2.3,<6.0.0", ] [project.scripts] From d87474f51cc18c47ae7ead25f23083a3c9df0f77 Mon Sep 17 00:00:00 2001 From: plutopulp Date: Mon, 15 Dec 2025 13:53:22 +0100 Subject: [PATCH 2/4] Scaffold benchmarks directory structure --- benchmarks/README.md | 0 benchmarks/__init__.py | 0 benchmarks/contest.py | 0 benchmarks/test_throughput.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/contest.py 
create mode 100644 benchmarks/test_throughput.py diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/contest.py b/benchmarks/contest.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/test_throughput.py b/benchmarks/test_throughput.py new file mode 100644 index 0000000..e69de29 From 8ed72f7ef4ab42fc88d84784160c44f9eceaa36f Mon Sep 17 00:00:00 2001 From: plutopulp Date: Mon, 15 Dec 2025 14:18:35 +0100 Subject: [PATCH 3/4] Add HTTP fixture server for benchmark downloads --- benchmarks/conftest.py | 110 +++++++++++++++++++++++++++++++++++++++++ benchmarks/contest.py | 0 2 files changed, 110 insertions(+) create mode 100644 benchmarks/conftest.py delete mode 100644 benchmarks/contest.py diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 0000000..4bc2c7e --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,110 @@ +"""Shared fixtures for benchmarking.""" + +import asyncio +import threading +import typing as t +from pathlib import Path + +import pytest +from aiohttp import web + +_PATTERN = b"X" * 1024 + + +async def _file_handler(request: web.Request) -> web.Response: + """Serve deterministic content of the requested size.""" + size = int(request.match_info["size"]) + chunks, remainder = divmod(size, len(_PATTERN)) + content = _PATTERN * chunks + _PATTERN[:remainder] + return web.Response(body=content, content_type="application/octet-stream") + + +class _BenchmarkServer: + """HTTP server running in a background thread for benchmark downloads.""" + + def __init__(self) -> None: + self._base_url: str | None = None + self._loop: asyncio.AbstractEventLoop | None = None + self._thread: threading.Thread | None = None + self._runner: web.AppRunner | None = None + self._started = threading.Event() + + @property + def base_url(self) -> str: + if 
self._base_url is None: + raise RuntimeError("Server not started") + return self._base_url + + def start(self) -> None: + """Start the server in a background thread.""" + self._thread = threading.Thread(target=self._run_server, daemon=True) + self._thread.start() + self._started.wait(timeout=10) + if self._base_url is None: + raise RuntimeError("Server failed to start") + + def stop(self) -> None: + """Stop the server and clean up.""" + if self._loop and self._runner: + future = asyncio.run_coroutine_threadsafe( + self._runner.cleanup(), self._loop + ) + future.result(timeout=5) + if self._loop: + self._loop.call_soon_threadsafe(self._loop.stop) + if self._thread: + self._thread.join(timeout=5) + + def _run_server(self) -> None: + """Run the server event loop in this thread.""" + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + + try: + self._loop.run_until_complete(self._start_server()) + self._started.set() + self._loop.run_forever() + finally: + self._loop.close() + + async def _start_server(self) -> None: + """Start the aiohttp server.""" + app = web.Application() + app.router.add_get("/file/{size}", _file_handler) + + self._runner = web.AppRunner(app) + await self._runner.setup() + + site = web.TCPSite(self._runner, host="127.0.0.1", port=0) + await site.start() + + # Get the dynamically assigned port + sockets = site._server.sockets if site._server else [] + if not sockets: + raise RuntimeError("Failed to bind server socket") + + port = sockets[0].getsockname()[1] + self._base_url = f"http://127.0.0.1:{port}" + + +@pytest.fixture(scope="session") +def benchmark_server() -> t.Iterator[str]: + """Start an HTTP server for benchmark downloads and yield its base URL. + + The server runs in a background thread to avoid async fixture issues + with pytest-benchmark (which uses sync test functions). 
+ """ + server = _BenchmarkServer() + server.start() + try: + yield server.base_url + finally: + server.stop() + + +@pytest.fixture +def benchmark_download_dir(tmp_path: Path) -> Path: + """Provide a clean download directory for each benchmark run.""" + download_dir = tmp_path / "downloads" + download_dir.mkdir(exist_ok=True) + return download_dir diff --git a/benchmarks/contest.py b/benchmarks/contest.py deleted file mode 100644 index e69de29..0000000 From dccce3fa0db399e232f7917f16040fbcc0ea4676 Mon Sep 17 00:00:00 2001 From: plutopulp Date: Mon, 15 Dec 2025 14:23:50 +0100 Subject: [PATCH 4/4] Add basic throughput benchmark scenario + README --- .gitignore | 5 ++--- Makefile | 18 +++++++++++++++-- benchmarks/README.md | 22 +++++++++++++++++++++ benchmarks/conftest.py | 10 +++++++++- benchmarks/test_throughput.py | 37 +++++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index ffefb11..ad98d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -210,11 +210,10 @@ __marimo__/ .cursor/ # Internal planning documents -PUBLICATION_PLAN.md -GITHUB_ISSUES.md .github/IDEAS_INTERNAL.md # Downloaded files directory (root level only, not src/rheo/downloads/) /downloads/ .tmp/ -tmp/ \ No newline at end of file +tmp/ +benchmarks/.results/ \ No newline at end of file diff --git a/Makefile b/Makefile index 0eabaae..3f6b562 100755 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ # Rheo - Makefile CODE_PATHS := src tests examples scripts benchmarks -.PHONY: help clean format lint test test-cov test-quick type-check ci docs-cli examples +.PHONY: help clean format lint test test-cov test-quick type-check ci docs-cli examples benchmark benchmark-compare .DEFAULT_GOAL := help help: - @echo "Async Downloader - Make Commands" + @echo "Rheo - Make Commands" @echo "" @echo "Usage: make " @echo "" @@ -20,6 +20,8 @@ help: @echo " make ci Run all CI checks locally" @echo " make docs-cli Generate CLI documentation" @echo " make 
examples Run all example scripts" + @echo " make benchmark Run performance benchmarks" + @echo " make benchmark-compare Compare benchmark results" @echo " make clean Clean up build artifacts" clean: @@ -76,3 +78,15 @@ docs-cli: examples: @echo "Running all examples..." poetry run python scripts/run_examples.py + +benchmark: + @echo "Running benchmarks..." + poetry run pytest benchmarks -v --benchmark-only --benchmark-autosave --benchmark-storage=benchmarks/.results + +benchmark-compare: + @echo "Comparing benchmark results..." + @if ls benchmarks/.results/*/*.json 1>/dev/null 2>&1; then \ + poetry run pytest-benchmark compare benchmarks/.results/*/*.json --group-by=name; \ + else \ + echo "No benchmark results found. Run 'make benchmark' first."; \ + fi diff --git a/benchmarks/README.md b/benchmarks/README.md index e69de29..739d86f 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -0,0 +1,22 @@ +# Benchmarks + +Minimal benchmarking using `pytest-benchmark` and a local aiohttp fixture server. 
+ +## Running + +- `make benchmark` — runs all benchmarks, saves JSON under `benchmarks/.results/` + +## Comparing + +- `make benchmark-compare` — compares all saved results in `benchmarks/.results/` + +## Scenario + +- `test_throughput_10_files_1mb` downloads 10 x 1MB files with 4 concurrent workers via `DownloadManager` +- Files are served locally from a background aiohttp server (`/file/{size}`) with deterministic content +- Uses `FileExistsStrategy.OVERWRITE` to ensure files are actually downloaded each iteration + +## Outputs + +- Results: `benchmarks/.results/` (gitignored) +- Downloads for each run: per-test `tmp_path / "downloads"` temp directory diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index 4bc2c7e..4b86ea0 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -28,6 +28,7 @@ def __init__(self) -> None: self._thread: threading.Thread | None = None self._runner: web.AppRunner | None = None self._started = threading.Event() + self._error: BaseException | None = None @property def base_url(self) -> str: @@ -40,8 +41,12 @@ def start(self) -> None: self._thread = threading.Thread(target=self._run_server, daemon=True) self._thread.start() self._started.wait(timeout=10) + if self._error is not None: + raise RuntimeError( + f"Server failed to start: {self._error}" + ) from self._error if self._base_url is None: - raise RuntimeError("Server failed to start") + raise RuntimeError("Server failed to start (timeout)") def stop(self) -> None: """Stop the server and clean up.""" @@ -64,6 +69,9 @@ def _run_server(self) -> None: self._loop.run_until_complete(self._start_server()) self._started.set() self._loop.run_forever() + except BaseException as e: + self._error = e + self._started.set() # Unblock main thread so it can see the error finally: self._loop.close() diff --git a/benchmarks/test_throughput.py b/benchmarks/test_throughput.py index e69de29..d1bad7d 100644 --- a/benchmarks/test_throughput.py +++ b/benchmarks/test_throughput.py 
@@ -0,0 +1,37 @@ +"""Throughput benchmark scenarios.""" + +import asyncio +from pathlib import Path + +from rheo.domain.file_config import FileConfig, FileExistsStrategy +from rheo.downloads import DownloadManager + + +def test_throughput_10_files_1mb( + benchmark, benchmark_server: str, benchmark_download_dir: Path +) -> None: + """Benchmark downloading 10 x 1MB files with 4 concurrent workers.""" + file_size = 1_000_000 + file_count = 10 + max_concurrent = 4 + + async def download_batch() -> None: + files = [ + FileConfig( + url=f"{benchmark_server}/file/{file_size}", + filename=f"file_{i}.bin", + ) + for i in range(file_count) + ] + async with DownloadManager( + max_concurrent=max_concurrent, + download_dir=benchmark_download_dir, + file_exists_strategy=FileExistsStrategy.OVERWRITE, + ) as manager: + await manager.add(files) + await manager.wait_until_complete() + + def run_downloads() -> None: + asyncio.run(download_batch()) + + benchmark(run_downloads)