Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,10 @@ __marimo__/
.cursor/

# Internal planning documents
PUBLICATION_PLAN.md
GITHUB_ISSUES.md
.github/IDEAS_INTERNAL.md

# Downloaded files directory (root level only, not src/rheo/downloads/)
/downloads/
.tmp/
tmp/
tmp/
benchmarks/.results/
22 changes: 18 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Async Downloader - Makefile
CODE_PATHS := src tests examples scripts
# Rheo - Makefile
CODE_PATHS := src tests examples scripts benchmarks

.PHONY: help clean format lint test test-cov test-quick type-check ci docs-cli examples
.PHONY: help clean format lint test test-cov test-quick type-check ci docs-cli examples benchmark benchmark-compare

.DEFAULT_GOAL := help

help:
@echo "Async Downloader - Make Commands"
@echo "Rheo - Make Commands"
@echo ""
@echo "Usage: make <command>"
@echo ""
Expand All @@ -20,6 +20,8 @@ help:
@echo " make ci Run all CI checks locally"
@echo " make docs-cli Generate CLI documentation"
@echo " make examples Run all example scripts"
@echo " make benchmark Run performance benchmarks"
@echo " make benchmark-compare Compare benchmark results"
@echo " make clean Clean up build artifacts"

clean:
Expand Down Expand Up @@ -76,3 +78,15 @@ docs-cli:
examples:
@echo "Running all examples..."
poetry run python scripts/run_examples.py

benchmark:
@echo "Running benchmarks..."
poetry run pytest benchmarks -v --benchmark-only --benchmark-autosave --benchmark-storage=benchmarks/.results

benchmark-compare:
@echo "Comparing benchmark results..."
@if ls benchmarks/.results/*/*.json 1>/dev/null 2>&1; then \
poetry run pytest-benchmark compare benchmarks/.results/*/*.json --group-by=name; \
else \
echo "No benchmark results found. Run 'make benchmark' first."; \
fi
22 changes: 22 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Benchmarks

Minimal benchmarking using `pytest-benchmark` and a local aiohttp fixture server.

## Running

- `make benchmark` — runs all benchmarks, saves JSON under `benchmarks/.results/`

## Comparing

- `make benchmark-compare` — compares all saved results in `benchmarks/.results/`

## Scenario

- `test_throughput_10_files_1mb` downloads 10 x 1MB files with 4 concurrent workers via `DownloadManager`
- Files are served locally from a background aiohttp server (`/file/{size}`) with deterministic content
- Uses `FileExistsStrategy.OVERWRITE` to ensure files are actually downloaded each iteration

## Outputs

- Results: `benchmarks/.results/` (gitignored)
- Downloads for each run: per-test `tmp_path / "downloads"` temp directory
Empty file added benchmarks/__init__.py
Empty file.
118 changes: 118 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Shared fixtures for benchmarking."""

import asyncio
import threading
import typing as t
from pathlib import Path

import pytest
from aiohttp import web

_PATTERN = b"X" * 1024


async def _file_handler(request: web.Request) -> web.Response:
    """Serve deterministic content of the requested size.

    The route pattern ``/file/{size}`` matches any path segment, so the
    segment must be validated here: a non-numeric value would otherwise
    raise ``ValueError`` (an opaque 500) and a negative value would feed
    ``divmod`` a negative operand and serve truncated garbage.

    Raises:
        web.HTTPBadRequest: if ``size`` is not a non-negative integer.
    """
    try:
        size = int(request.match_info["size"])
    except ValueError:
        raise web.HTTPBadRequest(reason="size must be an integer") from None
    if size < 0:
        raise web.HTTPBadRequest(reason="size must be non-negative")
    # Tile a fixed 1 KiB pattern rather than allocating `size` fresh bytes
    # one at a time; content is deterministic across runs by construction.
    chunks, remainder = divmod(size, len(_PATTERN))
    content = _PATTERN * chunks + _PATTERN[:remainder]
    return web.Response(body=content, content_type="application/octet-stream")


class _BenchmarkServer:
    """HTTP server running in a background thread for benchmark downloads.

    The aiohttp server needs a running asyncio event loop, but
    pytest-benchmark drives synchronous test functions, so the loop lives
    in a daemon thread. ``start()``/``stop()`` are called from the main
    (pytest) thread; all loop interaction crosses the thread boundary via
    ``run_coroutine_threadsafe`` / ``call_soon_threadsafe``.
    """

    def __init__(self) -> None:
        # Populated by the server thread once the socket is bound.
        self._base_url: str | None = None
        # Event loop owned and run exclusively by the background thread.
        self._loop: asyncio.AbstractEventLoop | None = None
        self._thread: threading.Thread | None = None
        self._runner: web.AppRunner | None = None
        # Handshake: set by the server thread on successful bind OR failure,
        # so start() never blocks past its timeout on a crashed thread.
        self._started = threading.Event()
        # Startup exception captured in the server thread for re-raise in start().
        self._error: BaseException | None = None

    @property
    def base_url(self) -> str:
        """Return the bound ``http://127.0.0.1:<port>`` URL.

        Raises:
            RuntimeError: if ``start()`` has not completed successfully.
        """
        if self._base_url is None:
            raise RuntimeError("Server not started")
        return self._base_url

    def start(self) -> None:
        """Start the server in a background thread.

        Blocks (up to 10s) until the server thread signals readiness,
        then re-raises any startup failure in the caller's thread.

        Raises:
            RuntimeError: on startup failure or handshake timeout.
        """
        self._thread = threading.Thread(target=self._run_server, daemon=True)
        self._thread.start()
        # Wait for the server thread to either bind the socket or fail.
        self._started.wait(timeout=10)
        if self._error is not None:
            raise RuntimeError(
                f"Server failed to start: {self._error}"
            ) from self._error
        # Event fired but no URL (or wait timed out): treat as startup timeout.
        if self._base_url is None:
            raise RuntimeError("Server failed to start (timeout)")

    def stop(self) -> None:
        """Stop the server and clean up.

        Called from the main thread: schedules runner cleanup and loop
        shutdown onto the server thread's loop, then joins the thread.
        Safe to call when startup never completed (guards are no-ops).
        """
        if self._loop and self._runner:
            # Cleanup must run on the loop that owns the runner.
            future = asyncio.run_coroutine_threadsafe(
                self._runner.cleanup(), self._loop
            )
            future.result(timeout=5)
        if self._loop:
            # Stopping the loop lets _run_server fall through run_forever().
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            self._thread.join(timeout=5)

    def _run_server(self) -> None:
        """Run the server event loop in this thread.

        Thread target: owns the event loop for its entire lifetime and
        blocks in ``run_forever()`` until ``stop()`` halts the loop.
        """
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)

        try:
            self._loop.run_until_complete(self._start_server())
            # Signal readiness only after the socket is bound and
            # _base_url is set; start() checks _base_url after this fires.
            self._started.set()
            self._loop.run_forever()
        except BaseException as e:
            self._error = e
            self._started.set()  # Unblock main thread so it can see the error
        finally:
            self._loop.close()

    async def _start_server(self) -> None:
        """Start the aiohttp server.

        Binds to port 0 so the OS assigns a free port, avoiding clashes
        with anything else running on the machine.
        """
        app = web.Application()
        app.router.add_get("/file/{size}", _file_handler)

        self._runner = web.AppRunner(app)
        await self._runner.setup()

        site = web.TCPSite(self._runner, host="127.0.0.1", port=0)
        await site.start()

        # Get the dynamically assigned port
        # NOTE(review): `site._server` is a private aiohttp attribute — there
        # is no public accessor for the bound sockets; may break on upgrade.
        sockets = site._server.sockets if site._server else []
        if not sockets:
            raise RuntimeError("Failed to bind server socket")

        port = sockets[0].getsockname()[1]
        self._base_url = f"http://127.0.0.1:{port}"


@pytest.fixture(scope="session")
def benchmark_server() -> t.Iterator[str]:
    """Session-scoped fixture yielding the base URL of a local file server.

    The server lives in a background thread (not an async fixture) because
    pytest-benchmark runs synchronous test functions; see _BenchmarkServer.
    Teardown is guaranteed via try/finally even if a test errors out.
    """
    srv = _BenchmarkServer()
    srv.start()
    try:
        yield srv.base_url
    finally:
        # Always shut the server thread down, even on test failure.
        srv.stop()


@pytest.fixture
def benchmark_download_dir(tmp_path: Path) -> Path:
    """Return a fresh per-test directory for benchmark downloads.

    Built under pytest's ``tmp_path`` so each benchmark run writes to an
    isolated location that pytest cleans up automatically.
    """
    target = tmp_path / "downloads"
    target.mkdir(exist_ok=True)
    return target
37 changes: 37 additions & 0 deletions benchmarks/test_throughput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Throughput benchmark scenarios."""

import asyncio
from pathlib import Path

from rheo.domain.file_config import FileConfig, FileExistsStrategy
from rheo.downloads import DownloadManager


def test_throughput_10_files_1mb(
    benchmark, benchmark_server: str, benchmark_download_dir: Path
) -> None:
    """Benchmark downloading 10 x 1MB files with 4 concurrent workers."""
    size_bytes = 1_000_000
    n_files = 10
    workers = 4

    async def _download_all() -> None:
        # Build one FileConfig per file; all point at the same 1MB endpoint
        # but get distinct filenames so they are written separately.
        configs = []
        for idx in range(n_files):
            configs.append(
                FileConfig(
                    url=f"{benchmark_server}/file/{size_bytes}",
                    filename=f"file_{idx}.bin",
                )
            )
        # OVERWRITE ensures each benchmark iteration really downloads,
        # rather than short-circuiting on files left by a prior round.
        async with DownloadManager(
            max_concurrent=workers,
            download_dir=benchmark_download_dir,
            file_exists_strategy=FileExistsStrategy.OVERWRITE,
        ) as mgr:
            await mgr.add(configs)
            await mgr.wait_until_complete()

    # pytest-benchmark drives a sync callable; each call spins up a fresh
    # event loop for the whole batch.
    benchmark(lambda: asyncio.run(_download_all()))
39 changes: 37 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ dev = [
"mypy>=1.11.0,<2.0.0",
"blockbuster>=1.5.25,<2.0.0",
"types-aiofiles>=25.1.0.20251011",
"pytest-benchmark>=5.2.3,<6.0.0",
]

[project.scripts]
Expand Down