diff --git a/.gitignore b/.gitignore
index 505a3b1..259d473 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,33 @@ dist/
wheels/
*.egg-info
+# macOS specific files
+.DS_Store
+
+# checkpoints
+checkpoints_*/
+
# Virtual environments
.venv
+
+# wandb files
+wandb/
+
+# Slurm logs
+logs/
+*.log
+
+# Big jsonl files
+data/
+*.jsonl
+
+# Environment files (secrets)
+.env
+.env.*
+
+
+# HPC specific files
+examples/code_exec/hpc/
+
+# personal research directory
+research/
diff --git a/.python-version b/.python-version
deleted file mode 100644
index e4fba21..0000000
--- a/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.12
diff --git a/AGENTS.md b/AGENTS.md
index 48c9080..69fe02f 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -20,9 +20,10 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
- **Separate Agent vs Environment**
- **Environment** = state transition function (+ optional scalar reward) with minimal assumptions; can be multi-agent by default.
- - **Agent** = LLM *with state* (prompt harness + memory + parsing + optional auxiliary tools).
- - auxiliary tools = tools that don't change the state of the environment
- - Rationale: reuse environments across different “agent harnesses” (memory schemes, parsers, prompts, tools) and reuse harness pieces across environments.
+ - **Agent** = LLM *with state* (prompt harness + memory + parsing + optional tools).
+ - Internal tools = executed by agent (calculator, code interpreter); don't change env state
+ - External tools = returned to protocol for handling (delegation, sub-agents)
+ - Rationale: reuse environments across different "agent harnesses" (memory schemes, parsers, prompts, tools) and reuse harness pieces across environments.
- **Make the interaction loop explicit**
- Neither env nor agent “owns” rollout generation. An **InteractionProtocol** owns the agent<-->env loop and produces rollouts.
@@ -50,6 +51,11 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
## Core Abstractions (Where + What)
- **Shared types (rollouts, steps, truncation flags)**: `src/ludic/types.py`
+- **Steps (agent vs env)**: See `CONSIDERATIONS.md` for the full rationale. The short version:
+ - **AgentStep**: Every model call, including internal tool loops. Contains `TokenTrace` for training.
+ - **EnvironmentStep**: State transitions (`env.step()` outcomes). References the triggering AgentSteps.
+ - Why separate? Training needs the full reasoning trace, not just final actions. A ReAct agent might call 3 tools before outputting an action—all those calls have token traces we want to train on.
+ - Rollouts keep a single timeline of both kinds; online batching concatenates all AgentSteps in a turn into one `SAWItem`.
- **Environment kernel (multi-agent by default)**: `src/ludic/envs/env.py`
- `LudicEnv.reset() -> {agent_id: (obs, info)}`
@@ -62,8 +68,10 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
- Wraps a `ChatClient` (inference backend), a `ContextStrategy` (memory/prompt building), and a `Parser` (action decoding + intrinsic format rewards/penalties).
- Handles incomplete completions (`finish_reason == "length"`) as parse failures (optional) to avoid training on truncated actions.
- Extended agent types:
- - `ToolAgent` (`src/ludic/agents/tool_agent.py`): OpenAI/vLLM-compatible tool calling with automatic schema generation from callables.
- - `ReActAgent` (`src/ludic/agents/react_agent.py`): Multi-step ReAct pattern with configurable `max_react_steps` for tool loops.
+ - `ToolAgent` (`src/ludic/agents/tool_agent.py`): Base for tool-calling agents. Supports two tool scopes:
+ - `tools`: Internal tools executed by agent (calculator, code interpreter). Results go to context, agent continues.
+ - `external_tools`: Tools returned to protocol for handling (delegation, sub-agents). Protocol feeds results back.
+ - `ReActAgent` (`src/ludic/agents/react_agent.py`): Multi-step ReAct pattern [Think → Tool]* → Act. Returns `action_target` indicating what happens next: `"internal"` (handled), `"external"` (protocol handles), or `"env"` (final action).
- **Context strategy (memory/prompt policy)**: `src/ludic/context/base.py`
- Hooks: `on_env_reset`, `on_before_act`, `on_after_act`, `on_after_step`.
@@ -77,8 +85,12 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
- **Interaction protocols (own the loop)**: `src/ludic/interaction/base.py`
- Single-agent synchronous loop: `src/ludic/interaction/single_agent.py`
+ - Supports `external_tool_handler` callback for handling external tool calls
- Multi-agent loop (per-agent rollouts via `TraceCollector`): `src/ludic/interaction/multi_agent.py`, `src/ludic/interaction/step_collector.py`
- - Key behavior: parser failures are handled *inside the protocol* (synthetic step, no `env.step()` call), so env stays parser-agnostic.
+ - Key behaviors:
+ - Parser failures are handled *inside the protocol* (synthetic step, no `env.step()` call), so env stays parser-agnostic.
+ - External tool calls (`action_target="external"`) are routed through `external_tool_handler`; results are fed back to agent context and the agent continues reasoning.
+ - **Delegation pattern**: External tools enable hierarchical agents where a parent can spawn sub-agents. The protocol handles the sub-agent's rollout and returns results to the parent. Both rollouts are collected for training. See `CONSIDERATIONS.md` for details.
- Utility: `src/ludic/interaction/info.py` provides `merge_step_info()` for safely merging step metadata with collision detection on reserved keys.
- **Rollout execution + collation**: `src/ludic/training/batching/rollout_engine.py`
@@ -86,7 +98,7 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
- Converts rollouts → `SAWItem`s using either:
- exact token IDs returned by the inference backend (preferred), or
- `retokenize=True` with a caller-provided tokenizer.
- - Practical note: if you want drift-free RL on the *actual sampled tokens*, have your inference client return token IDs/logprobs (vLLM: `SamplingArgs["extras"]["extra_body"]["return_token_ids"]=True`).
+ - Practical note: Token-in mode (see README) ensures drift-free RL by using rollout-time token IDs directly. Use `ReturnSpec.for_rl()` or set `return_token_ids=True` in `InferenceSpec` to get token IDs from the backend.
- **Batch sources (trainer talks to these, not the engine)**: `src/ludic/training/types.py`
- Sync: `src/ludic/training/batching/synced_batching.py` (`RolloutBatchSource`)
@@ -96,9 +108,9 @@ Instead, Ludic is closer to **classical RL** – specifically policy-gradient me
- **Algorithm injection (credit + loss)**: `src/ludic/training/algorithm.py`
- `RLAlgorithm = (CreditAssigner, Loss)`
- - Presets: `make_reinforce()`, `make_reinforce_baseline()`, `make_grpo()`, `make_sft()`
+ - Presets: `make_reinforce()`, `make_reinforce_baseline()`, `make_grpo()`, `make_dr_grpo()`, `make_gspo()`, `make_cispo()`, `make_gmpo()`, `make_sft()`
- Credit assigners: `src/ludic/training/credit_assignment.py` – `MonteCarloReturn`, `GroupNormalizedReturn`, `EpisodicReturn`, `PerStepReward`, `ConstantCredit`
- - Losses: `src/ludic/training/loss.py`
+ - Losses: `src/ludic/training/loss.py` – `ReinforceLoss`, `TokenClippedSurrogateLoss`, `ClippedSurrogateLoss`, `CISPOLoss`, `GMPOLoss`, `MaskedCausalLMCrossEntropyLoss`
- **Trainer (optimization loop only)**: `src/ludic/training/trainer.py`
- Collates `SAWItem` → tensors and runs `RLAlgorithm.loss`.
@@ -135,6 +147,23 @@ GRPO mental model in this codebase:
- It avoids a learned **value function** by using a **Monte Carlo / group-relative baseline** (group mean reward for the same prompt) to form advantages.
- If you come from PPO-RLHF: think "PPO-shaped dataflow" without a critic/value model, where the "advantage" is estimated by group comparison rather than by GAE/value bootstrapping.
+## GMPO (Geometric-Mean Policy Optimization)
+
+**GMPO** (arXiv:2507.20673) is a variant of GRPO that uses the **geometric mean** of token-level importance ratios instead of the arithmetic mean.
+
+**Core idea**:
+- GRPO optimizes: (1/|o|) Σ_t ρ_t * A (arithmetic mean)
+- GMPO optimizes: (∏_t ρ_t)^(1/|o|) * A (geometric mean)
+
+The geometric mean is less sensitive to outlier importance ratios, which can help prevent extreme policy updates when individual tokens have unusually high or low ratios.
+
+**Implementation** (`src/ludic/training/loss.py`, `src/ludic/training/algorithm.py`):
+- **Loss**: `GMPOLoss` computes the geometric mean in log-space for numerical stability
+- **Objective**: J_GMPO = E[ (∏_t min(ρ_t * A, clip(ρ_t, e^-ε_low, e^ε_high) * A))^(1/|o|) * sgn(A) ]
+- **Clipping**: Token-level clipping in log-space, wider default range (e^-0.4, e^0.4) vs GRPO's (0.8, 1.2)
+- **Normalization**: 1/|o| sequence length normalization
+- **Preset**: `make_gmpo(group_size=4)` uses same credit assignment as GRPO (`GroupNormalizedReturn`)
+
## SFT / Offline RL
Ludic supports supervised fine-tuning (SFT) and offline RL through the same abstractions:
diff --git a/examples/code_exec/README.md b/examples/code_exec/README.md
new file mode 100644
index 0000000..caa6f4e
--- /dev/null
+++ b/examples/code_exec/README.md
@@ -0,0 +1,526 @@
+# Code Execution Training
+
+> Train LLMs on code generation with sandboxed test-driven evaluation.
+
+## What This Is
+
+This module provides an RL training environment where:
+- The **agent generates code** in response to programming problems
+- The **environment executes the code** in isolated containers
+- **Test cases verify correctness** and provide reward signal
+- The **trainer updates the policy** based on execution outcomes
+
+Key features:
+- **Sandboxed execution** — Generated code runs in Docker/Podman containers for security
+- **Persistent containers** — 40x faster than cold-start containers (17ms vs 700ms per execution)
+- **Automatic caching** — Skip redundant executions (especially valuable with CISPO/GRPO)
+- **Multi-backend** — Works on laptop (Docker) or HPC clusters (Podman-HPC)
+
+## Who This Is For
+
+**Experienced Ludic users**: Jump to [Quick Start](#quick-start) for copy-paste examples.
+
+**New to Ludic**: Read [How It Works](#how-it-works) first to understand the concepts.
+
+**Prerequisites**:
+- Familiarity with Ludic's training concepts (`Trainer`, `RolloutEngine`, `BatchSource`)
+- Docker running locally, or Podman-HPC on your HPC cluster
+- A vLLM inference server for generation
+
+---
+
+## Quick Start
+
+### Prerequisites
+
+1. **Docker daemon running** — See [Setup Guide](#setup-guide) if not
+2. **HuggingFace token** — Create `.env` file: `echo 'HF_TOKEN=your_token' > .env`
+3. **Dependencies**: `pip install docker datasets peft`
+
+### 5-Minute Local Run
+
+```bash
+# Terminal 1: Start vLLM inference server
+CUDA_VISIBLE_DEVICES=0 uv run --env-file .env python -m ludic.inference.vllm_server \
+ --model Qwen/Qwen2.5-Coder-0.5B-Instruct
+
+# Terminal 2: Run training
+CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. uv run --env-file .env python examples/code_exec/train_apps.py \
+ --model Qwen/Qwen2.5-Coder-0.5B-Instruct \
+ --limit 100 \
+ --train-steps 10
+```
+
+You should see:
+- Sandbox pool starting with 4 workers
+- Baseline evaluation running
+- Training steps with reward metrics
+
+### HPC Cluster Run (Slurm)
+
+```bash
+# 1. Prepare environment on LOGIN NODE (one-time, requires internet)
+./examples/code_exec/prepare_env.sh
+
+# 2. Submit job to compute nodes
+sbatch examples/code_exec/train_apps_isambard.slurm
+```
+
+The Slurm script handles:
+- Starting vLLM server on GPU 0
+- Running training on GPU 1
+- Auto-detecting Podman-HPC backend
+- Structured logging in `logs/YYYY-MM-DD/`
+
+---
+
+## How It Works
+
+### The Training Loop
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ Training Loop │
+│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ │
+│ │ Trainer │◄──│ BatchSource │◄──│ RolloutEngine │ │
+│ └─────────────┘ └──────────────┘ └────────┬────────┘ │
+│ ▲ │ │
+│ │ ┌───────────▼─────────┐ │
+│ Weight Updates │ SingleAgentProtocol │ │
+│ │ └────────────┬────────┘ │
+│ ▼ │ │
+│ ┌───────────┐ ┌────────────▼────────┐ │
+│ │ vLLM │◄────────────────────│ CodeExecEnv │ │
+│ │ Server │ generates code └────────────┬────────┘ │
+│ └───────────┘ │ │
+└─────────────────────────────────────────────────┼───────────┘
+ │ executes
+ ┌────────────▼────────┐
+ │ SandboxPool │
+ │ ┌────┐ ┌────┐ │
+ │ │ S1 │ │ S2 │ ... │
+ │ └────┘ └────┘ │
+ └─────────────────────┘
+```
+
+**Step by step:**
+
+1. **RolloutEngine** creates a `CodeExecEnv` for each problem from the dataset
+2. **Agent** (via vLLM) generates Python code given the problem prompt
+3. **CodeExecEnv** sends the code to a sandboxed container for execution
+4. **Test cases** are run against the code; results determine the reward
+5. **Trainer** collects rollouts and updates the model weights
+6. **Weights are pushed** back to vLLM for the next generation round
+
+### Key Concepts
+
+#### Sandboxing
+
+**Why sandbox?** LLM-generated code can be malicious, buggy, or resource-hungry. Sandboxing:
+- Prevents file system access outside the container
+- Limits memory and CPU usage
+- Disables network access (by default)
+- Isolates each execution from others
+
+**Persistent containers** are the key to performance. Instead of starting a new container per execution (700ms overhead), we keep containers running and reuse them (17ms overhead).
+
+#### Backend Auto-Detection
+
+| Environment | Priority |
+|-------------|----------|
+| Inside Slurm job | `podman-hpc` → `docker` |
+| Outside Slurm | `docker` → `podman-hpc` |
+
+Override with `--sandbox-backend docker` or `--sandbox-backend podman-hpc`.
+
+#### Test-Driven Evaluation
+
+Each problem in the dataset has test cases (input/output pairs). The flow:
+
+1. **TestAdapter** extracts test cases from the dataset format (e.g., `APPSTestAdapter` for APPS)
+2. **StdinStdoutRunner** executes the code with each test's input as stdin
+3. **OutputVerifier** compares actual output to expected output
+4. **Reward** is computed based on test pass rate
+
+#### Caching
+
+The LRU cache prevents redundant execution:
+
+- **Cache key**: `hash(code) + hash(tests)`
+- **Hit rate**: Often 30-50% with CISPO/GRPO (multiple generations per prompt)
+- **Speedup**: Cache hits return instantly (no container execution)
+
+Monitor cache performance:
+```python
+stats = pool.cache_stats
+# {'hits': 150, 'misses': 50, 'size': 200}
+hit_rate = stats['hits'] / (stats['hits'] + stats['misses'])
+```
+
+### Reward Shaping
+
+| Event | Reward | Configurable | Rationale |
+|-------|--------|--------------|-----------|
+| All tests pass | `+1.0` | — | Complete success |
+| Some tests pass | `0.0` to `1.0` | `--partial-credit` | Smoother gradient signal |
+| All tests fail | `0.0` | — | No partial credit by default |
+| Compile error | `-0.1` | `compile_failure_reward` | Discourage syntax errors |
+| Proper code block | `+0.05` | Parser reward | Encourage correct formatting |
+
+**When to enable partial credit:**
+- Training from scratch (model needs incremental signal)
+- Long test suites where all-or-nothing is too sparse
+
+**When to keep binary rewards:**
+- Fine-tuning a capable model
+- Problems where partial correctness is meaningless
+
+---
+
+## Configuration Reference
+
+### Training Script Arguments (`train_apps.py`)
+
+#### Model & Inference
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--model` | `Qwen/Qwen2.5-3B-Instruct` | Model name or path |
+| `--host` | `127.0.0.1` | vLLM server host |
+| `--port` | `8000` | vLLM server port |
+| `--max-prompt-tokens` | `1024` | Max prompt length (longer prompts filtered) |
+| `--max-new-tokens` | `4096` | Max generation length |
+
+#### Training
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--train-steps` | `100` | Number of training steps |
+| `--batch-size` | `4` | Rollout requests per batch |
+| `--group-size` | `8` | CISPO group size (rollouts per prompt) |
+| `--train-temperature` | `0.8` | Sampling temperature |
+| `--max-seq-len` | `2048` | Max tokens per sample (truncation limit) |
+| `--micro-token-budget` | `16384` | Max padded tokens per micro-batch |
+
+#### LoRA
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--lora-rank` | `8` | LoRA rank |
+| `--lora-alpha` | `32` | LoRA alpha |
+| `--lora-dropout` | `0.0` | LoRA dropout |
+
+#### Dataset
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--split` | `train` | Dataset split |
+| `--limit` | None | Max samples to load |
+| `--difficulty` | None | Filter: `introductory`, `interview`, `competition` |
+| `--eval-samples` | `200` | Hold out for evaluation |
+
+#### Sandbox
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--sandbox-backend` | `auto` | `auto`, `docker`, `podman-hpc` |
+| `--sandbox-workers` | `4` | Container pool size |
+| `--python-version` | `3.11` | Python in sandbox |
+| `--timeout-per-test` | `2.0` | Per-test timeout (seconds) |
+| `--partial-credit` | `False` | Enable fractional rewards |
+| `--minimal-sandbox` | `False` | Skip memory/network limits (HPC compat) |
+| `--max-concurrent-ops` | `8` | Semaphore limit for Podman |
+
+#### Evaluation
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--eval-every` | `25` | Eval every N steps |
+| `--eval-before-start` | `True` | Run baseline evaluation |
+| `--eval-concurrency` | `32` | Parallel eval rollouts |
+| `--eval-temperature` | `0.0` | Greedy decoding for eval |
+
+#### Logging
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--wandb` | `False` | Enable W&B logging |
+| `--wandb-project` | `ludic-apps` | W&B project name |
+
+### Environment Configuration (`CodeExecConfig`)
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `timeout_per_test_s` | `5.0` | Per-test execution timeout |
+| `memory_limit_mb` | `256` | Container memory limit |
+| `max_tests` | `None` | Limit test count (None = all) |
+| `stop_on_first_failure` | `True` | Early stop on failure |
+| `compile_first` | `True` | Syntax check before running |
+| `partial_credit` | `False` | Reward = pass_rate (vs binary) |
+| `compile_failure_reward` | `-0.1` | Penalty for syntax errors |
+| `use_cache` | `True` | Enable execution caching |
+
+### Sandbox Pool Sizing
+
+| Environment | CPUs | Recommended `--sandbox-workers` |
+|-------------|------|--------------------------------|
+| Laptop (M1/M2) | 8-10 | 4 |
+| Workstation | 16-32 | 8-16 |
+| HPC node | 64-128 | 24-64 |
+
+**Rule of thumb**: Each sandbox uses ~0.5-1 CPU core. Use `floor(cpus / 2)`.
+
+**Concurrency vs Workers**:
+- `--concurrency` controls parallel rollouts (async tasks)
+- `--sandbox-workers` controls parallel code executions
+- If `concurrency > sandbox-workers`, tasks queue for sandboxes
+
+---
+
+## End-to-End Example
+
+This complete example shows how to build a training script from scratch:
+
+```python
+"""Minimal code execution training script."""
+
+import asyncio
+from datasets import load_dataset
+
+from ludic.agent import Agent
+from ludic.context import FullDialog
+from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams, ReturnSpec
+from ludic.interaction import SingleAgentProtocol
+from ludic.parsers import ParseResult
+from ludic.distributed.adapters import create_vllm_publisher
+from ludic.training import (
+ RolloutEngine, RolloutBatchSource, Trainer, TrainerConfig,
+ make_cispo, make_dataset_queue_requests_fn,
+)
+from ludic.envs.code_exec import (
+ CodeExecEnv, CodeExecConfig, create_sandbox_pool, APPSTestAdapter,
+)
+
+async def main():
+ # 1. Load dataset
+ ds = load_dataset("RoganInglis/apps-control-arena", split="train")
+ samples = [{"question": r["question"], "inputs": r["inputs"], "outputs": r["outputs"]}
+ for r in list(ds)[:100]]
+
+ # 2. Create sandbox pool (shared across all envs)
+ pool = await create_sandbox_pool(n_workers=4, backend="auto")
+
+ # 3. Setup inference client
+ client = VLLMChatClient(host="127.0.0.1", port=8000, enable_weight_updates=True)
+ publisher = create_vllm_publisher(client)
+
+ # 4. Environment factory (captures pool via closure)
+ adapter = APPSTestAdapter()
+ env_config = CodeExecConfig(timeout_per_test_s=5.0, partial_credit=False)
+
+ def env_factory(sample):
+ return CodeExecEnv(sample=sample, sandbox_pool=pool,
+ test_adapter=adapter, config=env_config)
+
+ # 5. Protocol factory
+ def protocol_factory():
+ return SingleAgentProtocol(agent=Agent(
+ client=client, model="Qwen/Qwen2.5-3B-Instruct",
+ ctx=FullDialog(),
+ parser=lambda raw: ParseResult(action=raw, reward=0.0, obs=None),
+ ))
+
+ # 6. Setup training pipeline
+ engine = RolloutEngine(
+ env_registry={"apps": env_factory},
+ protocol_registry={"single": protocol_factory},
+ )
+
+ algo = make_cispo(group_size=8, clip_eps_high=5.0, length_normalize=True)
+
+ batch_source = RolloutBatchSource(
+ orchestrator=engine,
+ credit_assigner=algo.credit_assigner,
+ requests_fn=make_dataset_queue_requests_fn(...), # See train_apps.py
+ concurrency=32,
+ )
+
+ # 7. Train
+ trainer = Trainer(
+ model=your_model, # Load with LoRA
+ algo=algo,
+ batch_source=batch_source,
+ publisher=publisher,
+ cfg=TrainerConfig(max_seq_len=2048, micro_token_budget=16384),
+ )
+
+ await trainer.train(num_steps=100)
+
+ # 8. Cleanup
+ await pool.shutdown()
+
+if __name__ == "__main__":
+ asyncio.run(main())
+```
+
+For a complete, production-ready script, see [`train_apps.py`](./train_apps.py).
+
+---
+
+## Customization
+
+### Using a Different Dataset
+
+Implement the `TestAdapter` protocol:
+
+```python
+from ludic.envs.code_exec import TestAdapter, TestCase
+
+class MyDatasetAdapter:
+ def get_tests(self, sample: dict) -> list[TestCase]:
+ return [
+ TestCase(input=t["stdin"], expected=t["stdout"], id=f"test_{i}")
+ for i, t in enumerate(sample["tests"])
+ ]
+
+ def get_prompt(self, sample: dict) -> str:
+ return sample["problem_description"]
+
+ def get_problem_id(self, sample: dict) -> str:
+ return sample["id"]
+```
+
+### Custom Reward Shaping
+
+Modify `CodeExecConfig`:
+
+```python
+config = CodeExecConfig(
+ partial_credit=True, # Reward = fraction of tests passed
+ compile_failure_reward=-0.5, # Harsher penalty for syntax errors
+ stop_on_first_failure=False, # Run all tests for full feedback
+)
+```
+
+### Custom Output Verification
+
+For floating-point comparisons:
+
+```python
+from ludic.envs.code_exec.adapters import FloatTolerantVerifier
+
+verifier = FloatTolerantVerifier(abs_tol=1e-6, rel_tol=1e-6)
+runner = StdinStdoutRunner(verifier=verifier)
+```
+
+For full API details, see the [Module README](../../src/ludic/envs/code_exec/README.md).
+
+---
+
+## Troubleshooting
+
+### "Docker daemon not running"
+
+```
+docker.errors.DockerException: Error while fetching server API version
+```
+
+**Solution**: Start Docker Desktop (macOS/Windows) or `sudo systemctl start docker` (Linux).
+
+### Tests timing out
+
+**Symptoms**: Many `TIMEOUT` results, slow training.
+
+**Diagnosis**: Check if problems have expensive test cases.
+
+**Solutions**:
+- Increase timeout: `--timeout-per-test 10.0`
+- Use batch execution (enabled by default)
+- Reduce number of tests: Set `max_tests` in `CodeExecConfig`
+
+### GPU out of memory
+
+**Solutions**:
+- Reduce `--batch-size`
+- Reduce `--micro-token-budget`
+- Enable gradient checkpointing (already on by default)
+
+### Slow sandbox initialization
+
+**Symptoms**: "Starting sandbox pool..." takes 30+ seconds.
+
+**Solutions**:
+- Reduce `--sandbox-workers` for initial testing
+- Pre-pull images: `docker pull python:3.11-slim`
+
+### Podman-HPC: Image not found on compute node
+
+**Cause**: Images must be migrated to shared storage.
+
+**Solution**:
+```bash
+podman-hpc pull python:3.11-slim # Auto-migrates
+podman-hpc images # Verify R/O=true
+```
+
+### Network access denied on compute node
+
+**Cause**: HPC compute nodes often lack internet access.
+
+**Solution**: Run `prepare_env.sh` on the login node first to pre-stage all dependencies.
+
+---
+
+## Setup Guide
+
+### Docker (Local Development)
+
+```bash
+# Install (macOS)
+brew install --cask docker
+
+# Install (Linux)
+curl -fsSL https://get.docker.com | sh
+sudo usermod -aG docker $USER && newgrp docker
+
+# Start daemon
+open -a Docker # macOS
+sudo systemctl start docker # Linux
+
+# Verify
+docker info && pip install docker>=7.0.0
+```
+
+For detailed setup, see [Docker documentation](https://docs.docker.com/get-docker/).
+
+### Podman-HPC (HPC Clusters)
+
+```bash
+# Pull and migrate image to shared storage
+podman-hpc pull python:3.11-slim
+
+# Verify migration (R/O should be 'true')
+podman-hpc images
+
+# Test execution
+srun -N 1 podman-hpc run --rm python:3.11-slim python -c "print('hello')"
+```
+
+For cluster-specific setup, consult your HPC documentation or [Podman-HPC docs](https://github.com/NERSC/podman-hpc).
+
+### Verifying Your Setup
+
+```bash
+# Run integration tests
+pytest tests/integration/test_code_exec_docker.py -v
+
+# If tests are skipped, Docker is not accessible
+```
+
+---
+
+## See Also
+
+- **Module README**: [src/ludic/envs/code_exec/README.md](../../src/ludic/envs/code_exec/README.md) — API reference, protocols, internals
+- **Migration Guide**: [MIGRATION.md](./MIGRATION.md) — Training API changes and migration steps
+- **Training Script**: [train_apps.py](./train_apps.py) — Production-ready example
diff --git a/examples/code_exec/train_apps.py b/examples/code_exec/train_apps.py
new file mode 100644
index 0000000..bb7496a
--- /dev/null
+++ b/examples/code_exec/train_apps.py
@@ -0,0 +1,771 @@
+"""
+APPS code generation training scaffold using CodeExecEnv with LoRA.
+
+This wires together:
+ - HuggingFace datasets for APPS code samples
+ - CodeExecEnv with sandboxed execution (Docker or Podman-HPC)
+ - SingleAgentProtocol with async env support
+ - LoRA adapters via PEFT for efficient fine-tuning
+  - CISPO with optional KL regularization
+ - Baseline + periodic evaluation on held-out samples
+ - RichLiveLogger (terminal dashboard) or WandB (cloud logging)
+
+Requirements:
+ - Container runtime: Docker daemon OR Podman-HPC (auto-detected)
+ - pip install docker>=7.0.0 datasets peft (for Docker backend)
+ - GPU(s) for training (optional for rollout-only mode)
+
+Usage:
+ # Start vLLM server (in one terminal)
+ CUDA_VISIBLE_DEVICES=0 uv run python -m ludic.inference.vllm_server \\
+ --model Qwen/Qwen2.5-3B-Instruct
+
+ # Run training with terminal dashboard (default)
+ CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. uv run python examples/code_exec/train_apps.py \\
+ --model Qwen/Qwen2.5-3B-Instruct \\
+ --limit 500 --eval-samples 200 --train-steps 100 --final-save
+
+ # Run training with KL regularization
+ CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. uv run python examples/code_exec/train_apps.py \\
+ --model Qwen/Qwen2.5-3B-Instruct \\
+ --limit 500 --eval-samples 200 --train-steps 100 \\
+ --kl-coeff 0.01 --final-save
+
+ # Run training with WandB logging
+ CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. uv run python examples/code_exec/train_apps.py \\
+ --model Qwen/Qwen2.5-3B-Instruct \\
+ --limit 500 --eval-samples 200 --train-steps 100 \\
+ --wandb --wandb-project ludic-apps --final-save
+
+Key Features:
+  - LoRA: rank=8, alpha=32, target_modules="all-linear" (configurable)
+ - Eval: Baseline before training, periodic eval every N steps
+ - Logging: Terminal sparkline dashboard or WandB cloud tracking
+ - KL regularization: Optional penalty to prevent policy drift
+
+See README.md for detailed setup instructions.
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import os
+import queue
+from typing import Any, Dict, List
+
+import torch
+from datasets import load_dataset # type: ignore
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import get_peft_model, LoraConfig, TaskType
+
+from ludic.agent import Agent
+from ludic.context import FullDialog
+from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams, ReturnSpec
+from ludic.interaction import SingleAgentProtocol
+from ludic.parsers import ParseResult
+from ludic.distributed.adapters import create_vllm_publisher
+from ludic.eval import EngineEvaluator
+from ludic.training import (
+ RolloutEngine,
+ RolloutBatchSource,
+ Trainer,
+ TrainerConfig,
+ CheckpointConfig,
+ make_dataset_queue_requests_fn,
+ RequestsExhausted,
+ RolloutRequest,
+ EnvSpec,
+ ProtocolSpec,
+ # Algorithm
+ make_cispo,
+)
+from ludic.training import Reducer, RichLiveLogger, default_reducers
+from ludic.training.loggers import WandbLogger
+from ludic.training.hardware import configure_flash_attention, log_hardware_info
+
+# Import CodeExecEnv components
+from ludic.envs.code_exec import (
+ CodeExecEnv,
+ CodeExecConfig,
+ create_sandbox_pool,
+ SandboxBackend,
+)
+from ludic.envs.code_exec.adapters.apps import APPSTestAdapter, APPS_SYSTEM_PROMPT
+
+import logging
+
+logging.basicConfig(
+ level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+)
+
+
+def code_block_parser(raw: str) -> ParseResult:
+ """
+ Parse code from markdown code blocks or raw text.
+
+ Accepts:
+ - ```python\n...\n```
+ - ```\n...\n```
+ - Raw code (if no code blocks found)
+
+ Returns parsed code with small format reward for proper code blocks.
+ """
+ import re
+
+ # Try to extract from markdown code block
+ code_block_pattern = r"```(?:python)?\s*\n(.*?)(?:\n)?```"
+ match = re.search(code_block_pattern, raw, re.DOTALL)
+
+ if match:
+ code = match.group(1).strip()
+ return ParseResult(
+ action=code, reward=0.05, obs=None
+ ) # Small bonus for proper formatting
+
+    # No code block found: treat as a parse failure with a small penalty and corrective feedback
+ return ParseResult(action=None, reward=-0.1, obs="Please provide Python code.")
+
+
+def load_apps_samples(
+ split: str = "train",
+ limit: int | None = None,
+ difficulty: str | None = None,
+) -> List[Dict[str, Any]]:
+ """
+ Load APPS samples from HuggingFace datasets.
+
+ Args:
+ split: Dataset split ("train" or "test")
+ limit: Maximum number of samples to load
+ difficulty: Filter by difficulty ("introductory", "interview", "competition")
+
+ Returns:
+ List of sample dicts with question, inputs, outputs, etc.
+ """
+ # Load from the control-arena version which has cleaner formatting
+ ds = load_dataset("RoganInglis/apps-control-arena", split=split)
+
+ samples: List[Dict[str, Any]] = []
+ for idx, row in enumerate(ds):
+ # Filter by difficulty if specified
+ if difficulty and row.get("difficulty") != difficulty:
+ continue
+
+ # Skip nondeterministic problems (they require special handling)
+ if row.get("is_nondeterministic", False):
+ continue
+
+ samples.append(
+ {
+ "problem_id": row.get("problem_id", str(idx)),
+ "question": row["question"],
+ "inputs": row.get("inputs", []),
+ "outputs": row.get("outputs", []),
+ "difficulty": row.get("difficulty", "unknown"),
+ }
+ )
+
+ if limit is not None and len(samples) >= limit:
+ break
+
+ return samples
+
+
def main():
    """Train a LoRA adapter on the APPS code-generation dataset with RL.

    Wires together: APPS data loading + prompt-length filtering, a pool of
    sandboxed code-execution workers, rollout generation against a vLLM
    server, and the CISPO trainer with optional held-out evaluation.

    Returns:
        Process exit code: 0 on success, 1 on a setup failure (no samples
        loaded, every prompt filtered out, or sandbox pool creation failed).
    """
    parser = argparse.ArgumentParser(
        description="Train on APPS code generation dataset with LoRA"
    )

    # Model and inference
    parser.add_argument("--model", default="Qwen/Qwen2.5-3B-Instruct")
    parser.add_argument("--host", default="127.0.0.1", help="vLLM server host")
    parser.add_argument("--port", type=int, default=8000, help="vLLM server port")
    parser.add_argument(
        "--max-prompt-tokens", type=int, default=1024, help="Max prompt tokens"
    )
    parser.add_argument(
        "--max-new-tokens", type=int, default=4096, help="Max new tokens"
    )
    parser.add_argument(
        "--stop",
        nargs="*",
        default=None,
        help="Stop sequences (e.g. --stop '```' '')",
    )

    # LoRA configuration
    parser.add_argument("--lora-rank", type=int, default=8, help="LoRA rank")
    parser.add_argument(
        "--lora-alpha", type=int, default=32, help="LoRA alpha"
    )
    parser.add_argument("--lora-dropout", type=float, default=0.0, help="LoRA dropout")
    parser.add_argument("--lora-use-rslora", action="store_true", help="Use RSLora")

    # Attention configuration
    parser.add_argument(
        "--disable-flash-attn",
        action="store_true",
        help="Disable Flash Attention (fall back to SDPA)",
    )

    # KL regularization
    parser.add_argument(
        "--kl-coeff",
        type=float,
        default=0.0,
        help="KL penalty coefficient (0 = disabled)",
    )

    # Data
    parser.add_argument("--split", default="train", help="Dataset split")
    parser.add_argument("--limit", type=int, default=None, help="Max samples to load")
    parser.add_argument("--difficulty", default=None, help="Filter by difficulty")

    # Sandbox
    parser.add_argument(
        "--max-concurrent-ops",
        type=int,
        default=8,
        help="Max concurrent sandbox operations (prevents deadlock in HPC environments)",
    )
    parser.add_argument(
        "--sandbox-workers", type=int, default=4, help="Number of sandbox containers"
    )
    parser.add_argument(
        "--sandbox-backend",
        default="auto",
        choices=["auto", "docker", "podman-hpc"],
        help="Sandbox backend (default: auto-detect)",
    )
    parser.add_argument(
        "--python-version", default="3.11", help="Python version in sandbox"
    )
    parser.add_argument(
        "--minimal-sandbox",
        action="store_true",
        help="Use minimal sandbox config (no memory/network limits) for HPC compatibility",
    )
    parser.add_argument(
        "--timeout-per-test", type=float, default=2.0, help="Timeout per test (seconds)"
    )

    # Training
    parser.add_argument("--lr", type=float, default=1e-5, help="Learning rate")
    parser.add_argument(
        "--concurrency", type=int, default=32, help="Rollout concurrency"
    )
    parser.add_argument(
        "--batch-size", type=int, default=4, help="Rollout requests per batch"
    )
    parser.add_argument(
        "--train-steps",
        type=int,
        default=100,
        help="Training steps (0=run until exhausted)",
    )
    parser.add_argument("--group-size", type=int, default=8, help="GRPO group size")
    parser.add_argument(
        "--train-temperature", type=float, default=0.8, help="Sampling temperature"
    )
    parser.add_argument(
        "--partial-credit", action="store_true", help="Enable partial credit rewards"
    )
    parser.add_argument(
        "--max-seq-len",
        type=int,
        default=2048,
        help="Max tokens per sample (sequences are truncated to this)",
    )
    parser.add_argument(
        "--micro-token-budget",
        type=int,
        default=16384,
        help="Max padded tokens per micro-batch (replaces grad_accum_steps)",
    )

    # Evaluation
    parser.add_argument(
        "--eval-samples",
        type=int,
        default=200,
        help="Number of samples to hold out for eval",
    )
    parser.add_argument(
        "--eval-every", type=int, default=25, help="Eval every N training steps"
    )
    parser.add_argument(
        "--eval-before-start",
        action="store_true",
        default=True,
        help="Run baseline eval",
    )
    parser.add_argument(
        "--eval-concurrency", type=int, default=32, help="Eval concurrency"
    )
    parser.add_argument(
        "--eval-temperature",
        type=float,
        default=0.5,
        help="Eval sampling temperature",
    )

    # Logging
    parser.add_argument(
        "--wandb", action="store_true", help="Enable Weights & Biases logging"
    )
    parser.add_argument(
        "--wandb-project", type=str, default="ludic-apps", help="WandB project name"
    )

    # Checkpoints
    parser.add_argument("--rollout-log", default="data/apps_train_rollouts.jsonl")
    parser.add_argument("--checkpoint-dir", default="checkpoints_apps")
    parser.add_argument("--checkpoint-every", type=int, default=25)
    parser.add_argument(
        "--final-save", action="store_true", help="Save final checkpoint after training"
    )

    args = parser.parse_args()

    # Warn about concurrency/pool mismatch
    if args.concurrency > args.sandbox_workers:
        print(
            f"WARNING: concurrency ({args.concurrency}) > sandbox-workers ({args.sandbox_workers})"
        )
        print(
            f"         This means {args.concurrency - args.sandbox_workers} tasks will wait for sandboxes."
        )
        print(
            f"         Consider: --sandbox-workers={args.concurrency} OR --concurrency={args.sandbox_workers}"
        )
        print()

    # Setup rollout log (touch the file so downstream appends always succeed)
    rollout_log_path = os.path.abspath(args.rollout_log)
    os.makedirs(os.path.dirname(rollout_log_path) or ".", exist_ok=True)
    open(rollout_log_path, "a", encoding="utf-8").close()

    # Load tokenizer early (needed for prompt length filtering)
    print(f"Loading tokenizer: {args.model}")
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # Load data and split into train/eval sets
    print(f"Loading APPS samples (split={args.split}, limit={args.limit})...")
    all_samples = load_apps_samples(args.split, args.limit, args.difficulty)
    if not all_samples:
        print("ERROR: No APPS samples loaded.")
        return 1

    # Filter out samples with prompts that exceed max_prompt_tokens
    # This ensures max_new_tokens can fit within the model's context window
    def prompt_fits(sample: Dict[str, Any]) -> bool:
        messages = [
            {"role": "system", "content": APPS_SYSTEM_PROMPT},
            {"role": "user", "content": sample["question"]},
        ]
        token_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        return len(token_ids) <= args.max_prompt_tokens

    pre_filter_count = len(all_samples)
    all_samples = [s for s in all_samples if prompt_fits(s)]
    filtered_count = pre_filter_count - len(all_samples)
    if filtered_count > 0:
        print(
            f"Filtered {filtered_count} samples exceeding {args.max_prompt_tokens} prompt tokens."
        )

    if not all_samples:
        print(
            "ERROR: All samples filtered out by prompt length. Increase --max-prompt-tokens."
        )
        return 1

    # Split: last N samples for eval (deterministic, reproducible)
    if args.eval_samples > 0 and len(all_samples) > args.eval_samples:
        train_samples = all_samples[: -args.eval_samples]
        eval_samples = all_samples[-args.eval_samples :]
    else:
        train_samples = all_samples
        eval_samples = []

    print(f"Loaded {len(all_samples)} total samples (after filtering).")
    print(f"  Train: {len(train_samples)} samples")
    print(f"  Eval:  {len(eval_samples)} samples (held out)")

    samples_q: queue.Queue = queue.Queue()
    for idx, s in enumerate(train_samples):
        samples_q.put((idx, s))

    # Load model with LoRA
    print(f"Loading model: {args.model}")

    # Configure Flash Attention (auto-detects optimal implementation)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    attn_impl = configure_flash_attention(device, disable_flash_attn=args.disable_flash_attn)
    log_hardware_info()
    print(f"Attention implementation: {attn_impl}")

    base_model = AutoModelForCausalLM.from_pretrained(
        args.model,
        # `dtype=` replaces the deprecated `torch_dtype=` kwarg (matches the
        # migration applied to the other training scripts in this repo).
        dtype=torch.bfloat16,
        trust_remote_code=True,
        attn_implementation=attn_impl,
    )

    # Apply LoRA adapter
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        # Honor the CLI flag; this was previously hard-coded to False,
        # silently ignoring --lora-use-rslora.
        use_rslora=args.lora_use_rslora,
        bias="none",
        target_modules=[
            "q_proj",  # Attention: Query projection
            "k_proj",  # Attention: Key projection
            "v_proj",  # Attention: Value projection
            "o_proj",  # Attention: Output projection
            "gate_proj",  # MLP: Gating projection
            "up_proj",  # MLP: Up projection
            "down_proj",  # MLP: Down projection
        ],
    )
    model = get_peft_model(base_model, lora_config)
    model.to(device)
    model.print_trainable_parameters()
    print(
        f"Model loaded on {device} with LoRA (rank={args.lora_rank}, alpha={args.lora_alpha})."
    )

    # Setup sandbox pool (owns its own event loop; trainer runs on it too)
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # Build backend kwargs (minimal mode skips memory/network limits for HPC compatibility)
    backend_kwargs = {}
    if args.minimal_sandbox:
        backend_kwargs["memory_limit"] = None
        backend_kwargs["network_disabled"] = False

    try:
        sandbox_pool = loop.run_until_complete(
            create_sandbox_pool(
                n_workers=args.sandbox_workers,
                backend=args.sandbox_backend,
                python_version=args.python_version,
                max_concurrent_ops=args.max_concurrent_ops,
                cache_size=10000,
                **backend_kwargs,
            )
        )
    except RuntimeError as e:
        print(f"ERROR: {e}")
        return 1

    # Create shared adapter and config
    test_adapter = APPSTestAdapter()
    env_config = CodeExecConfig(
        timeout_per_test_s=args.timeout_per_test,
        stop_on_first_failure=False,
        compile_first=True,
        partial_credit=args.partial_credit,
        compile_failure_reward=-0.1,
        use_cache=True,
    )

    # Shared client for inference
    client = VLLMChatClient(host=args.host, port=args.port, enable_weight_updates=True)
    publisher = create_vllm_publisher(client)

    # Environment factory (captures sandbox_pool via closure)
    def env_factory(sample: Dict[str, Any]) -> CodeExecEnv:
        return CodeExecEnv(
            sample=sample,
            sandbox_pool=sandbox_pool,
            test_adapter=test_adapter,
            config=env_config,
            system_prompt=APPS_SYSTEM_PROMPT,
        )

    env_registry = {"apps": env_factory}

    def protocol_factory():
        return SingleAgentProtocol(
            agent=Agent(
                client=client,
                model=args.model,
                ctx=FullDialog(),
                parser=code_block_parser,
            )
        )

    protocol_registry = {"single_agent": protocol_factory}

    # Algorithm (CISPO - better for reasoning tokens)
    algo = make_cispo(
        group_size=args.group_size,
        group_normalize_adv=True,
        clip_eps_high=0.2,
        length_normalize=True,
        kl_coeff=args.kl_coeff,
    )
    print("Using CISPO algorithm (better for reasoning/self-correction tokens)")
    print(f"KL coefficient: {args.kl_coeff}")

    # Engine + batch source
    engine = RolloutEngine(
        env_registry=env_registry,
        protocol_registry=protocol_registry,
        jsonl_path=rollout_log_path,
    )

    train_inference = InferenceSpec(
        sampling=SamplingParams(
            temperature=args.train_temperature,
            max_tokens=args.max_new_tokens,
            stop=args.stop,
        ),
        return_=ReturnSpec.for_rl(top_logprobs_k=1),
    )

    requests_fn = make_dataset_queue_requests_fn(
        samples_q,
        batch_size=args.batch_size,
        env_kind="apps",
        protocol_kind="single_agent",
        inference=train_inference,
        protocol_kwargs={},
        request_meta_fn=lambda idx, sample: {
            "sample_index": idx,
            "problem_id": sample.get("problem_id", idx),
            "difficulty": sample.get("difficulty", "unknown"),
        },
        env_seed_fn=lambda idx, _sample: idx,
        sampling_seed_fn=lambda idx, _sample: idx,
        group_size=args.group_size,
    )

    batch_source = RolloutBatchSource(
        orchestrator=engine,
        credit_assigner=algo.credit_assigner,
        requests_fn=requests_fn,
        max_steps=1,  # Single-step env
        concurrency=args.concurrency,
    )

    # Trainer config with eval settings
    cfg = TrainerConfig(
        model_device=device,
        lr=args.lr,
        max_seq_len=args.max_seq_len,
        micro_token_budget=args.micro_token_budget,
        max_grad_norm=0.1,
        pad_token_id=tokenizer.pad_token_id,
        eval_at_start=bool(args.eval_before_start and eval_samples),
        eval_every_n_steps=(
            args.eval_every
            if args.eval_every and args.eval_every > 0 and eval_samples
            else None
        ),
        eval_concurrency=args.eval_concurrency,
        eval_max_steps=1,
    )

    checkpoint_cfg = CheckpointConfig(
        output_dir=args.checkpoint_dir,
        every_n_steps=args.checkpoint_every,
        max_to_keep=2,
        save_optimizer=True,
    )

    # Training reducers
    reducers = {
        "all_passed_rate": Reducer(
            kind="count_true",
            source="all_passed",
            normalize_by="rollouts",
        ),
        "compile_fail_rate": Reducer(
            kind="count_true",
            source="compile_failed",
            normalize_by="rollouts",
        ),
        "avg_pass_rate": Reducer(
            kind="mean",
            source="pass_rate",
        ),
        "parse_err_rate": Reducer(
            kind="count_true",
            source="parse_error",
            normalize_by="samples",
        ),
        "total_completion_tokens": Reducer(
            kind="sum",
            source="completion_length",
        ),
        **default_reducers(),
    }

    # Eval reducers (for held-out samples)
    eval_reducers = {
        "all_passed_rate": Reducer(
            kind="count_true",
            source="all_passed",
            normalize_by="rollouts",
            as_percent=True,
        ),
        "compile_fail_rate": Reducer(
            kind="count_true",
            source="compile_failed",
            normalize_by="rollouts",
            as_percent=True,
        ),
        "avg_pass_rate": Reducer(
            kind="mean",
            source="pass_rate",
        ),
        "parse_error_rate": Reducer(
            kind="count_true",
            source="parse_error",
            normalize_by="samples",
            as_percent=True,
        ),
        "avg_completion_tokens": Reducer(
            kind="mean",
            source="completion_length",
        ),
    }

    # Logging metrics to track
    log_keys = [
        # Core training
        "train/loss",
        "train/avg_total_reward",
        # APPS-specific
        "train/all_passed_rate",
        "train/compile_fail_rate",
        "train/avg_pass_rate",
        "train/parse_err_rate",
        "train/avg_completion_length",
        # Eval metrics
        "eval/all_passed_rate",
        "eval/compile_fail_rate",
        "eval/avg_pass_rate",
        "eval/parse_error_rate",
        "eval/avg_completion_tokens",
        # Counts
        "train/target_rollouts",
        "train/num_samples",
    ]

    # Configure logger (WandB or RichLive terminal dashboard)
    if args.wandb:
        train_logger = WandbLogger(project=args.wandb_project, config=dict(vars(args)))
        print(f"WandB logging enabled: project={args.wandb_project}")
    else:
        train_logger = RichLiveLogger(
            keys=log_keys,
            spark_key="avg_total_reward",
            history=100,
            precision=4,
        )

    # Create EngineEvaluator for eval set
    eval_inference = InferenceSpec(
        sampling=SamplingParams(
            temperature=args.eval_temperature,
            max_tokens=args.max_new_tokens,
            stop=args.stop,
        ),
        return_=ReturnSpec.for_eval(return_token_ids=True),
    )

    evaluator = None
    if eval_samples:
        evaluator = EngineEvaluator(
            engine=RolloutEngine(
                env_registry=env_registry, protocol_registry=protocol_registry
            ),
            requests_fn=lambda: [
                RolloutRequest(
                    env=EnvSpec(kind="apps", kwargs={"sample": sample}),
                    protocol=ProtocolSpec(kind="single_agent"),
                    env_seed=idx,
                    sampling_seed=idx,
                    inference=eval_inference,
                    num_episodes=1,
                    meta={
                        "eval_idx": idx,
                        "problem_id": sample.get("problem_id", idx),
                        "difficulty": sample.get("difficulty", "unknown"),
                    },
                )
                for idx, sample in enumerate(eval_samples)
            ],
            reducers=eval_reducers,
            max_steps=1,
            timeout_s=cfg.eval_timeout_s,
            concurrency=cfg.eval_concurrency,
        )
        print(
            f"Eval configured: {len(eval_samples)} samples, every {args.eval_every} steps"
        )

    trainer = Trainer(
        model=model,
        algo=algo,
        batch_source=batch_source,
        publisher=publisher,
        enable_gradient_checkpointing=True,
        cfg=cfg,
        checkpoint_config=checkpoint_cfg,
        train_logger=train_logger,
        reducers=reducers,
        evaluator=evaluator,
    )

    print(f"\nStarting training for {args.train_steps} steps...")
    print(f"  Samples: {len(train_samples)}")
    print(f"  Batch size: {args.batch_size}")
    print(f"  Group size: {args.group_size}")
    print(f"  Concurrency: {args.concurrency}")
    print(f"  Sandbox workers: {args.sandbox_workers}")
    print(f"  Sandbox backend: {args.sandbox_backend}")
    print()

    try:
        loop.run_until_complete(trainer.train(args.train_steps))
    except RequestsExhausted:
        print("Training samples exhausted; stopping.")
    except KeyboardInterrupt:
        print("\nTraining interrupted.")
    finally:
        # Cleanup sandbox pool
        print("Shutting down sandbox pool...")
        loop.run_until_complete(sandbox_pool.shutdown())
        loop.close()

    # Save final checkpoint if requested
    if args.final_save:
        try:
            ckpt_path = trainer.save_checkpoint(metadata={"final": True})
            print(f"Final checkpoint saved: {ckpt_path}")
        except RuntimeError:
            pass  # No checkpointer configured

    # Close WandB if used
    if args.wandb:
        train_logger.close()
        print("WandB run finished.")

    print("Training complete.")
    return 0
+
+
+if __name__ == "__main__":
+ exit(main())
diff --git a/examples/fsdp2_training/train_math_fsdp2.py b/examples/fsdp2_training/train_math_fsdp2.py
index 8c8730f..23b9907 100644
--- a/examples/fsdp2_training/train_math_fsdp2.py
+++ b/examples/fsdp2_training/train_math_fsdp2.py
@@ -30,7 +30,7 @@
from ludic.agent import Agent
from ludic.context import FullDialog
from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams, ReturnSpec
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
from ludic.distributed import create_vllm_publisher
from ludic.parsers import boxed_parser, extract_last_boxed_content
from ludic.eval import EngineEvaluator
@@ -301,7 +301,7 @@ def main() -> None:
env_registry = {"math": lambda sample: MATHEnv(sample=sample, system_prompt=args.system_prompt)}
def protocol_factory():
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
client=client,
model=args.model,
diff --git a/examples/gsm8k/train_gsm8k.py b/examples/gsm8k/train_gsm8k.py
index 6b3c4bb..6d5cec3 100644
--- a/examples/gsm8k/train_gsm8k.py
+++ b/examples/gsm8k/train_gsm8k.py
@@ -4,7 +4,7 @@
This wires together:
- HF datasets for GSM8K samples
- single-sample QA envs (GSM8KEnv)
- - SingleAgentSyncProtocol with a shared VLLMChatClient
+ - SingleAgentProtocol with a shared VLLMChatClient
- RolloutBatchSource + MonteCarloReturn credit
- Trainer with REINFORCE loss
@@ -27,7 +27,7 @@
from ludic.agent import Agent
from ludic.context import FullDialog
from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams, ReturnSpec
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
from ludic.parsers import boxed_parser
from ludic.distributed.adapters import create_vllm_publisher
from ludic.eval import EngineEvaluator
@@ -140,7 +140,7 @@ def main():
env_registry = {"gsm8k": lambda sample: GSM8KEnv(sample=sample, system_prompt=args.system_prompt)}
def protocol_factory():
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
client=client,
model=args.model,
diff --git a/examples/pipeline_rl/run_actor.py b/examples/pipeline_rl/run_actor.py
index 462a9b2..0a154db 100644
--- a/examples/pipeline_rl/run_actor.py
+++ b/examples/pipeline_rl/run_actor.py
@@ -14,7 +14,7 @@
RolloutRequest,
make_reinforce,
)
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
# Env Import
from environments.tic_tac_toe import TicTacToeEnv
@@ -40,11 +40,11 @@ def create_engine(client: VLLMChatClient) -> RolloutEngine:
training_prompt = base_prompt + "\n\nOutput your move as a single XML tag, e.g., A1."
def create_protocol():
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
- client=client,
- model=MODEL_NAME,
- ctx=FullDialog(system_prompt=training_prompt),
+ client=client,
+ model=MODEL_NAME,
+ ctx=FullDialog(system_prompt=training_prompt),
parser=xml_tag_parser("move")
),
stop_on_parse_error=True,
diff --git a/examples/rejection_sampling.py b/examples/rejection_sampling.py
index e12a77d..a2b52c4 100644
--- a/examples/rejection_sampling.py
+++ b/examples/rejection_sampling.py
@@ -17,7 +17,7 @@
from ludic.agent import Agent
from ludic.context import FullDialog
from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
from ludic.parsers import xml_tag_parser
from ludic.training import RolloutEngine, EnvSpec, ProtocolSpec, RolloutRequest
from ludic.types import Rollout
@@ -78,7 +78,7 @@ async def generate_filtered_data(args: argparse.Namespace) -> None:
prompt_text = build_system_prompt()
def create_protocol():
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
client=client,
model=args.model,
diff --git a/examples/tic_tac_toe/generate_synth_data.py b/examples/tic_tac_toe/generate_synth_data.py
index ed9f90a..e635eb6 100644
--- a/examples/tic_tac_toe/generate_synth_data.py
+++ b/examples/tic_tac_toe/generate_synth_data.py
@@ -46,8 +46,7 @@ def build_system_prompt() -> str:
"""Build system prompt matching train_tic_tac_toe.py"""
base_prompt = TicTacToeEnv().suggested_sysprompt or ""
return (
- base_prompt
- + "\n\nThink through the board in .... "
+ base_prompt + "\n\nThink through the board in .... "
"After , output exactly one XML tag of the form A1 and nothing else."
)
@@ -160,7 +159,9 @@ def apply_prompt_format(
if include_step:
truncated_messages = _truncate_history_messages(full_messages, placeholder)
- chat_messages = truncated_messages if truncate_history else list(full_messages)
+ chat_messages = (
+ truncated_messages if truncate_history else list(full_messages)
+ )
prompt_text = _messages_to_prompt(chat_messages)
@@ -225,7 +226,7 @@ async def generate_synth_data(args: argparse.Namespace) -> None:
prompt_text = build_system_prompt()
def create_protocol():
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
client=client,
model=args.model,
@@ -248,7 +249,9 @@ def create_protocol():
if args.min_completion_tokens > 0 or args.max_completion_tokens > 0:
return_spec = ReturnSpec.for_eval(return_token_ids=True)
inference = InferenceSpec(
- sampling=SamplingParams(temperature=args.temperature, max_tokens=args.max_tokens),
+ sampling=SamplingParams(
+ temperature=args.temperature, max_tokens=args.max_tokens
+ ),
return_=return_spec,
)
@@ -286,7 +289,9 @@ def create_protocol():
results[res] += 1
else:
results["other"] += 1
- print(f"Generated {total} rollouts: {results['win']} wins, {results['loss']} losses, {results['draw']} draws")
+ print(
+ f"Generated {total} rollouts: {results['win']} wins, {results['loss']} losses, {results['draw']} draws"
+ )
# Filter and transform
out_path = Path(args.output)
@@ -323,16 +328,24 @@ def create_protocol():
too_short = stats.get("too_short", 0)
kept_steps = stats.get("kept_steps", 0)
if missing_trace:
- print(f"Skipped {missing_trace} steps missing token traces (enable return_token_ids).")
+ print(
+ f"Skipped {missing_trace} steps missing token traces (enable return_token_ids)."
+ )
if too_short:
- print(f"Skipped {too_short} steps with completion length < {args.min_completion_tokens}.")
+ print(
+ f"Skipped {too_short} steps with completion length < {args.min_completion_tokens}."
+ )
if too_long:
- print(f"Skipped {too_long} steps with completion length > {args.max_completion_tokens}.")
+ print(
+ f"Skipped {too_long} steps with completion length > {args.max_completion_tokens}."
+ )
print(f"Kept {kept_steps} steps after length filtering.")
if dropped_empty:
print(f"Skipped {dropped_empty} rollouts with no remaining steps.")
if args.transform:
- print(f" (transformed to TruncatedThinking format with placeholder: '{args.placeholder}')")
+ print(
+ f" (transformed to TruncatedThinking format with placeholder: '{args.placeholder}')"
+ )
def main():
@@ -345,8 +358,12 @@ def main():
parser.add_argument("--port", type=int, default=8000)
# Generation
- parser.add_argument("--episodes", type=int, default=5000, help="Total episodes to generate.")
- parser.add_argument("--max-steps", type=int, default=5, help="Max steps per episode.")
+ parser.add_argument(
+ "--episodes", type=int, default=5000, help="Total episodes to generate."
+ )
+ parser.add_argument(
+ "--max-steps", type=int, default=5, help="Max steps per episode."
+ )
parser.add_argument("--concurrency", type=int, default=32)
parser.add_argument("--temperature", type=float, default=0.8)
parser.add_argument("--max-tokens", type=int, default=250)
@@ -364,16 +381,35 @@ def main():
)
# Transformation
- parser.add_argument("--transform", action="store_true", default=True,
- help="Truncate history to TruncatedThinking format (default: True)")
- parser.add_argument("--no-transform", action="store_false", dest="transform",
- help="Keep full assistant history in prompts")
- parser.add_argument("--placeholder", default="[TRUNCATED]",
- help="Placeholder for truncated thinking blocks")
- parser.add_argument("--lean", action="store_true", default=True,
- help="Drop heavy metadata to keep JSONL small (default: True)")
- parser.add_argument("--no-lean", action="store_false", dest="lean",
- help="Keep full step/meta fields")
+ parser.add_argument(
+ "--transform",
+ action="store_true",
+ default=True,
+ help="Truncate history to TruncatedThinking format (default: True)",
+ )
+ parser.add_argument(
+ "--no-transform",
+ action="store_false",
+ dest="transform",
+ help="Keep full assistant history in prompts",
+ )
+ parser.add_argument(
+ "--placeholder",
+ default="[TRUNCATED]",
+ help="Placeholder for truncated thinking blocks",
+ )
+ parser.add_argument(
+ "--lean",
+ action="store_true",
+ default=True,
+ help="Drop heavy metadata to keep JSONL small (default: True)",
+ )
+ parser.add_argument(
+ "--no-lean",
+ action="store_false",
+ dest="lean",
+ help="Keep full step/meta fields",
+ )
# Output
parser.add_argument("--output", default="data/tictactoe_sft_train_data.jsonl")
diff --git a/examples/tic_tac_toe/sft_tic_tac_toe.py b/examples/tic_tac_toe/sft_tic_tac_toe.py
index 80c7eff..6461210 100644
--- a/examples/tic_tac_toe/sft_tic_tac_toe.py
+++ b/examples/tic_tac_toe/sft_tic_tac_toe.py
@@ -206,7 +206,7 @@ def main() -> None:
model = AutoModelForCausalLM.from_pretrained(
args.model,
- torch_dtype=torch.bfloat16,
+ dtype=torch.bfloat16,
device_map={"": "cpu"},
low_cpu_mem_usage=True,
trust_remote_code=True,
diff --git a/examples/tic_tac_toe/train_tic_tac_toe.py b/examples/tic_tac_toe/train_tic_tac_toe.py
index ebe040b..55230ca 100644
--- a/examples/tic_tac_toe/train_tic_tac_toe.py
+++ b/examples/tic_tac_toe/train_tic_tac_toe.py
@@ -3,7 +3,7 @@
This wires together:
- TicTacToeEnv single-agent episodes
- - SingleAgentSyncProtocol with a shared VLLMChatClient
+ - SingleAgentProtocol with a shared VLLMChatClient
- RolloutBatchSource + GroupNormalizedReturn credit
- Trainer with REINFORCE loss
- Optional periodic eval of win rate
@@ -25,7 +25,7 @@
from ludic.agent import Agent
from ludic.context import FullDialog, TruncatedThinkingContext
from ludic.inference import VLLMChatClient, InferenceSpec, SamplingParams, ReturnSpec
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
from ludic.distributed.adapters import create_vllm_publisher
from ludic.parsers import compose_parsers, think_prefix_parser, xml_tag_parser
from ludic.eval import EngineEvaluator
@@ -216,7 +216,7 @@ def protocol_factory():
ctx = TruncatedThinkingContext(system_prompt=system_prompt)
else:
ctx = FullDialog(system_prompt=system_prompt)
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=Agent(
client=client,
model=args.model,
diff --git a/pyproject.toml b/pyproject.toml
index 274e1e7..a87fb8c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,20 +11,27 @@ dependencies = [
"aiohttp>=3.13.2",
"beartype>=0.22.9",
"jaxtyping>=0.3.4",
+ "datasets>=4.4.2",
"openai>=2.7.1",
"peft>=0.18.0",
"rich>=14.2.0",
- "torch>=2.8.0",
- "vllm>=0.13.0",
+ "setuptools>=79.0.1",
+ # CRITICAL: torch>=2.9.0 required for aarch64 CUDA wheels
+ # PyTorch 2.8.0 has NO aarch64 CUDA wheels - skip it!
+ # See: https://download.pytorch.org/whl/cu128/torch/
+ "torch>=2.9.0",
+    "torch-c-dlpack-ext>=0.1.4",
+    # vLLM is Linux-only (depends on NVIDIA libraries)
+    # Use sys_platform marker to skip on macOS/Windows
+    "vllm>=0.12.0; sys_platform == 'linux'",
+ "wandb>=0.23.1",
+ # Flash Attention for efficient attention computation (Linux-only, requires CUDA)
+ "flash-attn>=2.7.0; sys_platform == 'linux'",
]
[project.optional-dependencies]
-pipelinerl = [
- "redis>=7.1.0",
-]
-examples = [
- "datasets==4.4.1", # pinned to the versions in uv.lock that are known to work
- "math-verify==0.8.0", # pinned to the versions in uv.lock that are known to work
+code-exec = [
+ "docker>=7.1.0",
]
[build-system]
@@ -56,3 +63,55 @@ markers = [
"diagnostic: marks tests that primarily emit diagnostic reports rather than asserting strict correctness",
]
testpaths = ["tests"]
+
+# =============================================================================
+# uv Configuration for Cross-Platform PyTorch
+# =============================================================================
+# This configuration automatically selects the correct PyTorch wheels:
+# - Linux: CUDA 12.8 wheels from pytorch-cu128 index
+# - macOS/Windows: CPU wheels from pytorch-cpu index
+#
+# Usage:
+# Local dev (macOS): uv sync
+# HPC (Linux GPU): uv sync
+# Linux CI (no GPU): uv sync --extra cpu
+#
+# See: https://docs.astral.sh/uv/guides/integration/pytorch/
+# See: https://docs.isambard.ac.uk/user-documentation/applications/ML-packages/
+# =============================================================================
+
+[tool.uv]
+# Flash Attention build configuration:
+# - Disable build isolation so torch is available during compilation
+# - Declare build-time dependencies explicitly
+# - Set MAX_JOBS for parallel compilation
+no-build-isolation-package = ["flash-attn"]
+
+[tool.uv.extra-build-dependencies]
+flash-attn = ["torch", "packaging", "ninja"]
+
+[tool.uv.extra-build-variables]
+flash-attn = { MAX_JOBS = "16" }
+
+# Platform-based torch source selection:
+# - Linux: Use CUDA 12.8 wheels (supports both x86_64 and aarch64)
+# - Non-Linux (macOS, Windows): Use CPU wheels
+[tool.uv.sources]
+torch = [
+ { index = "pytorch-cpu", marker = "sys_platform != 'linux'" },
+ { index = "pytorch-cu128", marker = "sys_platform == 'linux'" },
+]
+torchvision = [
+ { index = "pytorch-cpu", marker = "sys_platform != 'linux'" },
+ { index = "pytorch-cu128", marker = "sys_platform == 'linux'" },
+]
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
diff --git a/src/ludic/envs/code_exec/README.md b/src/ludic/envs/code_exec/README.md
new file mode 100644
index 0000000..0c4453c
--- /dev/null
+++ b/src/ludic/envs/code_exec/README.md
@@ -0,0 +1,263 @@
+# CodeExecEnv Module
+
+A sandboxed code execution environment for reinforcement learning on code generation tasks.
+
+## Module Structure
+
+```
+code_exec/
+├── __init__.py # Public API exports
+├── types.py # Data types (TestCase, TestResult, BatchTestResult, etc.)
+├── sandbox.py # Sandbox/SandboxPool protocols
+├── docker_sandbox.py # Docker-based sandbox implementation + LRU cache
+├── runners.py # Code execution strategies (StdinStdoutRunner)
+├── env.py # CodeExecEnv (main RL environment)
+└── adapters/
+ ├── base.py # TestAdapter, OutputVerifier protocols
+ └── apps.py # APPS dataset adapter
+```
+
+## Core Abstractions
+
+### Sandbox Protocol
+
+```python
+class Sandbox(Protocol):
+ """Single sandboxed execution environment."""
+
+ async def execute(
+ self,
+ code: str,
+ stdin: str = "",
+ timeout_s: float = 5.0,
+ ) -> ExecutionResult:
+ """Execute code and return result."""
+ ...
+```
+
+### SandboxPool Protocol
+
+```python
+class SandboxPool(Protocol):
+ """Pool of reusable sandboxes with caching."""
+
+ async def checkout(self, timeout_s: float = 30.0) -> Sandbox:
+ """Get a sandbox from the pool."""
+ ...
+
+ async def release(self, sandbox: Sandbox) -> None:
+ """Return sandbox to pool."""
+ ...
+
+ def cache_get(self, code_hash: str, tests_hash: str) -> BatchTestResult | None:
+ """Check cache for previous results."""
+ ...
+
+ def cache_put(self, code_hash: str, tests_hash: str, result: BatchTestResult) -> None:
+ """Store result in cache."""
+ ...
+```
+
+### TestAdapter Protocol
+
+```python
+class TestAdapter(Protocol):
+ """Extracts test cases from dataset samples."""
+
+    def get_tests(self, sample: dict[str, Any]) -> list[TestCase]:
+        """Extract test cases from a sample."""
+        ...
+
+    def get_prompt(self, sample: dict[str, Any]) -> str:
+        """Extract the problem description used as the prompt."""
+        ...
+```
+
+### CodeRunner Protocol
+
+```python
+class CodeRunner(Protocol):
+ """Executes code against test cases."""
+
+ async def run_tests(
+ self,
+ code: str,
+ tests: list[TestCase],
+ sandbox: Sandbox,
+ config: CodeExecConfig,
+ ) -> BatchTestResult:
+ """Run all tests and return results."""
+ ...
+```
+
+## Usage
+
+### Basic Setup
+
+```python
+from ludic.envs.code_exec import (
+ CodeExecEnv,
+ CodeExecConfig,
+ DockerSandboxPool,
+ DockerSandboxConfig,
+)
+from ludic.envs.code_exec.adapters.apps import APPSTestAdapter
+
+# Create sandbox pool
+pool_config = DockerSandboxConfig(
+ python_version="3.11",
+ memory_limit="256m",
+ cpu_quota=50000,
+ network_disabled=True,
+)
+pool = DockerSandboxPool(n_workers=4, config=pool_config)
+await pool.start()
+
+# Create environment
+env_config = CodeExecConfig(
+ timeout_per_test_s=5.0,
+ stop_on_first_failure=True,
+ partial_credit=False,
+)
+env = CodeExecEnv(
+ sample={"question": "...", "inputs": [...], "outputs": [...]},
+ sandbox_pool=pool,
+ test_adapter=APPSTestAdapter(),
+ config=env_config,
+)
+
+# Run episode
+obs, info = await env.env_reset()
+outcome = await env.env_step("print(input())")
+
+# Cleanup
+await pool.shutdown()
+```
+
+### With SingleAgentProtocol
+
+The protocol automatically detects async environments:
+
+```python
+from ludic.interaction import SingleAgentProtocol
+from ludic.agent import Agent
+
+protocol = SingleAgentProtocol(agent=agent)
+rollouts = await protocol.run(env=env, max_steps=3)
+```
+
+## Configuration
+
+### CodeExecConfig
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `timeout_per_test_s` | `float` | `5.0` | Timeout per test case |
+| `stop_on_first_failure` | `bool` | `True` | Stop after first failed test |
+| `compile_first` | `bool` | `True` | Check syntax before running |
+| `partial_credit` | `bool` | `False` | Reward based on pass fraction |
+| `compile_failure_reward` | `float` | `-0.1` | Reward for syntax errors |
+| `timeout_reward` | `float` | `-0.05` | Reward for timeout |
+| `use_cache` | `bool` | `True` | Enable result caching |
+
+### DockerSandboxConfig
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `python_version` | `str` | `"3.11"` | Python version in container |
+| `memory_limit` | `str` | `"256m"` | Container memory limit |
+| `cpu_quota` | `int` | `50000` | CPU quota (50% of one core) |
+| `network_disabled` | `bool` | `True` | Disable container networking |
+
+## Implementing Custom Adapters
+
+```python
+from ludic.envs.code_exec import TestAdapter, TestCase, ExactMatchVerifier
+
+class MyDatasetAdapter(TestAdapter):
+    def __init__(self):
+        self._verifier = ExactMatchVerifier(strip=True, case_sensitive=True)
+
+    def get_tests(self, sample: dict) -> list[TestCase]:
+        tests = []
+        for i, (inp, out) in enumerate(zip(sample["inputs"], sample["outputs"])):
+            tests.append(TestCase(input=inp, expected=out, id=f"test_{i}"))
+        return tests
+
+    def get_prompt(self, sample: dict) -> str:
+        return sample["problem_statement"]
+
+    @property
+    def verifier(self) -> ExactMatchVerifier:
+        return self._verifier
+```
+
+## Result Types
+
+### TestResult
+
+```python
+@dataclass
+class TestResult:
+ test_case: TestCase
+ passed: bool
+ actual: str | None
+ execution: ExecutionResult
+ error_message: str | None = None
+```
+
+### BatchTestResult
+
+```python
+@dataclass
+class BatchTestResult:
+ results: list[TestResult]
+ code_hash: str
+ tests_hash: str
+
+ @property
+ def passed_count(self) -> int: ...
+
+ @property
+ def total_count(self) -> int: ...
+
+ @property
+ def all_passed(self) -> bool: ...
+
+ @property
+ def pass_rate(self) -> float: ...
+```
+
+## Caching
+
+The `DockerSandboxPool` includes an LRU cache to avoid re-executing identical code:
+
+```python
+pool = DockerSandboxPool(
+ n_workers=4,
+ config=config,
+ cache_size=10000, # Max cached results
+)
+
+# Check cache stats
+print(pool.cache_stats)
+# {'hits': 150, 'misses': 50, 'size': 200, 'max_size': 10000}
+```
+
+Cache keys are computed from:
+- SHA256 hash of the code
+- SHA256 hash of serialized test cases
+
+## Thread Safety
+
+- `LRUCache`: Thread-safe via `threading.Lock`
+- `DockerSandboxPool`: Async-safe via `asyncio.Queue`
+- `CodeExecEnv`: Not thread-safe (one instance per rollout)
+
+## Dependencies
+
+**Required:**
+- `docker>=7.0.0` - Docker Python SDK
+
+**Optional (for specific adapters):**
+- `datasets` - HuggingFace datasets for APPS
diff --git a/src/ludic/envs/code_exec/__init__.py b/src/ludic/envs/code_exec/__init__.py
new file mode 100644
index 0000000..2b4f816
--- /dev/null
+++ b/src/ludic/envs/code_exec/__init__.py
@@ -0,0 +1,126 @@
"""
Code execution environment for RL on code generation tasks.

This module provides:
  - CodeExecEnv: Environment that executes code against test cases
  - Sandbox protocols: Async sandboxed execution
  - Test adapters: Dataset-specific test extraction
  - Code runners: Execution strategies (stdin/stdout, function calls, etc.)
  - Backend selection: Auto-detection and manual selection of sandbox backends

Supported backends:
  - Docker (requires docker package + daemon): pip install docker>=7.0.0
  - Podman-HPC (HPC clusters): requires podman-hpc CLI
  - Singularity (planned): not yet implemented

Usage:
    # Recommended: use the factory with auto-detection
    from ludic.envs.code_exec import create_sandbox_pool

    pool = await create_sandbox_pool(n_workers=4)  # Auto-detects backend
    pool = await create_sandbox_pool(n_workers=4, backend="podman-hpc")  # Explicit

    # Or import specific implementations
    from ludic.envs.code_exec import DockerSandboxPool  # Docker
    from ludic.envs.code_exec import PodmanHPCSandboxPool  # Podman-HPC
"""

from __future__ import annotations

from .types import (
    CompileStatus,
    RunStatus,
    CompileResult,
    ExecutionResult,
    TestCase,
    TestResult,
    BatchTestResult,
)
from .sandbox import Sandbox, SandboxPool
from .adapters.base import TestAdapter, OutputVerifier, ExactMatchVerifier
from .runners import CodeRunner, StdinStdoutRunner, compute_hash, hash_tests
from .env import CodeExecConfig, CodeExecEnv

# Backend detection and factory (always available)
from .backend import (
    SandboxBackend,
    detect_available_backend,
    is_docker_available,
    is_podman_hpc_available,
    is_singularity_available,
    get_backend_info,
)
from .factory import create_sandbox_pool

# Docker-related imports are optional (requires docker package)
try:
    from .docker_sandbox import (
        DockerSandboxConfig,
        DockerSandbox,
        DockerSandboxPool,
        LRUCache,
    )
    _DOCKER_AVAILABLE = True
except ImportError:
    # Keep the names bound (as None) so `from ... import DockerSandboxPool`
    # succeeds and fails with a clear error at *use* time instead of
    # breaking the whole package import when docker is absent.
    _DOCKER_AVAILABLE = False
    DockerSandboxConfig = None  # type: ignore[misc, assignment]
    DockerSandbox = None  # type: ignore[misc, assignment]
    DockerSandboxPool = None  # type: ignore[misc, assignment]
    LRUCache = None  # type: ignore[misc, assignment]

# Podman-HPC imports (always available - uses subprocess, no external package)
from .podman_sandbox import (
    PodmanConfig,
    PodmanHPCSandbox,
    PodmanHPCSandboxPool,
    PodmanError,
)

__all__ = [
    # Types
    "CompileStatus",
    "RunStatus",
    "CompileResult",
    "ExecutionResult",
    "TestCase",
    "TestResult",
    "BatchTestResult",
    # Protocols
    "Sandbox",
    "SandboxPool",
    "TestAdapter",
    "OutputVerifier",
    "CodeRunner",
    # Implementations
    "ExactMatchVerifier",
    "StdinStdoutRunner",
    # Environment
    "CodeExecConfig",
    "CodeExecEnv",
    # Utilities
    "compute_hash",
    "hash_tests",
    # Backend detection
    "SandboxBackend",
    "detect_available_backend",
    "is_docker_available",
    "is_podman_hpc_available",
    "is_singularity_available",
    "get_backend_info",
    # Factory
    "create_sandbox_pool",
    # Podman-HPC (always available)
    "PodmanConfig",
    "PodmanHPCSandbox",
    "PodmanHPCSandboxPool",
    "PodmanError",
]

# Add Docker-related exports only if available
if _DOCKER_AVAILABLE:
    __all__.extend([
        "DockerSandboxConfig",
        "DockerSandbox",
        "DockerSandboxPool",
        "LRUCache",
    ])
diff --git a/src/ludic/envs/code_exec/adapters/__init__.py b/src/ludic/envs/code_exec/adapters/__init__.py
new file mode 100644
index 0000000..f73237b
--- /dev/null
+++ b/src/ludic/envs/code_exec/adapters/__init__.py
@@ -0,0 +1,19 @@
+"""
+Dataset adapters for code execution environments.
+
+Each adapter knows how to extract test cases and prompts from a specific
+dataset format (APPS, HumanEval, LeetCode, etc.).
+"""
+
+from __future__ import annotations
+
+from .apps import APPS_SYSTEM_PROMPT, APPSTestAdapter
+from .base import ExactMatchVerifier, OutputVerifier, TestAdapter
+
+__all__ = [
+ "TestAdapter",
+ "OutputVerifier",
+ "ExactMatchVerifier",
+ "APPSTestAdapter",
+ "APPS_SYSTEM_PROMPT",
+]
diff --git a/src/ludic/envs/code_exec/adapters/apps.py b/src/ludic/envs/code_exec/adapters/apps.py
new file mode 100644
index 0000000..cff6bb9
--- /dev/null
+++ b/src/ludic/envs/code_exec/adapters/apps.py
@@ -0,0 +1,144 @@
+"""
+APPS dataset adapter.
+
+Compatible with:
+ - codeparrot/apps
+ - RoganInglis/apps-control-arena
+ - Similar stdin/stdout format datasets
+"""
+
+from __future__ import annotations
+
+import hashlib
+from typing import Any, Dict, List
+
+from ..types import TestCase
+
+
# NOTE(review): the "OUTPUT FORMAT" section below contains empty slots, and the
# line "Test your logic within before writing" reads as if XML-style tags
# (e.g. <think>...</think> / <answer>...</answer>) were stripped from this
# prompt at some point. Confirm the prompt still matches whatever format the
# response parser expects before training against it.
APPS_SYSTEM_PROMPT = """You are an expert Python programmer solving competitive programming problems.

Your solution will be tested against multiple test cases with different inputs. All tests must pass.

CRITICAL REQUIREMENTS:
1. Read the problem specification carefully - understand input/output format, constraints, and edge cases
2. Write a complete, self-contained Python script
3. Read input using input() or sys.stdin
4. Print output using print() - match the exact format required
5. Your code must compile without errors and handle all test cases

OUTPUT FORMAT (you MUST follow this exactly):


Brief analysis:
- Input/output format
- Key algorithm or approach
- Edge cases to handle



```python
# Your complete solution here
```


IMPORTANT:
- Keep concise - focus on problem understanding and approach
- Ensure your code compiles cleanly (no syntax errors)
- Match output format exactly (spacing, newlines, etc.)
- Test your logic within before writing
- Your solution will be executed against hidden test cases"""
+
+
class APPSTestAdapter:
    """Adapter for APPS-style stdin/stdout datasets.

    Works with codeparrot/apps, RoganInglis/apps-control-arena and any
    similar dataset that stores a problem statement plus parallel lists
    of stdin inputs and expected stdout outputs:

      - question: problem description (string)
      - inputs: list of stdin strings
      - outputs: list of expected stdout strings
      - problem_id: unique identifier

    Solutions are expected to be complete Python scripts that read from
    stdin and write to stdout.
    """

    def __init__(
        self,
        *,
        question_key: str = "question",
        inputs_key: str = "inputs",
        outputs_key: str = "outputs",
        problem_id_key: str = "problem_id",
    ) -> None:
        """Remember which sample keys hold the prompt, tests, and id."""
        self._keys = {
            "question": question_key,
            "inputs": inputs_key,
            "outputs": outputs_key,
            "problem_id": problem_id_key,
        }

    def get_prompt(self, sample: Dict[str, Any]) -> str:
        """Return the problem statement shown to the agent."""
        return str(sample[self._keys["question"]])

    def get_problem_id(self, sample: Dict[str, Any]) -> str:
        """Return the sample's identifier, or "unknown" when absent."""
        return str(sample.get(self._keys["problem_id"], "unknown"))

    def get_tests(self, sample: Dict[str, Any]) -> List[TestCase]:
        """Build TestCase objects from the parallel inputs/outputs lists.

        Args:
            sample: Dataset sample containing the inputs and outputs lists.

        Returns:
            One stdin/stdout TestCase per (input, output) pair.

        Raises:
            ValueError: If the inputs and outputs lists differ in length.
        """
        stdins = sample[self._keys["inputs"]]
        expected = sample[self._keys["outputs"]]
        if len(stdins) != len(expected):
            raise ValueError(
                f"Mismatched test case counts: {len(stdins)} inputs, "
                f"{len(expected)} outputs"
            )
        return [
            TestCase(input=given, expected=want, id=f"test_{idx}")
            for idx, (given, want) in enumerate(zip(stdins, expected))
        ]

    def hash_tests(self, tests: List[TestCase]) -> str:
        """Return a stable 16-hex-char SHA256 digest of the test cases.

        The digest covers every (input, expected) pair, so it is suitable
        as a cache-key component.
        """
        fingerprint = str([(t.input, t.expected) for t in tests])
        return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()[:16]
diff --git a/src/ludic/envs/code_exec/adapters/base.py b/src/ludic/envs/code_exec/adapters/base.py
new file mode 100644
index 0000000..99d63bf
--- /dev/null
+++ b/src/ludic/envs/code_exec/adapters/base.py
@@ -0,0 +1,249 @@
+"""
+Base protocols and default implementations for dataset adapters.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Protocol, Tuple, runtime_checkable
+
+from ..types import TestCase
+
+
@runtime_checkable
class TestAdapter(Protocol):
    """Structural interface mapping dataset samples to the TestCase world.

    One concrete adapter understands one dataset schema, so CodeExecEnv
    never has to know the raw sample layout. Implementations should be
    stateless and reusable across samples.
    """

    # Keep pytest from collecting this protocol as a test class.
    __test__ = False

    def get_tests(self, sample: Dict[str, Any]) -> List[TestCase]:
        """Return the executable test cases contained in *sample*."""
        ...

    def get_prompt(self, sample: Dict[str, Any]) -> str:
        """Return the problem text used as the agent's initial observation."""
        ...

    def get_problem_id(self, sample: Dict[str, Any]) -> str:
        """Return a unique identifier for logging, caching, and tracking."""
        ...

    def hash_tests(self, tests: List[TestCase]) -> str:
        """Return a deterministic hex digest of *tests* for cache keys."""
        ...
+
+
@runtime_checkable
class OutputVerifier(Protocol):
    """Structural interface for comparing produced vs expected output.

    Kept separate from TestAdapter because comparison policy (float
    tolerance, whitespace normalization, ...) is reusable across
    dataset formats.
    """

    def verify(self, actual: str, expected: str) -> Tuple[bool, Optional[str]]:
        """Compare *actual* against *expected*.

        Returns:
            (passed, details): details explains the mismatch when passed
            is False, and is None on success.
        """
        ...
+
+
class ExactMatchVerifier:
    """Default verifier: exact string equality with optional normalization.

    Suits competitive-programming style outputs (APPS, Codeforces, ...).
    """

    def __init__(self, *, strip: bool = True, case_sensitive: bool = True) -> None:
        """Configure whitespace stripping and case sensitivity."""
        self._strip = strip
        self._case_sensitive = case_sensitive

    def verify(self, actual: str, expected: str) -> Tuple[bool, Optional[str]]:
        """Return (True, None) on a match, else (False, diff explanation)."""
        lhs, rhs = actual, expected
        if self._strip:
            lhs, rhs = lhs.strip(), rhs.strip()
        if not self._case_sensitive:
            lhs, rhs = lhs.lower(), rhs.lower()
        if lhs == rhs:
            return True, None
        return False, self._generate_diff_details(lhs, rhs)

    def _generate_diff_details(self, actual: str, expected: str) -> str:
        """Describe the first point of divergence for debugging."""
        if len(actual) != len(expected):
            return (
                f"Length mismatch: got {len(actual)} chars, "
                f"expected {len(expected)} chars"
            )
        # Equal lengths: locate the first differing character, if any.
        mismatch = next(
            (i for i, (ca, ce) in enumerate(zip(actual, expected)) if ca != ce),
            None,
        )
        if mismatch is None:
            # Defensive fallback; equal-length, char-equal strings should
            # have compared equal before reaching here.
            return "Unknown difference (possibly trailing content)"
        lo = max(0, mismatch - 10)
        hi = min(len(actual), mismatch + 10)
        return (
            f"First diff at position {mismatch}: "
            f"got {repr(actual[mismatch])}, expected {repr(expected[mismatch])}. "
            f"Context: got '{actual[lo:hi]}', expected '{expected[lo:hi]}'"
        )
+
+
class WhitespaceNormalizedVerifier:
    """Compare outputs after collapsing every whitespace run to one space.

    Useful when content matters but exact spacing/newlines may vary.
    """

    def verify(self, actual: str, expected: str) -> Tuple[bool, Optional[str]]:
        """Return (True, None) on match, else (False, short explanation)."""
        lhs = " ".join(actual.split())
        rhs = " ".join(expected.split())
        if lhs != rhs:
            return False, f"Mismatch after whitespace normalization: got '{lhs[:100]}...', expected '{rhs[:100]}...'"
        return True, None
+
+
class FloatTolerantVerifier:
    """Token-wise comparison with numeric tolerance for float tokens.

    Non-numeric tokens must match exactly; numeric tokens match when
    |actual - expected| <= max(abs_tol, rel_tol * |expected|). Useful
    for numerical problems where tiny float differences are acceptable.
    """

    def __init__(
        self,
        *,
        abs_tol: float = 1e-9,
        rel_tol: float = 1e-9,
        strip: bool = True,
    ) -> None:
        """Configure absolute/relative tolerance and whitespace stripping."""
        self._abs_tol = abs_tol
        self._rel_tol = rel_tol
        self._strip = strip

    def verify(self, actual: str, expected: str) -> Tuple[bool, Optional[str]]:
        """Compare whitespace-separated tokens, floats with tolerance."""
        lhs = actual.strip() if self._strip else actual
        rhs = expected.strip() if self._strip else expected
        got, want = lhs.split(), rhs.split()

        if len(got) != len(want):
            return False, f"Token count mismatch: got {len(got)}, expected {len(want)}"

        for idx, (at, et) in enumerate(zip(got, want)):
            if not self._tokens_match(at, et):
                return False, f"Mismatch at token {idx}: got '{at}', expected '{et}'"
        return True, None

    def _tokens_match(self, actual: str, expected: str) -> bool:
        """True when tokens are equal strings, or floats within tolerance."""
        if actual == expected:
            return True
        try:
            a_val, e_val = float(actual), float(expected)
        except ValueError:
            # Not numeric and exact comparison already failed.
            return False
        return abs(a_val - e_val) <= max(self._abs_tol, self._rel_tol * abs(e_val))
diff --git a/src/ludic/envs/code_exec/backend.py b/src/ludic/envs/code_exec/backend.py
new file mode 100644
index 0000000..c236b41
--- /dev/null
+++ b/src/ludic/envs/code_exec/backend.py
@@ -0,0 +1,171 @@
+"""
+Sandbox backend detection and selection.
+
+This module provides:
+ - SandboxBackend: Enumeration of supported sandbox backends
+ - detect_available_backend(): Auto-detection based on environment
+ - is_*_available(): Individual backend availability checks
+
+Auto-detection priority:
+ - In Slurm job: podman-hpc → docker → error
+ - Outside Slurm: docker → podman-hpc → error
+
+Usage:
+ from ludic.envs.code_exec.backend import detect_available_backend, SandboxBackend
+
+ # Auto-detect
+ backend = detect_available_backend()
+
+ # Manual selection
+ if backend == SandboxBackend.PODMAN_HPC:
+ from ludic.envs.code_exec.podman_sandbox import PodmanHPCSandboxPool
+ pool = PodmanHPCSandboxPool(n_workers=4)
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+from enum import Enum
+
+
class SandboxBackend(str, Enum):
    """Identifiers for the supported sandbox backends.

    Mixes in str so members compare equal to their plain string values,
    which keeps config files and CLI flags simple.
    """

    DOCKER = "docker"
    PODMAN_HPC = "podman-hpc"
    SINGULARITY = "singularity"
    # Sentinel asking the factory to pick a backend automatically.
    AUTO = "auto"
+
+
def detect_available_backend() -> str:
    """Pick the best available sandbox backend for this machine.

    Inside a Slurm allocation (SLURM_JOB_ID set) podman-hpc is probed
    before Docker; everywhere else Docker is probed first. Singularity
    is detected but has no implementation yet, so it never wins.

    Returns:
        A SandboxBackend value string (never "auto").

    Raises:
        RuntimeError: When neither Docker nor podman-hpc is usable.
    """
    # Order the probes by environment: HPC jobs rarely expose a Docker
    # daemon, while local machines rarely have podman-hpc.
    if os.environ.get("SLURM_JOB_ID") is not None:
        probes = [
            (is_podman_hpc_available, SandboxBackend.PODMAN_HPC),
            (is_docker_available, SandboxBackend.DOCKER),
        ]
    else:
        probes = [
            (is_docker_available, SandboxBackend.DOCKER),
            (is_podman_hpc_available, SandboxBackend.PODMAN_HPC),
        ]

    for probe, backend in probes:
        if probe():
            return backend.value

    # Singularity is deferred but check for future use
    if is_singularity_available():
        # NOTE: Singularity backend not yet implemented
        pass

    raise RuntimeError(
        "No sandbox backend available. Install one of:\n"
        " - Docker (daemon-based): pip install docker && start Docker daemon\n"
        " - Podman-HPC (daemonless): available on HPC clusters with podman-hpc\n"
        "\n"
        "For HPC clusters, ensure you're running within a Slurm job:\n"
        " srun --pty bash\n"
        " # or\n"
        " sbatch your_script.sh"
    )
+
+
def is_docker_available() -> bool:
    """Check if a Docker daemon is running and accessible.

    Returns:
        True when the docker package is installed and the daemon answers
        a ping; False otherwise (package missing, daemon down, or socket
        inaccessible).
    """
    try:
        import docker
    except ImportError:
        # docker package not installed
        return False

    client = None
    try:
        client = docker.from_env()
        client.ping()
        return True
    except Exception:
        # Docker daemon not running or not accessible
        return False
    finally:
        # Always release the client: the previous version leaked it when
        # ping() raised after from_env() succeeded.
        if client is not None:
            try:
                client.close()
            except Exception:
                pass
+
+
def is_podman_hpc_available() -> bool:
    """Return True when the podman-hpc CLI is on PATH.

    Presence of the command does not guarantee containers can actually
    run; on some clusters that additionally requires a Slurm allocation.
    """
    return bool(shutil.which("podman-hpc"))
+
+
def is_singularity_available() -> bool:
    """Return True when a singularity or apptainer CLI is on PATH."""
    return any(shutil.which(cmd) is not None for cmd in ("singularity", "apptainer"))
+
+
def get_backend_info() -> dict:
    """Snapshot the availability of every known backend.

    Intended for debugging and status reporting.

    Returns:
        Dict with an "environment" section (Slurm status) and a
        "backends" section keyed by backend name.
    """
    slurm_id = os.environ.get("SLURM_JOB_ID")
    return {
        "environment": {
            "in_slurm": slurm_id is not None,
            "slurm_job_id": slurm_id,
        },
        "backends": {
            SandboxBackend.DOCKER.value: {
                "available": is_docker_available(),
                "requires": "Docker daemon + docker package",
            },
            SandboxBackend.PODMAN_HPC.value: {
                "available": is_podman_hpc_available(),
                "requires": "podman-hpc command (HPC clusters)",
            },
            SandboxBackend.SINGULARITY.value: {
                "available": is_singularity_available(),
                "requires": "singularity/apptainer command",
                "note": "Not yet implemented",
            },
        },
    }
diff --git a/src/ludic/envs/code_exec/batch_runner.py b/src/ludic/envs/code_exec/batch_runner.py
new file mode 100644
index 0000000..ae0d998
--- /dev/null
+++ b/src/ludic/envs/code_exec/batch_runner.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python3
+"""
+Batch test runner for ludic code execution sandbox.
+
+This script runs inside the container. It:
+1. Reads manifest.json for test configuration
+2. Optionally compiles the solution using py_compile
+3. Runs tests in PARALLEL using multiprocessing.Pool (default 16 workers)
+4. Outputs streaming JSONL results (one JSON object per line, flushed immediately)
+
+Usage:
+ python batch_runner.py [manifest_path]
+
+The manifest.json format:
+ {
+ "code_file": "solution.py",
+ "compile_first": true,
+ "timeout_s": 5.0,
+ "stop_on_first_failure": true,
+ "num_workers": 16,
+ "tests": [
+ {"id": "test_0", "stdin": "5\\n", "expected": "25\\n"},
+ {"id": "test_1", "stdin": "3\\n", "expected": "9\\n"}
+ ]
+ }
+
+Output format (streaming JSONL):
+ {"type": "compile", "status": "success", "duration_ms": 12.5}
+ {"type": "test", "id": "test_0", "status": "success", "stdout": "25\\n", ...}
+ {"type": "test", "id": "test_1", "status": "timeout", ...}
+ {"type": "done", "total_tests": 2, "passed": 1, "failed": 1, "compile_failed": false}
+
+Status values:
+    compile: success, syntax_error
+    test: success, runtime_error, timeout, memory_exceeded, killed, not_run
+
+Note: This script is designed to be self-contained with no external dependencies
+beyond Python's standard library. It will be bundled into the container at runtime.
+"""
+
+from __future__ import annotations
+
+import json
+import multiprocessing
+import py_compile
+import subprocess
+import sys
+import time
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+
+
def emit(obj: Dict[str, Any]) -> None:
    """Serialize *obj* as a single JSON line on stdout and flush at once.

    Streaming one complete JSON object per line lets the host recover
    partial results if the container crashes mid-execution.
    """
    sys.stdout.write(json.dumps(obj) + "\n")
    sys.stdout.flush()
+
+
def compile_check(code_file: str, timeout_s: float) -> Dict[str, Any]:
    """Byte-compile *code_file* and report success or syntax details.

    Args:
        code_file: Path of the Python source to compile.
        timeout_s: Accepted for interface symmetry; py_compile runs
            synchronously and the timeout is not enforced here.

    Returns:
        Dict with type="compile", status "success" or "syntax_error",
        timing, and (on failure) the error message plus line/column.
    """
    began = time.perf_counter()
    try:
        py_compile.compile(code_file, doraise=True)
    except py_compile.PyCompileError as err:
        line = column = None
        message = str(err)
        # PyCompileError wraps the SyntaxError in exc_value, which carries
        # the precise location of the problem.
        cause = getattr(err, "exc_value", None)
        if cause is not None:
            line = getattr(cause, "lineno", None)
            column = getattr(cause, "offset", None)
            message = getattr(cause, "msg", message)
        return {
            "type": "compile",
            "status": "syntax_error",
            "error_message": message,
            "error_line": line,
            "error_column": column,
            "duration_ms": (time.perf_counter() - began) * 1000,
        }
    return {
        "type": "compile",
        "status": "success",
        "duration_ms": (time.perf_counter() - began) * 1000,
    }
+
+
def run_test(code_file: str, test: Dict[str, Any], timeout_s: float) -> Dict[str, Any]:
    """Run a single test and return result dict.

    Args:
        code_file: Path to the Python file to execute
        test: Test specification with id, stdin, expected (optional)
        timeout_s: Timeout in seconds for the test execution

    Returns:
        Dict with type="test" and execution results; status is one of
        success, runtime_error, timeout, memory_exceeded, killed.
    """
    start = time.perf_counter()
    test_id = test.get("id", "unknown")
    stdin_data = test.get("stdin", "")

    def _as_text(data: Any) -> str:
        """Normalize partial output to str.

        Because the child runs with text=True, TimeoutExpired carries str
        output; the previous code unconditionally called .decode() on it,
        which raised AttributeError and escaped the timeout handler.
        """
        if data is None:
            return ""
        if isinstance(data, bytes):
            return data.decode("utf-8", errors="replace")
        return data

    try:
        proc = subprocess.run(
            [sys.executable, code_file],
            input=stdin_data,
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
        duration_ms = (time.perf_counter() - start) * 1000

        # Classify status based on return code: 137 (SIGKILL) is the OOM
        # killer's signature, 143 (SIGTERM) a deliberate kill.
        if proc.returncode == 0:
            status = "success"
        elif proc.returncode == 137:
            status = "memory_exceeded"
        elif proc.returncode == 143:
            status = "killed"
        else:
            status = "runtime_error"

        return {
            "type": "test",
            "id": test_id,
            "status": status,
            "stdout": proc.stdout,
            "stderr": proc.stderr,
            "exit_code": proc.returncode,
            "duration_ms": duration_ms,
        }

    except subprocess.TimeoutExpired as e:
        # Capture any partial output (str under text=True, bytes otherwise)
        return {
            "type": "test",
            "id": test_id,
            "status": "timeout",
            "stdout": _as_text(e.stdout),
            "stderr": _as_text(e.stderr),
            "exit_code": None,
            "duration_ms": timeout_s * 1000,
        }

    except Exception as e:
        # Catch any unexpected errors (e.g., file not found)
        duration_ms = (time.perf_counter() - start) * 1000
        return {
            "type": "test",
            "id": test_id,
            "status": "runtime_error",
            "stdout": "",
            "stderr": f"Execution error: {e}",
            "exit_code": None,
            "duration_ms": duration_ms,
        }
+
+
def _run_test_wrapper(args: Tuple[int, Dict[str, Any], str, float]) -> Tuple[int, Dict[str, Any]]:
    """Top-level (hence picklable) shim so Pool workers can call run_test.

    Args:
        args: (test_index, test_dict, code_file, timeout_s)

    Returns:
        (test_index, result_dict), keeping the index so callers can
        re-associate unordered results with submission order.
    """
    index, spec, code_file, timeout_s = args
    return index, run_test(code_file, spec, timeout_s)
+
+
def run_tests_parallel(
    code_file: str,
    tests: List[Dict[str, Any]],
    timeout_s: float,
    num_workers: int = 16,
) -> Iterator[Dict[str, Any]]:
    """Run tests in parallel using multiprocessing.Pool.

    Uses imap_unordered for streaming results as they complete (not waiting
    for all tests). This dramatically reduces wall-clock time when tests
    have varying execution times.

    Args:
        code_file: Path to the Python file to execute
        tests: List of test specifications
        timeout_s: Timeout per test in seconds
        num_workers: Number of parallel worker processes (default 16 for HPC)

    Yields:
        Test result dicts as they complete (unordered)
    """
    if not tests:
        return

    # Prepare arguments for each test; the index travels with each test so
    # _run_test_wrapper can report which submission a result belongs to.
    args_list = [(i, test, code_file, timeout_s) for i, test in enumerate(tests)]

    # Use spawn context to avoid fork issues with subprocess-heavy workloads
    # This is safer on HPC systems where fork can cause issues with MPI, CUDA, etc.
    ctx = multiprocessing.get_context("spawn")

    # NOTE(review): yielding inside the Pool context means that if the
    # consumer abandons this generator early (e.g. breaks out of the loop),
    # the pool is only terminated once the suspended generator is finalized
    # (closed or garbage-collected) -- in-flight workers may linger until
    # then. Confirm this is acceptable for the stop_on_first_failure path.
    with ctx.Pool(processes=min(num_workers, len(tests))) as pool:
        # imap_unordered streams results as they complete
        for _i, result in pool.imap_unordered(_run_test_wrapper, args_list):
            yield result
+
+
def main() -> None:
    """Entry point: read manifest, compile, run tests, stream JSONL results."""
    manifest_path = sys.argv[1] if len(sys.argv) > 1 else "manifest.json"

    # A broken/missing manifest still produces a well-formed stream: one
    # error record followed by a terminal done record.
    try:
        with open(manifest_path) as fh:
            manifest = json.load(fh)
    except Exception as exc:
        emit({
            "type": "error",
            "message": f"Failed to load manifest: {exc}",
        })
        emit({
            "type": "done",
            "total_tests": 0,
            "passed": 0,
            "failed": 0,
            "compile_failed": False,
        })
        return

    code_file = manifest.get("code_file", "solution.py")
    compile_first = manifest.get("compile_first", True)
    timeout_s = manifest.get("timeout_s", 5.0)
    stop_on_first_failure = manifest.get("stop_on_first_failure", True)
    num_workers = manifest.get("num_workers", 16)  # Configurable via manifest
    tests: List[Dict[str, Any]] = manifest.get("tests", [])

    # Optional pre-flight syntax check: a file that doesn't compile fails
    # every test, so report once and stop.
    if compile_first:
        compile_result = compile_check(code_file, timeout_s)
        emit(compile_result)
        if compile_result["status"] != "success":
            emit({
                "type": "done",
                "total_tests": len(tests),
                "passed": 0,
                "failed": 0,
                "compile_failed": True,
            })
            return

    passed = failed = 0
    seen_ids: set[str] = set()

    # Stream each result the moment its worker finishes.
    for outcome in run_tests_parallel(code_file, tests, timeout_s, num_workers):
        emit(outcome)
        seen_ids.add(outcome.get("id", "unknown"))
        if outcome["status"] == "success":
            passed += 1
            continue
        failed += 1
        if stop_on_first_failure:
            # Early termination: remaining tests get reported as "not_run"
            # below; the pool is torn down when the generator is finalized.
            break

    # Report every test that never produced a result (early termination).
    for spec in tests:
        tid = spec.get("id", "unknown")
        if tid in seen_ids:
            continue
        emit({
            "type": "test",
            "id": tid,
            "status": "not_run",
            "stdout": "",
            "stderr": "",
            "exit_code": None,
            "duration_ms": 0,
        })

    # Terminal marker so the host knows the stream is complete.
    emit({
        "type": "done",
        "total_tests": len(tests),
        "passed": passed,
        "failed": failed,
        "compile_failed": False,
    })
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/ludic/envs/code_exec/cache.py b/src/ludic/envs/code_exec/cache.py
new file mode 100644
index 0000000..a5275b6
--- /dev/null
+++ b/src/ludic/envs/code_exec/cache.py
@@ -0,0 +1,129 @@
+"""
+Shared LRU cache for code execution results.
+
+Provides thread-safe caching of BatchTestResult keyed by (code_hash, tests_hash).
+Used by both Docker and Podman sandbox pools to avoid redundant execution of
+identical code/test combinations.
+"""
+
+from __future__ import annotations
+
+import threading
+from collections import OrderedDict
+from typing import Dict, Optional
+
+from .types import BatchTestResult
+
+
class LRUCache:
    """
    Thread-safe LRU cache mapping (code_hash, tests_hash) -> BatchTestResult.

    Backed by an OrderedDict (insertion order doubles as recency order) and
    guarded by a single threading.Lock, so it can be shared by multiple
    async tasks using the same sandbox pool.

    Args:
        max_size: Maximum number of entries. The least-recently-used entry
            is evicted once this limit would be exceeded.
    """

    def __init__(self, max_size: int = 10000):
        self._max_size = max_size
        self._cache: OrderedDict[tuple[str, str], BatchTestResult] = OrderedDict()
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(
        self,
        code_hash: str,
        tests_hash: str,
    ) -> Optional[BatchTestResult]:
        """
        Look up a cached result; None on miss.

        A hit marks the entry as most-recently-used. Thread-safe.

        Args:
            code_hash: Hash of the code being executed.
            tests_hash: Hash of the test cases.

        Returns:
            Cached BatchTestResult if found, None otherwise.
        """
        key = (code_hash, tests_hash)
        with self._lock:
            try:
                value = self._cache[key]
            except KeyError:
                self._misses += 1
                return None
            # Refresh recency so this entry is evicted last.
            self._cache.move_to_end(key)
            self._hits += 1
            return value

    def put(
        self,
        code_hash: str,
        tests_hash: str,
        result: BatchTestResult,
    ) -> None:
        """
        Insert or overwrite a result, evicting the LRU entry if needed.

        Thread-safe.

        Args:
            code_hash: Hash of the code being executed.
            tests_hash: Hash of the test cases.
            result: The test result to cache.
        """
        key = (code_hash, tests_hash)
        with self._lock:
            already_present = key in self._cache
            self._cache[key] = result
            if already_present:
                # Overwrite counts as a use: bump recency.
                self._cache.move_to_end(key)
            elif len(self._cache) > self._max_size:
                # Drop the least-recently-used entry (front of the dict).
                self._cache.popitem(last=False)

    def clear(self) -> None:
        """Drop every cached entry (hit/miss counters are kept). Thread-safe."""
        with self._lock:
            self._cache.clear()

    @property
    def stats(self) -> Dict[str, int]:
        """
        Snapshot of cache statistics (thread-safe).

        Returns:
            Dict with keys: hits, misses, size, max_size
        """
        with self._lock:
            return {
                "hits": self._hits,
                "misses": self._misses,
                "size": len(self._cache),
                "max_size": self._max_size,
            }

    @property
    def hit_rate(self) -> float:
        """
        Fraction of lookups that hit, in [0, 1].

        Returns 0.0 if no lookups have been performed.
        """
        with self._lock:
            lookups = self._hits + self._misses
            return self._hits / lookups if lookups else 0.0
diff --git a/src/ludic/envs/code_exec/docker_sandbox.py b/src/ludic/envs/code_exec/docker_sandbox.py
new file mode 100644
index 0000000..5b5f354
--- /dev/null
+++ b/src/ludic/envs/code_exec/docker_sandbox.py
@@ -0,0 +1,725 @@
+"""
+Docker-based sandbox implementation for code execution.
+
+This module provides:
+ - DockerSandboxConfig: Configuration for Docker containers
+ - DockerSandbox: Async Docker container sandbox
+ - DockerSandboxPool: Pool of Docker sandboxes with caching
+
+Requires: docker>=7.0.0
+Install with: pip install 'ludic[code-exec]'
+"""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import io
+import json
+import logging
+import os
+import re
+import tarfile
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
+from typing import AsyncIterator, Dict, List, Optional, Union
+
+logger = logging.getLogger(__name__)
+
+try:
+ import docker
+ from docker.models.containers import Container
+except ImportError as e:
+ raise ImportError(
+ "Docker is not installed. Install it with: pip install 'ludic[code-exec]'"
+ ) from e
+
+from .parsing import (
+ get_batch_runner_script,
+ parse_batch_compile_result,
+ parse_batch_test_result,
+ parse_syntax_error,
+)
+from .pool import BaseSandboxPool
+from .sandbox import Sandbox, SandboxPool
+from .types import (
+ BatchExecutionSpec,
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+)
+
+
@dataclass
class DockerSandboxConfig:
    """Settings for Docker-based sandboxes: image selection, resource limits,
    and isolation options applied to every container in a pool."""

    python_version: str = "3.11"
    base_image: Optional[str] = None  # explicit image overrides python_version
    memory_limit: str = "256m"
    cpu_quota: int = 50000  # 50% of one CPU (out of 100000)
    network_disabled: bool = True
    working_dir: str = "/workspace"

    @property
    def image(self) -> str:
        """Docker image name: the explicit base_image if set, otherwise the
        official slim image for the configured Python version."""
        return self.base_image or f"python:{self.python_version}-slim"
+
+
+class DockerSandbox:
+ """
+ Async Docker container sandbox for Python code execution.
+
+ Uses ThreadPoolExecutor to make docker-py calls non-blocking.
+ Implements the Sandbox protocol with full async support.
+ """
+
+ def __init__(
+ self,
+ container: Container,
+ config: DockerSandboxConfig,
+ executor: ThreadPoolExecutor,
+ ):
+ self._container = container
+ self._config = config
+ self._executor = executor
+ self._memory_limit_warned = False
+
+ @property
+ def python_version(self) -> str:
+ return self._config.python_version
+
+ async def reset(self) -> None:
+ """Clear workspace directory."""
+
+ def _reset():
+ # Remove all files in workspace
+ self._container.exec_run(
+ f"sh -c 'rm -rf {self._config.working_dir}/*'",
+ workdir=self._config.working_dir,
+ )
+
+ loop = asyncio.get_event_loop()
+ await loop.run_in_executor(self._executor, _reset)
+
+ async def compile(
+ self,
+ code: str,
+ *,
+ timeout_s: float = 5.0,
+ ) -> CompileResult:
+ """
+ Syntax-check code using py_compile.
+
+ Returns rich error info including line and column numbers.
+ """
+ start = time.perf_counter()
+
+ def _compile():
+ # Write code to temp file
+ self._write_file("_check.py", code)
+
+ # Run py_compile
+ result = self._container.exec_run(
+ "python -m py_compile _check.py",
+ workdir=self._config.working_dir,
+ demux=True,
+ )
+ return result
+
+ loop = asyncio.get_event_loop()
+ try:
+ # Run with timeout
+ result = await asyncio.wait_for(
+ loop.run_in_executor(self._executor, _compile),
+ timeout=timeout_s,
+ )
+
+ duration_ms = (time.perf_counter() - start) * 1000
+
+ exit_code = result.exit_code
+ stdout, stderr = result.output
+
+ if exit_code == 0:
+ return CompileResult(
+ status=CompileStatus.SUCCESS,
+ duration_ms=duration_ms,
+ )
+
+ # Parse error message
+ error_msg = (stderr or b"").decode("utf-8", errors="replace")
+ line, column, clean_msg = parse_syntax_error(error_msg)
+
+ # Classify error type
+ status = CompileStatus.SYNTAX_ERROR
+ if "ImportError" in error_msg or "ModuleNotFoundError" in error_msg:
+ status = CompileStatus.IMPORT_ERROR
+ elif not clean_msg:
+ status = CompileStatus.UNKNOWN_ERROR
+
+ return CompileResult(
+ status=status,
+ error_message=clean_msg or error_msg,
+ error_line=line,
+ error_column=column,
+ duration_ms=duration_ms,
+ )
+
+ except asyncio.TimeoutError:
+ duration_ms = (time.perf_counter() - start) * 1000
+ return CompileResult(
+ status=CompileStatus.TIMEOUT,
+ error_message=f"Compilation timed out after {timeout_s}s",
+ duration_ms=duration_ms,
+ )
+
+ async def execute(
+ self,
+ code: str,
+ *,
+ stdin: str = "",
+ skip_compile: bool = False,
+ timeout_s: float = 10.0,
+ memory_limit_mb: Optional[int] = None,
+ env_vars: Optional[Dict[str, str]] = None,
+ ) -> ExecutionResult:
+ """
+ Execute code with full resource isolation and rich metadata.
+
+ Compiles first, then executes if compilation succeeds (unless skip_compile=True).
+ """
+ # Log warning for memory_limit_mb if provided (only once per sandbox)
+ if memory_limit_mb is not None and not self._memory_limit_warned:
+ logger.warning(
+ "Per-execution memory limits are not supported by docker exec. "
+ "Container-level memory limit (%s) is enforced instead.",
+ self._config.memory_limit,
+ )
+ self._memory_limit_warned = True
+
+ # Step 1: Compile
+ if skip_compile:
+ compile_result = CompileResult(status=CompileStatus.SUCCESS)
+ else:
+ compile_result = await self.compile(code, timeout_s=timeout_s)
+
+ total_start = time.perf_counter()
+
+ if not compile_result.success:
+ # Return early with compilation failure
+ total_ms = (time.perf_counter() - total_start) * 1000
+ return ExecutionResult(
+ compile_result=compile_result,
+ run_status=RunStatus.NOT_RUN,
+ compile_duration_ms=compile_result.duration_ms,
+ total_duration_ms=total_ms,
+ )
+
+ # Step 2: Execute
+ run_start = time.perf_counter()
+
+ def _execute():
+ # Generate unique execution ID to avoid race conditions
+ exec_id = uuid.uuid4().hex[:8]
+ exec_file = f"_exec_{exec_id}.py"
+ input_file = f"input_{exec_id}.txt"
+
+ # Write code to file
+ self._write_file(exec_file, code)
+
+ # Write stdin to file if provided
+ if stdin:
+ self._write_file(input_file, stdin)
+ # Build command with stdin redirection
+ cmd = f"python {self._config.working_dir}/{exec_file} < {self._config.working_dir}/{input_file}"
+ else:
+ # Build command without redirection
+ cmd = f"python {self._config.working_dir}/{exec_file}"
+
+ # Prepare environment
+ environment = env_vars or {}
+
+ # Run with resource limits
+ result = self._container.exec_run(
+ cmd,
+ workdir=self._config.working_dir,
+ demux=True,
+ environment=environment,
+ )
+
+ return result
+
+ loop = asyncio.get_event_loop()
+
+ try:
+ # Run with timeout
+ result = await asyncio.wait_for(
+ loop.run_in_executor(self._executor, _execute),
+ timeout=timeout_s,
+ )
+
+ run_ms = (time.perf_counter() - run_start) * 1000
+ total_ms = (time.perf_counter() - total_start) * 1000
+
+ exit_code = result.exit_code
+ stdout, stderr = result.output
+
+ stdout_str = (stdout or b"").decode("utf-8", errors="replace")
+ stderr_str = (stderr or b"").decode("utf-8", errors="replace")
+
+ # Classify run status
+ if exit_code == 0:
+ run_status = RunStatus.SUCCESS
+ elif exit_code == 137: # SIGKILL (OOM)
+ run_status = RunStatus.MEMORY_EXCEEDED
+ elif exit_code == 143: # SIGTERM
+ run_status = RunStatus.KILLED
+ else:
+ run_status = RunStatus.RUNTIME_ERROR
+
+ return ExecutionResult(
+ compile_result=compile_result,
+ run_status=run_status,
+ stdout=stdout_str,
+ stderr=stderr_str,
+ exit_code=exit_code,
+ compile_duration_ms=compile_result.duration_ms,
+ run_duration_ms=run_ms,
+ total_duration_ms=total_ms,
+ )
+
+ except asyncio.TimeoutError:
+ run_ms = (time.perf_counter() - run_start) * 1000
+ total_ms = (time.perf_counter() - total_start) * 1000
+
+ # Try to kill the process
+ try:
+ await loop.run_in_executor(
+ self._executor,
+ lambda: self._container.exec_run("pkill -9 python"),
+ )
+ except Exception:
+ pass # Best effort cleanup
+
+ return ExecutionResult(
+ compile_result=compile_result,
+ run_status=RunStatus.TIMEOUT,
+ stderr=f"Execution timed out after {timeout_s}s",
+ compile_duration_ms=compile_result.duration_ms,
+ run_duration_ms=run_ms,
+ total_duration_ms=total_ms,
+ )
+
+ def _write_file(self, path: str, content: str) -> None:
+ """
+ Write a file to the container using tarfile.
+
+ Docker API doesn't have a direct "write file" method,
+ so we create a tar archive in memory and extract it.
+ """
+ # Create tar archive in memory
+ tar_buffer = io.BytesIO()
+ tar = tarfile.open(fileobj=tar_buffer, mode="w")
+
+ # Add file to archive
+ file_data = content.encode("utf-8")
+ tarinfo = tarfile.TarInfo(name=path)
+ tarinfo.size = len(file_data)
+ tarinfo.mtime = time.time()
+ tar.addfile(tarinfo, io.BytesIO(file_data))
+ tar.close()
+
+ # Extract to container
+ tar_buffer.seek(0)
+ self._container.put_archive(self._config.working_dir, tar_buffer)
+
+ # -------------------------------------------------------------------------
+ # Batch execution (reduces ThreadPoolExecutor calls from O(N) to O(1))
+ # -------------------------------------------------------------------------
+
+ async def execute_batch(
+ self,
+ spec: BatchExecutionSpec,
+ ) -> AsyncIterator[Union[CompileResult, ExecutionResult]]:
+ """
+ Execute all tests in a single batch with streaming results.
+
+ This method reduces the number of ThreadPoolExecutor calls by:
+ 1. Bundling code, manifest, and runner into a single tar
+ 2. Executing the batch runner once, which runs all tests sequentially
+ 3. Streaming results back as JSONL
+
+ Args:
+ spec: Batch execution specification with code, tests, and options
+
+ Yields:
+ CompileResult (if compile_first=True), then ExecutionResult for each test
+ """
+ batch_dir = "_batch"
+ batch_start = time.perf_counter()
+ loop = asyncio.get_event_loop()
+
+ # Build manifest for the batch runner
+ manifest = {
+ "code_file": "solution.py",
+ "compile_first": spec.compile_first,
+ "timeout_s": spec.timeout_s,
+ "stop_on_first_failure": spec.stop_on_first_failure,
+ "tests": [
+ {"id": t.id or f"test_{i}", "stdin": t.input, "expected": t.expected}
+ for i, t in enumerate(spec.tests)
+ ],
+ }
+
+ # Build and write tar archive
+ tar_data = self._build_batch_tar(
+ manifest=manifest,
+ code=spec.code,
+ runner_script=get_batch_runner_script(),
+ batch_dir=batch_dir,
+ )
+
+ def _write_tar():
+ tar_buffer = io.BytesIO(tar_data)
+ self._container.put_archive(self._config.working_dir, tar_buffer)
+
+ await loop.run_in_executor(self._executor, _write_tar)
+
+ # Execute batch runner and stream results
+ manifest_path = f"{self._config.working_dir}/{batch_dir}/manifest.json"
+ runner_path = f"{self._config.working_dir}/{batch_dir}/batch_runner.py"
+
+ run_start = time.perf_counter()
+ received_done = False
+ received_test_ids: set[str] = set()
+ compile_result: Optional[CompileResult] = None
+
+ def _execute():
+ result = self._container.exec_run(
+ f"python {runner_path} {manifest_path}",
+ workdir=f"{self._config.working_dir}/{batch_dir}",
+ demux=True,
+ )
+ return result
+
+ try:
+ result = await asyncio.wait_for(
+ loop.run_in_executor(self._executor, _execute),
+ timeout=spec.timeout_s * len(spec.tests) + 10.0, # Extra buffer
+ )
+
+ stdout, stderr = result.output
+ stdout_str = (stdout or b"").decode("utf-8", errors="replace")
+
+ # Parse JSONL output
+ for line in stdout_str.strip().split("\n"):
+ if not line:
+ continue
+
+ try:
+ result_dict = json.loads(line)
+ except json.JSONDecodeError:
+ logger.warning(f"Invalid JSON from batch runner: {line}")
+ continue
+
+ result_type = result_dict.get("type")
+
+ if result_type == "compile":
+ compile_result = parse_batch_compile_result(result_dict)
+ yield compile_result
+ if not compile_result.success:
+ break
+
+ elif result_type == "test":
+ test_id = result_dict.get("id", "unknown")
+ received_test_ids.add(test_id)
+ exec_result = parse_batch_test_result(result_dict, run_start)
+ yield exec_result
+
+ elif result_type == "done":
+ received_done = True
+ break
+
+ elif result_type == "error":
+ logger.error(f"Batch runner error: {result_dict.get('message')}")
+
+ except asyncio.TimeoutError:
+ logger.warning(f"Batch execution timed out")
+
+ except Exception as e:
+ logger.warning(f"Batch execution failed: {e}")
+
+ # Handle missing tests
+ if not received_done and compile_result is None:
+ compile_result = CompileResult(
+ status=CompileStatus.UNKNOWN_ERROR,
+ error_message="Batch execution terminated unexpectedly",
+ duration_ms=(time.perf_counter() - batch_start) * 1000,
+ )
+ yield compile_result
+
+ if not received_done and (compile_result is None or compile_result.success):
+ for i, test in enumerate(spec.tests):
+ test_id = test.id or f"test_{i}"
+ if test_id not in received_test_ids:
+ run_ms = (time.perf_counter() - run_start) * 1000
+ yield ExecutionResult(
+ compile_result=compile_result or CompileResult(
+ status=CompileStatus.SUCCESS
+ ),
+ run_status=RunStatus.SANDBOX_ERROR,
+ stdout="",
+ stderr="Batch execution terminated unexpectedly",
+ exit_code=None,
+ run_duration_ms=run_ms,
+ total_duration_ms=run_ms,
+ )
+
+ def _build_batch_tar(
+ self,
+ manifest: dict,
+ code: str,
+ runner_script: str,
+ batch_dir: str = "_batch",
+ ) -> bytes:
+ """Build tar archive containing batch execution files."""
+ buf = io.BytesIO()
+ with tarfile.open(fileobj=buf, mode="w") as tar:
+ # Create directory entry first
+ dir_info = tarfile.TarInfo(name=batch_dir)
+ dir_info.type = tarfile.DIRTYPE
+ dir_info.mode = 0o755
+ dir_info.mtime = int(time.time())
+ tar.addfile(dir_info)
+
+ # Add manifest.json
+ manifest_data = json.dumps(manifest, indent=2).encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/manifest.json")
+ info.size = len(manifest_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(manifest_data))
+
+ # Add solution.py
+ code_data = code.encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/solution.py")
+ info.size = len(code_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(code_data))
+
+ # Add batch_runner.py
+ runner_data = runner_script.encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/batch_runner.py")
+ info.size = len(runner_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(runner_data))
+
+ buf.seek(0)
+ return buf.read()
+
+
class DockerSandboxPool(BaseSandboxPool[DockerSandbox]):
    """
    Pool of Docker sandboxes with LRU caching.

    Manages container lifecycle, checkout/release, and execution caching.
    Inherits background reset pattern from BaseSandboxPool.
    """

    def __init__(
        self,
        n_workers: int = 4,
        config: Optional[DockerSandboxConfig] = None,
        cache_size: int = 10000,
        executor_threads: int = 8,
        auto_replace_failed: bool = False,
        max_consecutive_failures: int = 5,
        max_concurrent_ops: int = 8,
    ):
        """
        Args:
            n_workers: Number of containers kept in the pool.
            config: Container configuration (defaults to DockerSandboxConfig()).
            cache_size: Max entries in the execution-result cache.
            executor_threads: Threads used for blocking docker-py calls.
            auto_replace_failed: Replace containers that fail to reset.
            max_consecutive_failures: Failure threshold (see base class).
            max_concurrent_ops: Cap on concurrent sandbox operations.
        """
        # Initialize base pool
        super().__init__(
            n_workers=n_workers,
            cache_size=cache_size,
            auto_replace_failed=auto_replace_failed,
            max_consecutive_failures=max_consecutive_failures,
            max_concurrent_ops=max_concurrent_ops,
        )

        # Docker-specific configuration
        self._config = config or DockerSandboxConfig()
        self._executor = ThreadPoolExecutor(max_workers=executor_threads)
        self._docker_client: Optional[docker.DockerClient] = None

    @property
    def python_version(self) -> str:
        return self._config.python_version

    def _container_name(self, suffix: object) -> str:
        """Container name unique to this process and the given suffix."""
        return (
            f"ludic-sandbox-{self._config.python_version}-{os.getpid()}-{suffix}"
        )

    def _provision_sandbox(
        self, client: "docker.DockerClient", name: str
    ) -> DockerSandbox:
        """
        Synchronously create and start one container (run in an executor thread).

        Removes any stale container with the same name, applies the configured
        resource limits, and wraps the started container in a DockerSandbox.
        Shared by initial pool creation and failed-sandbox replacement so the
        two code paths cannot drift apart.
        """
        # Remove existing container if present (e.g. leftover from a crash)
        try:
            stale = client.containers.get(name)
            stale.remove(force=True)
        except docker.errors.NotFound:
            pass

        # Create container with resource limits
        container = client.containers.create(
            image=self._config.image,
            name=name,
            detach=True,
            command="sleep infinity",  # Keep container alive between execs
            mem_limit=self._config.memory_limit,
            cpu_quota=self._config.cpu_quota,
            cpu_period=100000,  # Standard 100ms period
            network_disabled=self._config.network_disabled,
            working_dir=self._config.working_dir,
            auto_remove=False,  # We'll manage cleanup
        )
        container.start()

        return DockerSandbox(
            container=container,
            config=self._config,
            executor=self._executor,
        )

    async def _create_sandboxes(self) -> list[DockerSandbox]:
        """
        Create all Docker containers.

        Pulls the image if needed, creates containers with resource limits.
        Called by base class start() method.
        """
        loop = asyncio.get_event_loop()

        def _start():
            # Create Docker client
            client = docker.from_env()

            # Pull image if not present
            try:
                client.images.get(self._config.image)
            except docker.errors.ImageNotFound:
                logger.info("Pulling image %s...", self._config.image)
                client.images.pull(self._config.image)

            # Parallelize container creation
            with ThreadPoolExecutor(max_workers=self._n_workers) as pool:
                sandboxes = list(
                    pool.map(
                        lambda i: self._provision_sandbox(
                            client, self._container_name(i)
                        ),
                        range(self._n_workers),
                    )
                )

            return client, sandboxes

        # Run container creation in executor
        self._docker_client, sandboxes = await loop.run_in_executor(
            self._executor, _start
        )

        return sandboxes

    async def _stop_sandbox(self, sandbox: DockerSandbox) -> None:
        """
        Stop and remove a single Docker container.

        Called during shutdown and when replacing a failed sandbox.
        Errors are logged but not raised.
        """
        loop = asyncio.get_event_loop()

        def _stop():
            try:
                sandbox._container.stop(timeout=2)
                sandbox._container.remove(force=True)
            except Exception:
                # Best-effort teardown; use the module logger, not print.
                logger.warning("Failed to remove container", exc_info=True)

        await loop.run_in_executor(self._executor, _stop)

    async def _create_replacement_sandbox(self) -> Optional[DockerSandbox]:
        """
        Create a single replacement Docker container.

        Called when a sandbox fails to reset and auto_replace_failed is True.
        Returns None if container creation fails.
        """
        loop = asyncio.get_event_loop()

        def _create():
            if self._docker_client is None:
                return None

            try:
                # uuid suffix (consistent with DockerSandbox temp files)
                # cannot collide with the integer-indexed worker names.
                name = self._container_name(uuid.uuid4().hex[:8])
                return self._provision_sandbox(self._docker_client, name)
            except Exception:
                # Still best-effort (caller tolerates None), but log the cause
                # instead of swallowing it silently.
                logger.exception("Failed to create replacement sandbox")
                return None

        return await loop.run_in_executor(self._executor, _create)

    async def shutdown(self) -> None:
        """
        Tear down all containers and release resources.

        Waits for pending resets, stops containers, closes Docker client,
        and shuts down executor.
        """
        # Base shutdown handles pending resets and calls _stop_sandbox
        await super().shutdown()

        # Docker-specific cleanup
        loop = asyncio.get_event_loop()

        def _close_client():
            if self._docker_client:
                self._docker_client.close()

        await loop.run_in_executor(self._executor, _close_client)

        # Shutdown executor
        self._executor.shutdown(wait=True)
diff --git a/src/ludic/envs/code_exec/env.py b/src/ludic/envs/code_exec/env.py
new file mode 100644
index 0000000..1e0f190
--- /dev/null
+++ b/src/ludic/envs/code_exec/env.py
@@ -0,0 +1,452 @@
+"""
+Main environment for code execution RL tasks.
+
+This environment bridges the world of RL agents and code execution sandboxes,
+providing a clean SingleAgentEnv interface for training LLMs to write code.
+
+Key design decisions:
+ 1. env_reset and env_step are async to support async sandbox operations
+ 2. The interaction protocol (Phase 6) must detect and await these coroutines
+ 3. Caching is handled at the pool level but controllable via config
+ 4. Rich info dict includes all execution metadata for analysis/logging
+
+Note: env_reset and env_step are async methods. The interaction protocol
+must detect this and await them. See Phase 6 integration.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+from ludic.envs.single_agent_env import SingleAgentEnv
+from ludic.types import Info, Observation, StepOutcome
+
+from .adapters.base import ExactMatchVerifier, OutputVerifier, TestAdapter
+from .runners import CodeRunner, StdinStdoutRunner, compute_hash, hash_tests
+from .sandbox import SandboxPool
+from .types import BatchTestResult, TestCase
+
+
@dataclass
class CodeExecConfig:
    """Configuration for CodeExecEnv behavior.

    Grouped into execution limits, reward shaping, observation
    formatting, and caching.
    """

    # Execution limits
    timeout_per_test_s: float = 5.0  # efficiency-focused default
    memory_limit_mb: int = 256  # forwarded to the code runner; enforcement depends on the sandbox backend
    max_tests: Optional[int] = None  # cap on tests run per episode (None = run all)
    stop_on_first_failure: bool = True  # abort remaining tests after the first failure
    compile_first: bool = True  # syntax-check before running any tests

    # Reward shaping
    partial_credit: bool = False  # reward = fraction passed (else all-or-nothing)
    compile_failure_reward: float = -0.1  # penalty when submitted code fails to compile

    # Observations
    include_stderr_in_obs: bool = True  # append first failure's stderr to the observation
    max_error_length: int = 500  # truncation limit for error text shown to the agent

    # Caching
    use_cache: bool = True  # reuse pool-level results keyed by (code_hash, tests_hash)
+
+
+class CodeExecEnv(SingleAgentEnv):
+ """
+ Code execution environment for RL training.
+
+ This environment:
+ - Takes a dataset sample containing a problem + test cases
+ - Extracts prompt and tests via a TestAdapter
+ - Executes submitted code in a Sandbox from a SandboxPool
+ - Verifies outputs using an OutputVerifier
+ - Computes rewards based on test results
+ - Returns rich info dicts for logging/analysis
+
+ The environment is single-step by design: agent submits code once,
+ gets results, episode ends. For multi-step refinement, wrap this
+ in a meta-environment or use a ReAct-style agent with tool calling.
+
+ Example usage:
+ ```python
+ pool = await create_sandbox_pool(size=4)
+ adapter = APPSAdapter()
+
+ env = CodeExecEnv(
+ sample=dataset[0],
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=CodeExecConfig(partial_credit=True),
+ )
+
+ obs, info = await env.env_reset()
+ outcome = await env.env_step(agent_code)
+ ```
+ """
+
+ def __init__(
+ self,
+ sample: Dict[str, Any],
+ *,
+ sandbox_pool: SandboxPool,
+ test_adapter: TestAdapter,
+ code_runner: Optional[CodeRunner] = None,
+ verifier: Optional[OutputVerifier] = None,
+ config: Optional[CodeExecConfig] = None,
+ system_prompt: Optional[str] = None,
+ ) -> None:
+ """
+ Initialize the code execution environment.
+
+ Args:
+ sample: Dataset sample containing problem and tests
+ sandbox_pool: Shared pool of sandboxes for execution
+ test_adapter: Adapter to extract prompt/tests from sample
+ code_runner: Runner for executing code (default: StdinStdoutRunner)
+ verifier: Output verifier (default: ExactMatchVerifier)
+ config: Environment configuration (default: CodeExecConfig())
+ system_prompt: Optional system prompt for the agent
+ """
+ super().__init__()
+
+ self._sample = sample
+ self._sandbox_pool = sandbox_pool
+ self._test_adapter = test_adapter
+ self._code_runner = code_runner or StdinStdoutRunner(
+ default_timeout_s=config.timeout_per_test_s if config else 5.0,
+ memory_limit_mb=config.memory_limit_mb if config else 256,
+ )
+ self._verifier = verifier or ExactMatchVerifier()
+ self._config = config or CodeExecConfig()
+ self._system_prompt = system_prompt
+
+ # Episode state (set during reset)
+ self._problem_id: Optional[str] = None
+ self._prompt: Optional[str] = None
+ self._tests: Optional[List[TestCase]] = None
+ self._tests_hash: Optional[str] = None
+ self._current_obs: Optional[Observation] = None
+
    @property
    def suggested_sysprompt(self) -> Optional[str]:
        """System prompt supplied at construction time, or None if unset."""
        return self._system_prompt
+
+ async def env_reset(
+ self, *, seed: Optional[int] = None
+ ) -> Tuple[Observation, Info]:
+ """
+ Reset the environment for a new episode.
+
+ Extracts the problem prompt and test cases from the sample,
+ but does not checkout a sandbox yet (that happens on step).
+
+ Args:
+ seed: Optional random seed (unused in this deterministic env)
+
+ Returns:
+ Tuple of (prompt, info) where info contains problem metadata
+ """
+ # Extract problem components via adapter
+ self._problem_id = self._test_adapter.get_problem_id(self._sample)
+ self._prompt = self._test_adapter.get_prompt(self._sample)
+ self._tests = self._test_adapter.get_tests(self._sample)
+
+ # Handle case where no tests were extracted
+ if not self._tests:
+ error_msg = f"No tests extracted for problem {self._problem_id}"
+ self._current_obs = error_msg
+ return self._current_obs, {
+ "problem_id": self._problem_id,
+ "error": "no_tests_extracted",
+ }
+
+ # Apply max_tests limit if configured
+ if self._config.max_tests is not None:
+ self._tests = self._tests[: self._config.max_tests]
+
+ # Compute tests hash for caching
+ self._tests_hash = hash_tests(self._tests)
+
+ # Set current observation to the prompt
+ self._current_obs = self._prompt
+
+ # Build info dict with episode metadata
+ info: Info = {
+ "problem_id": self._problem_id,
+ "num_tests": len(self._tests),
+ "tests_hash": self._tests_hash,
+ "python_version": self._sandbox_pool.python_version,
+ }
+
+ return self._current_obs, info
+
+ async def env_step(self, action: str) -> StepOutcome:
+ """
+ Execute submitted code and return results.
+
+ This is the core of the environment: takes the agent's code,
+ runs it through the sandbox, computes rewards, and builds
+ rich observations and info dicts.
+
+ Args:
+ action: The code submitted by the agent
+
+ Returns:
+ StepOutcome with observation, reward, termination flags, and info
+ """
+ # Sanity check: ensure reset was called
+ if self._tests is None or self._tests_hash is None:
+ error_obs = "Error: env_reset() must be called before env_step()"
+ return StepOutcome(
+ obs=error_obs,
+ reward=-1.0,
+ truncated=False,
+ terminated=True,
+ info={"error": "reset_not_called"},
+ )
+
+ # Handle empty code submission
+ if not action.strip():
+ error_obs = "Error: Empty code submission"
+ return StepOutcome(
+ obs=error_obs,
+ reward=self._config.compile_failure_reward,
+ truncated=False,
+ terminated=True,
+ info={"error": "empty_code"},
+ )
+
+ # Compute code hash for caching
+ code = action.strip()
+ code_hash = compute_hash(code)
+
+ # Check cache FIRST, before checkout
+ result: Optional[BatchTestResult] = None
+ cache_hit = False
+
+ if self._config.use_cache:
+ result = self._sandbox_pool.get_cached(code_hash, self._tests_hash)
+ if result is not None:
+ cache_hit = True
+
+ # Only checkout sandbox if cache miss
+ if result is None:
+ # Checkout sandbox from pool
+ sandbox = await self._sandbox_pool.checkout()
+
+ try:
+ # Run tests via code runner
+ result = await self._code_runner.run_tests(
+ sandbox=sandbox,
+ code=code,
+ tests=self._tests,
+ verifier=self._verifier,
+ stop_on_first_failure=self._config.stop_on_first_failure,
+ compile_first=self._config.compile_first,
+ )
+
+ # Cache result if enabled
+ if self._config.use_cache:
+ self._sandbox_pool.put_cached(code_hash, self._tests_hash, result)
+
+ finally:
+ # Always release sandbox back to pool
+ await self._sandbox_pool.release(sandbox)
+
+ # Compute reward based on results
+ reward = self._compute_reward(result)
+
+ # Build observation for agent
+ obs = self._build_observation(result)
+ self._current_obs = obs
+
+ # Build rich info dict for logging/analysis
+ info = self._build_info(result, code_hash, cache_hit)
+
+ # Episode ends after single step (single-shot code generation)
+ return StepOutcome(
+ obs=obs,
+ reward=reward,
+ truncated=False,
+ terminated=True,
+ info=info,
+ )
+
+ def env_current_obs(self) -> Observation:
+ """
+ Return the current observation.
+
+ Returns:
+ The current observation string
+ """
+ if self._current_obs is None:
+ return "Error: No observation available (call env_reset first)"
+ return self._current_obs
+
+ def _compute_reward(self, result: BatchTestResult) -> float:
+ """
+ Compute reward from test results.
+
+ Reward schemes:
+ - partial_credit=False: 1.0 if all passed, 0.0 otherwise
+ - partial_credit=True: fraction of tests passed (0.0 to 1.0)
+ - Compilation failures get compile_failure_reward
+
+ Args:
+ result: Batch test results
+
+ Returns:
+ Scalar reward value
+ """
+ # Compilation failure gets special penalty
+ if result.compile_failed:
+ return self._config.compile_failure_reward
+
+ # All tests passed
+ if result.all_passed:
+ return 1.0
+
+ # Partial credit
+ if self._config.partial_credit:
+ return result.pass_rate
+
+ # Binary reward (all or nothing)
+ return 0.0
+
+ def _build_observation(self, result: BatchTestResult) -> str:
+ """
+ Build observation string from test results.
+
+ The observation provides feedback to the agent about what went wrong,
+ including compilation errors, runtime errors, or test failures.
+
+ Args:
+ result: Batch test results
+
+ Returns:
+ Observation string for the agent
+ """
+ # All tests passed - success message
+ if result.all_passed:
+ return (
+ f"All {result.total_count} tests passed! "
+ f"Total execution time: {result.total_run_ms:.1f}ms"
+ )
+
+ # Compilation failed - show compile error
+ if result.compile_failed:
+ first = result.results[0]
+ compile_err = (
+ first.execution.compile_result.error_message or "Unknown error"
+ )
+
+ # Truncate error if too long
+ if len(compile_err) > self._config.max_error_length:
+ compile_err = compile_err[: self._config.max_error_length] + "..."
+
+ obs = f"Compilation failed: {compile_err}"
+
+ if first.execution.compile_result.error_line is not None:
+ obs += f" (line {first.execution.compile_result.error_line})"
+
+ return obs
+
+ # Some tests failed - show first failure details
+ first_failure = result.first_failure
+ if first_failure is None:
+ # Should never happen, but handle gracefully
+ return f"Tests failed: {result.passed_count}/{result.total_count} passed"
+
+ obs_parts = [f"Tests failed: {result.passed_count}/{result.total_count} passed"]
+
+ # Add first failure details
+ if first_failure.comparison_details:
+ details = first_failure.comparison_details
+ if len(details) > self._config.max_error_length:
+ details = details[: self._config.max_error_length] + "..."
+ obs_parts.append(f"\nFirst failure: {details}")
+
+ # Add stderr if configured and available
+ if self._config.include_stderr_in_obs and first_failure.execution.stderr:
+ stderr = first_failure.execution.stderr.strip()
+ if stderr:
+ if len(stderr) > self._config.max_error_length:
+ stderr = stderr[: self._config.max_error_length] + "..."
+ obs_parts.append(f"\nStderr: {stderr}")
+
+ return "".join(obs_parts)
+
+ def _build_info(
+ self,
+ result: BatchTestResult,
+ code_hash: str,
+ cache_hit: bool,
+ ) -> Info:
+ """
+ Build rich info dict with all execution metadata.
+
+ The info dict is JSON-serializable and includes everything needed
+ for logging, analysis, and debugging.
+
+ Args:
+ result: Batch test results
+ code_hash: Hash of the submitted code
+ cache_hit: Whether result came from cache
+
+ Returns:
+ Info dict with comprehensive metadata
+ """
+ # Build per-test result summaries
+ test_results = []
+ for test_result in result.results:
+ test_info = {
+ "test_id": test_result.test_case.id,
+ "passed": test_result.passed,
+ "compiled": test_result.compiled,
+ "ran": test_result.ran,
+ "run_status": (
+ test_result.execution.run_status.value
+ if test_result.execution.run_status
+ else None
+ ),
+ "compile_status": test_result.execution.compile_result.status.value,
+ "run_duration_ms": test_result.execution.run_duration_ms,
+ "stdout": test_result.execution.stdout,
+ "stderr": test_result.execution.stderr,
+ }
+
+ # Optionally include failure details
+ if not test_result.passed and test_result.comparison_details:
+ test_info["failure_reason"] = test_result.comparison_details
+
+ test_results.append(test_info)
+
+ # Build complete info dict
+ info: Info = {
+ # Problem metadata
+ "problem_id": self._problem_id,
+ "code_hash": code_hash,
+ "tests_hash": self._tests_hash,
+ # Test results summary
+ "passed": result.passed_count,
+ "total": result.total_count,
+ "all_passed": result.all_passed,
+ "pass_rate": result.pass_rate,
+ "compile_failed": result.compile_failed,
+ # Detailed test results
+ "test_results": test_results,
+ # Timing
+ "timing": {
+ "total_compile_ms": result.total_compile_ms,
+ "total_run_ms": result.total_run_ms,
+ "total_execution_ms": result.total_execution_ms,
+ },
+ # Cache info
+ "cache_hit": cache_hit,
+ "cache_stats": self._sandbox_pool.cache_stats,
+ # Environment metadata
+ "python_version": self._sandbox_pool.python_version,
+ }
+
+ return info
diff --git a/src/ludic/envs/code_exec/factory.py b/src/ludic/envs/code_exec/factory.py
new file mode 100644
index 0000000..d993dee
--- /dev/null
+++ b/src/ludic/envs/code_exec/factory.py
@@ -0,0 +1,210 @@
+"""
+Unified factory for creating sandbox pools.
+
+This module provides:
+ - create_sandbox_pool(): Async factory that auto-detects or uses specified backend
+
+Usage:
+ from ludic.envs.code_exec import create_sandbox_pool
+
+ # Auto-detect backend
+ pool = await create_sandbox_pool(n_workers=4)
+
+ # Explicit backend
+ pool = await create_sandbox_pool(n_workers=4, backend="podman-hpc")
+
+ # With custom config
+ pool = await create_sandbox_pool(
+ n_workers=4,
+ backend="docker",
+ python_version="3.11",
+ memory_limit="512m",
+ )
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from .backend import SandboxBackend, detect_available_backend
+from .sandbox import SandboxPool
+
+
async def create_sandbox_pool(
    n_workers: int = 4,
    backend: str = "auto",
    python_version: str = "3.11",
    cache_size: int = 10000,
    max_concurrent_ops: int = 8,
    workspace_base_dir: str = "auto",
    **backend_kwargs: Any,
) -> SandboxPool:
    """
    Create and start a sandbox pool with the specified or auto-detected backend.

    This is the recommended entry point for creating sandbox pools: it resolves
    the backend (auto-detecting from the environment when asked), applies a
    consistent configuration, and performs full initialization (pulling images,
    starting containers) before returning.

    Args:
        n_workers: Number of parallel sandboxes in the pool.
        backend: Backend to use ("auto", "docker", "podman-hpc", "singularity").
        python_version: Python version for the sandbox containers.
        cache_size: Maximum number of cached execution results.
        max_concurrent_ops: Maximum concurrent sandbox operations (resets, exec
            calls); prevents deadlock in HPC environments. Default 8.
        workspace_base_dir: Base directory for host-mounted workspaces.
            "auto" detects HPC via SLURM_JOB_ID; an explicit path is used
            as-is; None disables bind mounts in favor of tar-based I/O.
        **backend_kwargs: Backend-specific options, e.g. memory_limit (str),
            cpu_quota, network_disabled (bool), gpu (bool, podman-hpc only),
            image (str, overrides python_version), sif_path (singularity only).

    Returns:
        Started SandboxPool instance.

    Raises:
        RuntimeError: If the specified backend is not available.
        ValueError: If an unknown backend is specified.
        NotImplementedError: If the singularity backend is requested.
    """
    # Resolve the backend, auto-detecting when requested.
    if backend in ("auto", SandboxBackend.AUTO):
        resolved_backend = detect_available_backend()
        print(f"Auto-detected sandbox backend: {resolved_backend}")
    else:
        resolved_backend = backend

    # Options shared by every backend builder.
    shared = dict(
        n_workers=n_workers,
        python_version=python_version,
        cache_size=cache_size,
        max_concurrent_ops=max_concurrent_ops,
    )

    if resolved_backend == SandboxBackend.DOCKER.value:
        pool = _create_docker_pool(**shared, **backend_kwargs)
    elif resolved_backend == SandboxBackend.PODMAN_HPC.value:
        pool = _create_podman_hpc_pool(
            workspace_base_dir=workspace_base_dir, **shared, **backend_kwargs
        )
    elif resolved_backend == SandboxBackend.SINGULARITY.value:
        raise NotImplementedError(
            "Singularity backend is not yet implemented. "
            "Use 'docker' or 'podman-hpc' instead."
        )
    else:
        raise ValueError(
            f"Unknown backend: {resolved_backend}. "
            f"Valid options: {', '.join(b.value for b in SandboxBackend if b != SandboxBackend.AUTO)}"
        )

    # Fully initialize the pool before handing it back.
    await pool.start()
    return pool
+
+
def _create_docker_pool(
    n_workers: int,
    python_version: str,
    cache_size: int,
    max_concurrent_ops: int = 8,
    memory_limit: str = "256m",
    cpu_quota: int = 50000,
    network_disabled: bool = True,
    image: Optional[str] = None,
    **_kwargs: Any,
) -> SandboxPool:
    """Create a DockerSandboxPool with the given configuration.

    Args:
        n_workers: Number of parallel sandboxes in the pool.
        python_version: Python version for the sandbox containers.
        cache_size: Maximum number of cached execution results.
        max_concurrent_ops: Maximum concurrent sandbox operations.
        memory_limit: Per-container memory limit (e.g. "256m").
        cpu_quota: Docker CPU quota (microseconds per 100ms period).
        network_disabled: Whether to disable network access.
        image: Custom container image; overrides python_version when set.
        **_kwargs: Ignored; lets callers pass a superset of backend options.

    Returns:
        Unstarted DockerSandboxPool instance.

    Raises:
        RuntimeError: If the docker SDK is not installed.
    """
    try:
        from .docker_sandbox import DockerSandboxConfig, DockerSandboxPool
    except ImportError as e:
        # Chain the original ImportError so the root cause stays visible
        # in the traceback instead of being silently discarded.
        raise RuntimeError(
            "Docker backend requires the docker package:\n"
            "  pip install docker>=7.0.0"
        ) from e

    config = DockerSandboxConfig(
        python_version=python_version,
        base_image=image,
        memory_limit=memory_limit,
        cpu_quota=cpu_quota,
        network_disabled=network_disabled,
    )

    return DockerSandboxPool(
        n_workers=n_workers,
        config=config,
        cache_size=cache_size,
        max_concurrent_ops=max_concurrent_ops,
    )
+
+
def _create_podman_hpc_pool(
    n_workers: int,
    python_version: str,
    cache_size: int,
    max_concurrent_ops: int = 8,
    workspace_base_dir: str = "auto",
    memory_limit: str = "256m",
    cpu_quota: Optional[float] = None,
    network_disabled: bool = True,
    gpu: bool = False,
    image: Optional[str] = None,
    extra_args: Optional[list[str]] = None,
    **_kwargs: Any,
) -> SandboxPool:
    """Create a PodmanHPCSandboxPool with the given configuration.

    Extra keyword arguments are accepted and ignored so callers can pass a
    superset of backend options.
    """
    from .podman_sandbox import PodmanConfig, PodmanHPCSandboxPool

    # Default to the official slim image for the requested Python version.
    if image is None:
        image = f"python:{python_version}-slim"

    return PodmanHPCSandboxPool(
        n_workers=n_workers,
        image=image,
        config=PodmanConfig(
            memory_limit=memory_limit,
            cpu_quota=cpu_quota,
            network_disabled=network_disabled,
            gpu=gpu,
            extra_args=extra_args,
        ),
        cache_size=cache_size,
        max_concurrent_ops=max_concurrent_ops,
        workspace_base_dir=workspace_base_dir,
    )
diff --git a/src/ludic/envs/code_exec/parsing.py b/src/ludic/envs/code_exec/parsing.py
new file mode 100644
index 0000000..a24e6e3
--- /dev/null
+++ b/src/ludic/envs/code_exec/parsing.py
@@ -0,0 +1,127 @@
+"""Shared parsing utilities for code execution sandboxes."""
+
+from __future__ import annotations
+
+import re
+import time
+from typing import Optional
+
+from .types import CompileResult, CompileStatus, ExecutionResult, RunStatus
+
+# Import batch runner script using importlib.resources
try:
    from importlib.resources import files

    # Module-level memo: the runner script is read from package data once
    # and reused for every subsequent call.
    _BATCH_RUNNER_SCRIPT: Optional[str] = None

    def get_batch_runner_script() -> str:
        """Lazy-load the batch runner script from package resources."""
        global _BATCH_RUNNER_SCRIPT
        if _BATCH_RUNNER_SCRIPT is None:
            _BATCH_RUNNER_SCRIPT = (
                files("ludic.envs.code_exec")
                .joinpath("batch_runner.py")
                .read_text()
            )
        return _BATCH_RUNNER_SCRIPT

except ImportError:
    # Fallback for older Python versions.
    # NOTE: pkg_resources is deprecated (setuptools); this branch only
    # exists for interpreters without importlib.resources.files.
    import pkg_resources

    _BATCH_RUNNER_SCRIPT = None

    def get_batch_runner_script() -> str:
        """Lazy-load the batch runner script via pkg_resources (cached)."""
        global _BATCH_RUNNER_SCRIPT
        # Memoize like the importlib.resources path above, instead of
        # re-reading the package resource on every call.
        if _BATCH_RUNNER_SCRIPT is None:
            _BATCH_RUNNER_SCRIPT = pkg_resources.resource_string(
                "ludic.envs.code_exec", "batch_runner.py"
            ).decode("utf-8")
        return _BATCH_RUNNER_SCRIPT
+
+
def parse_syntax_error(error_msg: str) -> tuple[Optional[int], Optional[int], str]:
    """Extract (line, column, cleaned message) from a Python syntax error blob.

    Line/column are ``None`` when not found; the message falls back to the
    last non-empty line of the input, or "" for empty input.
    """

    def first_int(pattern: str) -> Optional[int]:
        # Return the first captured integer for the pattern, if any.
        match = re.search(pattern, error_msg)
        return int(match.group(1)) if match else None

    line = first_int(r'line (\d+)')
    column = first_int(r'column (\d+)')

    # Prefer a recognized "ErrorType: message" pair when present.
    typed = re.search(r'(SyntaxError|IndentationError|TabError):\s*(.+)', error_msg)
    if typed:
        clean_msg = f"{typed.group(1)}: {typed.group(2).strip()}"
    else:
        # No recognized error class: fall back to the last non-empty line.
        non_empty = [part.strip() for part in error_msg.split('\n') if part.strip()]
        clean_msg = non_empty[-1] if non_empty else ""

    return line, column, clean_msg
+
+
def parse_batch_compile_result(result: dict) -> CompileResult:
    """Translate a batch-runner compile JSON record into a CompileResult."""
    # Map the runner's status strings onto CompileStatus; anything
    # unrecognized (including a missing field) becomes UNKNOWN_ERROR.
    status_by_name = {
        "success": CompileStatus.SUCCESS,
        "syntax_error": CompileStatus.SYNTAX_ERROR,
        "timeout": CompileStatus.TIMEOUT,
    }
    status = status_by_name.get(
        result.get("status", "unknown_error"), CompileStatus.UNKNOWN_ERROR
    )

    return CompileResult(
        status=status,
        error_message=result.get("error_message"),
        error_line=result.get("error_line"),
        error_column=result.get("error_column"),
        duration_ms=result.get("duration_ms", 0.0),
    )
+
+
def parse_batch_test_result(
    result: dict,
    run_start: float,
) -> ExecutionResult:
    """Translate a batch-runner test JSON record into an ExecutionResult."""
    # Map the runner's status strings onto RunStatus; anything unrecognized
    # (including a missing field) is treated as a runtime error.
    status_by_name = {
        "success": RunStatus.SUCCESS,
        "runtime_error": RunStatus.RUNTIME_ERROR,
        "timeout": RunStatus.TIMEOUT,
        "memory_exceeded": RunStatus.MEMORY_EXCEEDED,
        "not_run": RunStatus.NOT_RUN,
        "killed": RunStatus.KILLED,
    }
    run_status = status_by_name.get(
        result.get("status", "runtime_error"), RunStatus.RUNTIME_ERROR
    )

    # Wall-clock time elapsed since the batch run started.
    total_ms = (time.perf_counter() - run_start) * 1000

    return ExecutionResult(
        compile_result=CompileResult(status=CompileStatus.SUCCESS),
        run_status=run_status,
        stdout=result.get("stdout", ""),
        stderr=result.get("stderr", ""),
        exit_code=result.get("exit_code"),
        run_duration_ms=result.get("duration_ms", 0.0),
        total_duration_ms=total_ms,
        cache_key=result.get("id", ""),  # Pass test_id for matching in runner
    )
diff --git a/src/ludic/envs/code_exec/podman_sandbox.py b/src/ludic/envs/code_exec/podman_sandbox.py
new file mode 100644
index 0000000..e8b89e8
--- /dev/null
+++ b/src/ludic/envs/code_exec/podman_sandbox.py
@@ -0,0 +1,1031 @@
+"""
+Podman-HPC sandbox implementation for code execution on HPC clusters.
+
+Provides:
+ - PodmanConfig: Configuration for Podman containers
+ - PodmanHPCSandbox: Async Podman container sandbox using subprocess
+ - PodmanHPCSandboxPool: Pool of Podman sandboxes with caching
+
+Podman-HPC is a daemonless container runtime wrapper for HPC clusters (e.g., Isambard).
+Uses asyncio.create_subprocess_exec instead of docker-py SDK.
+
+**Important**: On some HPC systems (Isambard), podman-hpc's squashfs conversion
+breaks the PATH variable. All commands in this module use absolute paths:
+ - /bin/sleep, /bin/mkdir, /bin/sh
+ - /usr/local/bin/python
+ - /usr/bin/pkill
+"""
+
+from __future__ import annotations
+
+import asyncio
+import io
+import json
+import logging
+import math
+import os
+import re
+import shutil
+import tarfile
+import time
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+from typing import AsyncIterator, Dict, List, Optional, Union
+
+from .parsing import (
+ get_batch_runner_script,
+ parse_batch_compile_result,
+ parse_batch_test_result,
+ parse_syntax_error,
+)
+from .pool import BaseSandboxPool
+from .sandbox import Sandbox, SandboxPool
+from .types import (
+ BatchExecutionSpec,
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+)
+
+logger = logging.getLogger(__name__)
+
+
@dataclass
class PodmanConfig:
    """Configuration for Podman-HPC sandboxes.

    Fields map onto ``podman-hpc run`` flags; see PodmanHPCSandbox.start()
    for exactly how each one is applied.
    """

    memory_limit: str = "256m"  # Passed to --memory when set
    cpu_quota: Optional[float] = None  # CPU limit (e.g., 0.5 = 50% of one CPU); --cpus
    network_disabled: bool = True  # Adds --network none when True
    working_dir: str = "/workspace"  # Workspace path inside the container
    gpu: bool = False  # Pass --gpu flag for GPU access
    extra_args: Optional[list[str]] = None  # Additional podman-hpc run args
+
+
+def _get_container_name_prefix() -> str:
+ """
+ Get container name prefix including SLURM_JOB_ID if in a Slurm job.
+
+ Returns:
+ Container name prefix like "ludic-sandbox-12345" or "ludic-sandbox-local"
+ """
+ slurm_job_id = os.environ.get("SLURM_JOB_ID")
+ if slurm_job_id:
+ return f"ludic-sandbox-{slurm_job_id}"
+ return "ludic-sandbox-local"
+
+
+class PodmanHPCSandbox:
+ """
+ Async Podman-HPC container sandbox for Python code execution.
+
+ Uses persistent containers (sleep infinity) with exec for code execution.
+ All operations use asyncio.create_subprocess_exec for non-blocking I/O.
+
+ Podman Concurrency Note:
+ Podman has known issues with concurrent operations (deadlock above ~8
+ simultaneous exec calls). All sandboxes in a pool share an exec_semaphore
+ to prevent overwhelming podman's lock manager.
+ """
+
    def __init__(
        self,
        container_name: str,
        image: str,
        config: PodmanConfig,
        python_version: str = "3.11",
        exec_semaphore: Optional[asyncio.Semaphore] = None,
        workspace_host_dir: Optional[str] = None,
    ):
        """
        Args:
            container_name: Unique name for the persistent container.
            image: Container image to run (e.g. "python:3.11-slim").
            config: Resource and network configuration for the container.
            python_version: Python version string reported by this sandbox.
            exec_semaphore: Semaphore shared by all sandboxes in the pool,
                throttling concurrent podman exec operations.
            workspace_host_dir: Host directory bind-mounted as the workspace;
                when set, file writes and resets bypass podman exec.
        """
        self._container_name = container_name
        self._image = image
        self._config = config
        self._python_version = python_version
        self._exec_semaphore = exec_semaphore  # Shared across all sandboxes in pool
        self._workspace_host_dir = workspace_host_dir
        self._started = False  # Flipped to True once start() completes
+
    @property
    def python_version(self) -> str:
        """Python version string this sandbox was configured with."""
        return self._python_version
+
    async def start(self) -> None:
        """Create and start the persistent container.

        Removes any stale container with the same name, then launches the
        image with ``/bin/sleep infinity`` so later exec calls reuse a single
        long-lived container. Idempotent: returns immediately if already
        started. Raises PodmanError if the ``run`` command fails.
        """
        if self._started:
            return

        # Remove existing container if present
        await self._run_podman("rm", "-f", self._container_name, check=False)

        # Build run command
        cmd = ["run", "-d", "--name", self._container_name]

        # Resource limits
        if self._config.memory_limit:
            cmd.extend(["--memory", self._config.memory_limit])
        if self._config.cpu_quota:
            cmd.extend(["--cpus", str(self._config.cpu_quota)])
        if self._config.network_disabled:
            cmd.extend(["--network", "none"])
        if self._config.gpu:
            cmd.append("--gpu")
        if self._config.extra_args:
            cmd.extend(self._config.extra_args)

        # Add bind mount if workspace_host_dir is set
        if self._workspace_host_dir:
            logger.info(
                f"[{self._container_name}] Using bind mount: "
                f"{self._workspace_host_dir} -> {self._config.working_dir}"
            )
            cmd.extend(
                ["-v", f"{self._workspace_host_dir}:{self._config.working_dir}:rw"]
            )

        # Image and command (use full path for HPC compatibility)
        cmd.extend([self._image, "/bin/sleep", "infinity"])

        # Capture stderr to provide useful error messages
        await self._run_podman(*cmd, capture=True)

        # Ensure workspace directory exists (use full path for HPC compatibility)
        # Skip if using bind mount (host directory should already exist)
        if not self._workspace_host_dir:
            await self._run_podman(
                "exec",
                self._container_name,
                "/bin/mkdir",
                "-p",
                self._config.working_dir,
                capture=True,
            )

        self._started = True
+
+ async def stop(self) -> None:
+ """Stop and remove the container."""
+ if not self._started:
+ return
+
+ await self._run_podman("stop", "-t", "2", self._container_name, check=False)
+ await self._run_podman("rm", "-f", self._container_name, check=False)
+ self._started = False
+
    async def reset(self) -> None:
        """Clear workspace directory (in-place, no container restart).

        With a bind-mounted workspace this deletes files directly on the host
        filesystem, avoiding a podman exec (and the shared semaphore) entirely.
        Otherwise it falls back to an ``rm -rf`` inside the container.
        """
        if not self._started:
            return

        if self._workspace_host_dir:
            # Direct host filesystem cleanup - no podman exec, no semaphore
            logger.debug(
                f"[{self._container_name}] reset() using direct host cleanup..."
            )
            start = time.perf_counter()

            # Remove every entry in the workspace, files and directories alike.
            workspace_path = Path(self._workspace_host_dir)
            for item in workspace_path.iterdir():
                if item.is_dir():
                    shutil.rmtree(item)
                else:
                    item.unlink()

            elapsed = time.perf_counter() - start
            logger.debug(
                f"[{self._container_name}] reset() completed in {elapsed:.3f}s (direct)"
            )
            return

        logger.debug(f"[{self._container_name}] reset() starting podman-hpc exec...")
        start = time.perf_counter()

        # Shell glob expansion requires /bin/sh -c; a bare rm would not expand '*'.
        await self._run_podman(
            "exec",
            self._container_name,
            "/bin/sh",
            "-c",
            f"rm -rf {self._config.working_dir}/*",
        )

        elapsed = time.perf_counter() - start
        logger.debug(f"[{self._container_name}] reset() completed in {elapsed:.3f}s")
+
    async def compile(
        self,
        code: str,
        *,
        timeout_s: float = 5.0,
    ) -> CompileResult:
        """Syntax-check code using py_compile.

        Writes the code into the container and runs ``python -m py_compile``
        on it, classifying the outcome into a CompileResult.

        Args:
            code: Python source to check.
            timeout_s: Budget for both the file write and the compile step.

        Returns:
            CompileResult with status, parsed error location/message, and
            elapsed duration.
        """
        start = time.perf_counter()

        try:
            # Write code to container
            await self._write_file("_check.py", code, timeout_s=timeout_s)

            # Run py_compile (use full path for HPC compatibility)
            proc = await asyncio.wait_for(
                self._run_podman(
                    "exec",
                    self._container_name,
                    "/usr/local/bin/python",
                    "-m",
                    "py_compile",
                    f"{self._config.working_dir}/_check.py",
                    check=False,
                    capture=True,
                ),
                timeout=timeout_s,
            )

            duration_ms = (time.perf_counter() - start) * 1000

            if proc.returncode == 0:
                return CompileResult(
                    status=CompileStatus.SUCCESS,
                    duration_ms=duration_ms,
                )

            # Parse error message
            error_msg = proc.stderr or proc.stdout or ""
            line, column, clean_msg = parse_syntax_error(error_msg)

            # Classify error type
            # NOTE(review): py_compile normally surfaces only syntax-level
            # errors; the ImportError branch below may be vestigial — confirm.
            status = CompileStatus.SYNTAX_ERROR
            if "ImportError" in error_msg or "ModuleNotFoundError" in error_msg:
                status = CompileStatus.IMPORT_ERROR
            elif not clean_msg:
                status = CompileStatus.UNKNOWN_ERROR

            return CompileResult(
                status=status,
                error_message=clean_msg or error_msg,
                error_line=line,
                error_column=column,
                duration_ms=duration_ms,
            )

        except asyncio.TimeoutError:
            duration_ms = (time.perf_counter() - start) * 1000
            return CompileResult(
                status=CompileStatus.TIMEOUT,
                error_message=f"Compilation timed out after {timeout_s}s",
                duration_ms=duration_ms,
            )
+
    async def execute(
        self,
        code: str,
        *,
        stdin: str = "",
        skip_compile: bool = False,
        timeout_s: float = 10.0,
        memory_limit_mb: Optional[int] = None,
        env_vars: Optional[Dict[str, str]] = None,
    ) -> ExecutionResult:
        """Execute code with full resource isolation.

        Optionally compiles first, then runs the code inside the container
        via podman exec, classifying the exit code into a RunStatus.

        Args:
            code: Python source to run.
            stdin: Data piped to the program's stdin (also enables exec -i).
            skip_compile: Skip the py_compile pre-check.
            timeout_s: Per-phase timeout (compile and run each use it).
            memory_limit_mb: NOTE(review): currently unused in this method;
                the container-level --memory limit applies instead — confirm
                this is intended.
            env_vars: Extra environment variables for the process.

        Returns:
            ExecutionResult combining compile outcome, run status, captured
            output, and timings.
        """
        # Step 1: Compile
        if skip_compile:
            compile_result = CompileResult(status=CompileStatus.SUCCESS)
        else:
            compile_result = await self.compile(code, timeout_s=timeout_s)

        total_start = time.perf_counter()

        if not compile_result.success:
            # Short-circuit: never run code that failed to compile.
            total_ms = (time.perf_counter() - total_start) * 1000
            return ExecutionResult(
                compile_result=compile_result,
                run_status=RunStatus.NOT_RUN,
                compile_duration_ms=compile_result.duration_ms,
                total_duration_ms=total_ms,
            )

        # Step 2: Execute
        run_start = time.perf_counter()

        try:
            # Generate unique filename to avoid race conditions
            exec_id = uuid.uuid4().hex[:8]
            exec_filename = f"_exec_{exec_id}.py"

            # Write code to container
            await self._write_file(exec_filename, code, timeout_s=timeout_s)

            # Build exec command
            exec_cmd = ["exec"]
            if stdin:
                # -i keeps stdin open so input_data can be piped in.
                exec_cmd.append("-i")

            # Add environment variables
            if env_vars:
                for key, val in env_vars.items():
                    exec_cmd.extend(["-e", f"{key}={val}"])

            exec_cmd.extend(
                [
                    self._container_name,
                    "/usr/local/bin/python",
                    f"{self._config.working_dir}/{exec_filename}",
                ]
            )

            # Run with timeout
            proc = await asyncio.wait_for(
                self._run_podman(
                    *exec_cmd,
                    check=False,
                    capture=True,
                    input_data=stdin.encode("utf-8") if stdin else None,
                ),
                timeout=timeout_s,
            )

            run_ms = (time.perf_counter() - run_start) * 1000
            total_ms = (time.perf_counter() - total_start) * 1000

            # Classify run status
            exit_code = proc.returncode
            if exit_code == 0:
                run_status = RunStatus.SUCCESS
            elif exit_code == 137:  # SIGKILL (OOM)
                run_status = RunStatus.MEMORY_EXCEEDED
            elif exit_code == 143:  # SIGTERM
                run_status = RunStatus.KILLED
            else:
                run_status = RunStatus.RUNTIME_ERROR

            return ExecutionResult(
                compile_result=compile_result,
                run_status=run_status,
                stdout=proc.stdout or "",
                stderr=proc.stderr or "",
                exit_code=exit_code,
                compile_duration_ms=compile_result.duration_ms,
                run_duration_ms=run_ms,
                total_duration_ms=total_ms,
            )

        except asyncio.TimeoutError:
            run_ms = (time.perf_counter() - run_start) * 1000
            total_ms = (time.perf_counter() - total_start) * 1000

            # Best-effort cleanup - goes through exec_semaphore so won't deadlock
            try:
                await self._run_podman(
                    "exec",
                    self._container_name,
                    "/usr/bin/pkill",
                    "-9",
                    "python",
                    check=False,
                    capture=True,
                )
            except Exception:
                pass  # Best effort, reset() will clean up anyway

            return ExecutionResult(
                compile_result=compile_result,
                run_status=RunStatus.TIMEOUT,
                stderr=f"Execution timed out after {timeout_s}s",
                compile_duration_ms=compile_result.duration_ms,
                run_duration_ms=run_ms,
                total_duration_ms=total_ms,
            )
+
    async def _write_file(
        self,
        filename: str,
        content: str,
        *,
        timeout_s: float = 5.0,
    ) -> None:
        """
        Write a file to the container using tar pipe.

        Creates a tar archive in memory and pipes it to container.
        This is more robust than echo for handling special characters.
        With a bind-mounted workspace the file is written directly on the
        host instead, avoiding podman exec and the shared semaphore.

        Args:
            filename: Path of the file relative to the workspace directory.
            content: Text content to write (UTF-8 encoded).
            timeout_s: Budget for the tar-pipe exec.
        """
        if self._workspace_host_dir:
            # Direct host filesystem write - no podman exec, no semaphore
            path = Path(self._workspace_host_dir) / filename
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_text(content)
            return

        # Create tar archive in memory
        tar_buffer = io.BytesIO()
        with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
            file_data = content.encode("utf-8")
            tarinfo = tarfile.TarInfo(name=filename)
            tarinfo.size = len(file_data)
            tarinfo.mtime = int(time.time())
            tar.addfile(tarinfo, io.BytesIO(file_data))
        tar_buffer.seek(0)

        # Pipe tar to container
        # NOTE(review): "tar" is invoked without an absolute path, unlike the
        # other in-container commands this module runs; if PATH is broken by
        # the squashfs conversion (see module docstring) this may fail — confirm.
        await asyncio.wait_for(
            self._run_podman(
                "exec",
                "-i",
                self._container_name,
                "tar",
                "-xC",
                self._config.working_dir,
                check=True,
                capture=True,
                input_data=tar_buffer.read(),
            ),
            timeout=timeout_s,
        )
+
+ async def _run_podman(
+ self,
+ *args: str,
+ check: bool = True,
+ capture: bool = False,
+ input_data: Optional[bytes] = None,
+ ) -> "PodmanResult":
+ """
+ Run a podman-hpc command asynchronously.
+
+ For 'exec' commands, acquires the shared semaphore to prevent
+ overwhelming podman's lock manager (which deadlocks above ~8
+ concurrent operations).
+
+ Args:
+ *args: Command arguments (e.g., "exec", container_name, "python", ...)
+ check: Raise exception if command fails
+ capture: Capture stdout/stderr
+ input_data: Data to pipe to stdin
+
+ Returns:
+ PodmanResult with returncode, stdout, stderr
+ """
+ is_exec = args and args[0] == "exec"
+
+ # Use semaphore for exec commands to prevent podman deadlock
+ if is_exec and self._exec_semaphore:
+ async with self._exec_semaphore:
+ return await self._run_podman_inner(
+ *args, check=check, capture=capture, input_data=input_data
+ )
+ else:
+ return await self._run_podman_inner(
+ *args, check=check, capture=capture, input_data=input_data
+ )
+
    async def _run_podman_inner(
        self,
        *args: str,
        check: bool = True,
        capture: bool = False,
        input_data: Optional[bytes] = None,
    ) -> "PodmanResult":
        """Actually run the podman-hpc command (called by _run_podman).

        Spawns the subprocess, optionally feeds stdin, decodes any captured
        output, and logs a warning for commands slower than one second.
        Raises PodmanError when ``check`` is set and the exit code is non-zero.
        """
        start = time.perf_counter()

        proc = await asyncio.create_subprocess_exec(
            "podman-hpc",
            *args,
            stdin=asyncio.subprocess.PIPE if input_data else None,
            stdout=asyncio.subprocess.PIPE if capture else asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.PIPE if capture else asyncio.subprocess.DEVNULL,
        )

        # communicate() feeds stdin (if any) and drains pipes without deadlock.
        stdout_bytes, stderr_bytes = await proc.communicate(input=input_data)

        elapsed = time.perf_counter() - start
        if elapsed > 1.0:
            # Surface unexpectedly slow podman operations for HPC debugging.
            cmd_preview = " ".join(args[:4])
            logger.warning(
                f"[{self._container_name}] SLOW podman-hpc {cmd_preview}... "
                f"took {elapsed:.2f}s"
            )

        result = PodmanResult(
            returncode=proc.returncode or 0,
            stdout=stdout_bytes.decode("utf-8", errors="replace")
            if stdout_bytes
            else "",
            stderr=stderr_bytes.decode("utf-8", errors="replace")
            if stderr_bytes
            else "",
        )

        if check and result.returncode != 0:
            raise PodmanError(
                f"podman-hpc {' '.join(args)} failed with exit code {result.returncode}:\n"
                f"{result.stderr}"
            )

        return result
+
+ # -------------------------------------------------------------------------
+ # Batch execution (reduces semaphore acquisitions from O(N) to O(1))
+ # -------------------------------------------------------------------------
+
+ async def execute_batch(
+ self,
+ spec: BatchExecutionSpec,
+ ) -> AsyncIterator[Union[CompileResult, ExecutionResult]]:
+ """
+ Execute all tests in a single batch with streaming results.
+
+ This method reduces semaphore acquisitions from O(2N+1) to O(3) by:
+ 1. Bundling code, manifest, and runner into a single tar
+ 2. Executing the batch runner once, which runs all tests sequentially
+ 3. Streaming results back as JSONL
+
+ Args:
+ spec: Batch execution specification with code, tests, and options
+
+ Yields:
+ CompileResult (if compile_first=True), then ExecutionResult for each test
+ """
+ batch_dir = "_batch"
+ batch_start = time.perf_counter()
+
+ # Build manifest for the batch runner
+ manifest = {
+ "code_file": "solution.py",
+ "compile_first": spec.compile_first,
+ "timeout_s": spec.timeout_s,
+ "stop_on_first_failure": spec.stop_on_first_failure,
+ "tests": [
+ {"id": t.id or f"test_{i}", "stdin": t.input, "expected": t.expected}
+ for i, t in enumerate(spec.tests)
+ ],
+ }
+
+ # Build tar archive with all files
+ tar_data = self._build_batch_tar(
+ manifest=manifest,
+ code=spec.code,
+ runner_script=get_batch_runner_script(),
+ batch_dir=batch_dir,
+ )
+
+ # Write tar to container (1 semaphore acquisition)
+ await self._write_tar(tar_data, timeout_s=spec.timeout_s)
+
+ # Execute batch runner and stream results (1 semaphore acquisition)
+ manifest_path = f"{self._config.working_dir}/{batch_dir}/manifest.json"
+ runner_path = f"{self._config.working_dir}/{batch_dir}/batch_runner.py"
+
+ # Track timing and received results
+ run_start = time.perf_counter()
+ received_done = False
+ received_test_ids: set[str] = set()
+ compile_result: Optional[CompileResult] = None
+
+ # Calculate aggregate timeout accounting for parallelization in batch_runner
+ # With N workers: timeout = (ceil(N_tests / workers) × timeout_per_test) + buffer
+ num_workers = 16 # Matches batch_runner.py default for HPC
+ parallel_batches = math.ceil(len(spec.tests) / num_workers) if spec.tests else 1
+ aggregate_timeout = (
+ spec.timeout_s * parallel_batches + 60.0
+ ) # 60s buffer for HPC
+
+ try:
+ async with self._exec_semaphore:
+ proc = await asyncio.create_subprocess_exec(
+ "podman-hpc",
+ "exec",
+ "--workdir",
+ f"{self._config.working_dir}/{batch_dir}",
+ self._container_name,
+ "python",
+ runner_path,
+ manifest_path,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ )
+
+ # Results collected from streaming to yield after timeout handling
+ streamed_results: list = []
+
+ async def _stream_results():
+ """Stream results from batch runner, updating nonlocal state."""
+ nonlocal received_done, compile_result
+ async for line_bytes in proc.stdout:
+ line = line_bytes.decode("utf-8", errors="replace").strip()
+ if not line:
+ continue
+
+ try:
+ result = json.loads(line)
+ except json.JSONDecodeError:
+ logger.warning(f"Invalid JSON from batch runner: {line}")
+ continue
+
+ result_type = result.get("type")
+
+ if result_type == "compile":
+ compile_result = parse_batch_compile_result(result)
+ streamed_results.append(("compile", compile_result))
+ if not compile_result.success:
+ # Compilation failed, we're done
+ break
+
+ elif result_type == "test":
+ test_id = result.get("id", "unknown")
+ received_test_ids.add(test_id)
+ exec_result = parse_batch_test_result(result, run_start)
+ streamed_results.append(("test", exec_result))
+
+ elif result_type == "done":
+ received_done = True
+ break
+
+ elif result_type == "error":
+ logger.error(f"Batch runner error: {result.get('message')}")
+
+ # Wait for process to complete
+ await proc.wait()
+
+ try:
+ await asyncio.wait_for(_stream_results(), timeout=aggregate_timeout)
+ except asyncio.TimeoutError:
+ logger.warning(
+ f"[{self._container_name}] Batch timed out after {aggregate_timeout:.1f}s "
+ f"({len(received_test_ids)}/{len(spec.tests)} tests received)"
+ )
+ proc.kill()
+ await proc.wait()
+
+ # Yield all collected results
+ for result_type, result in streamed_results:
+ yield result
+
+ except asyncio.TimeoutError:
+ logger.warning(f"Batch execution timed out after {aggregate_timeout:.1f}s")
+
+ except Exception as e:
+ logger.warning(f"Batch execution stream broke: {e}")
+
+ # Handle missing tests (stream truncated before "done")
+ if not received_done and compile_result is None:
+ # No compile result received - emit a failure
+ compile_result = CompileResult(
+ status=CompileStatus.UNKNOWN_ERROR,
+ error_message="Batch execution terminated unexpectedly",
+ duration_ms=(time.perf_counter() - batch_start) * 1000,
+ )
+ yield compile_result
+
+ if not received_done and (compile_result is None or compile_result.success):
+ # Some tests may not have been run
+ for i, test in enumerate(spec.tests):
+ test_id = test.id or f"test_{i}"
+ if test_id not in received_test_ids:
+ run_ms = (time.perf_counter() - run_start) * 1000
+ yield ExecutionResult(
+ compile_result=compile_result
+ or CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SANDBOX_ERROR,
+ stdout="",
+ stderr="Batch execution terminated unexpectedly",
+ exit_code=None,
+ run_duration_ms=run_ms,
+ total_duration_ms=run_ms,
+ )
+
+ def _build_batch_tar(
+ self,
+ manifest: dict,
+ code: str,
+ runner_script: str,
+ batch_dir: str = "_batch",
+ ) -> bytes:
+ """Build tar archive containing batch execution files.
+
+ Creates a tar with:
+ - {batch_dir}/manifest.json: Test configuration
+ - {batch_dir}/solution.py: Code under test
+ - {batch_dir}/batch_runner.py: Self-contained test runner
+
+ Args:
+ manifest: Test configuration dict
+ code: Python code to test
+ runner_script: Content of batch_runner.py
+ batch_dir: Directory name within workspace
+
+ Returns:
+ Tar archive bytes
+ """
+ buf = io.BytesIO()
+ with tarfile.open(fileobj=buf, mode="w") as tar:
+ # Create directory entry first
+ dir_info = tarfile.TarInfo(name=batch_dir)
+ dir_info.type = tarfile.DIRTYPE
+ dir_info.mode = 0o755
+ dir_info.mtime = int(time.time())
+ tar.addfile(dir_info)
+
+ # Add manifest.json
+ manifest_data = json.dumps(manifest, indent=2).encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/manifest.json")
+ info.size = len(manifest_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(manifest_data))
+
+ # Add solution.py
+ code_data = code.encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/solution.py")
+ info.size = len(code_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(code_data))
+
+ # Add batch_runner.py
+ runner_data = runner_script.encode("utf-8")
+ info = tarfile.TarInfo(name=f"{batch_dir}/batch_runner.py")
+ info.size = len(runner_data)
+ info.mtime = int(time.time())
+ tar.addfile(info, io.BytesIO(runner_data))
+
+ buf.seek(0)
+ return buf.read()
+
    async def _write_tar(
        self,
        tar_data: bytes,
        *,
        timeout_s: float = 5.0,
    ) -> None:
        """Write a tar archive directly to the container.

        Similar to _write_file but takes raw tar bytes.

        Args:
            tar_data: Complete tar archive (e.g. produced by _build_batch_tar).
            timeout_s: Timeout for the podman exec path; the bind-mount path
                extracts locally and is not subject to this timeout.
        """
        if self._workspace_host_dir:
            # Fast path: bind mount active — extracting on the host
            # filesystem makes files visible in the container without a
            # podman exec round-trip.
            # NOTE(review): extractall() without a member filter trusts the
            # archive contents; tar_data appears to be built locally by
            # _build_batch_tar, so this looks safe — confirm no untrusted
            # tars can reach this path.
            buf = io.BytesIO(tar_data)
            with tarfile.open(fileobj=buf, mode="r") as tar:
                tar.extractall(path=self._workspace_host_dir)
            return

        # Slow path: stream the archive over stdin and untar it inside the
        # container's working directory.
        await asyncio.wait_for(
            self._run_podman(
                "exec",
                "-i",
                self._container_name,
                "tar",
                "-xC",
                self._config.working_dir,
                check=True,
                capture=True,
                input_data=tar_data,
            ),
            timeout=timeout_s,
        )
+
+
@dataclass
class PodmanResult:
    """Result of a podman-hpc command."""

    returncode: int  # Process exit code (0 indicates success)
    stdout: str  # Captured standard output
    stderr: str  # Captured standard error
+
+
class PodmanError(Exception):
    """Raised when a podman-hpc command fails."""
+
+
class PodmanHPCSandboxPool(BaseSandboxPool[PodmanHPCSandbox]):
    """
    Pool of persistent Podman-HPC containers with LRU caching.

    Manages container lifecycle, checkout/release, and execution caching.
    Designed for HPC environments with Slurm job scheduling.

    Inherits from BaseSandboxPool to use background reset pattern:
    - checkout() returns pre-reset sandboxes instantly
    - release() spawns background reset task
    - shutdown() waits for pending resets before cleanup
    """

    def __init__(
        self,
        n_workers: int = 4,
        image: str = "python:3.11-slim",
        config: Optional[PodmanConfig] = None,
        cache_size: int = 10000,
        auto_replace_failed: bool = True,
        max_consecutive_failures: int = 5,
        max_concurrent_ops: int = 8,
        workspace_base_dir: Optional[str] = "auto",
    ):
        """
        Initialize Podman-HPC sandbox pool.

        Args:
            n_workers: Number of sandboxes to create
            image: Podman image (e.g., "python:3.11-slim")
            config: Podman-specific configuration
            cache_size: Maximum entries in execution cache
            auto_replace_failed: If True, create new sandbox when reset fails
            max_consecutive_failures: Maximum consecutive reset failures before raising
                SandboxPoolExhaustedError (circuit breaker threshold)
            max_concurrent_ops: Maximum concurrent operations (resets, executions)
            workspace_base_dir: Base directory for bind mounts. Options:
                - "auto" (default): Auto-detect; use node-local storage if on HPC,
                  else None
                - explicit path: Use specified directory for bind mounts
                - None: Disable bind mounts, use tar-based I/O
        """
        super().__init__(
            n_workers=n_workers,
            cache_size=cache_size,
            auto_replace_failed=auto_replace_failed,
            max_consecutive_failures=max_consecutive_failures,
            max_concurrent_ops=max_concurrent_ops,
        )
        self._image = image
        self._config = config or PodmanConfig()
        # Created in _create_sandboxes(); shared by all sandboxes so the
        # total number of concurrent podman exec calls stays bounded.
        self._exec_semaphore: Optional[asyncio.Semaphore] = None

        # Extract Python version from image name (e.g. "3.11")
        self._python_version = self._parse_python_version(image)

        # Resolve workspace_base_dir
        if workspace_base_dir == "auto":
            # Auto-detect: use node-local scratch if running under Slurm
            slurm_job_id = os.environ.get("SLURM_JOB_ID")
            if (
                slurm_job_id and Path("/home/u5ds/joanv.u5ds").exists()
            ):  # TODO [joan]: Remove hardcoding
                self._workspace_base_dir: Optional[str] = (
                    f"/home/u5ds/joanv.u5ds/sandbox/ludic-{slurm_job_id}"
                )
            else:
                self._workspace_base_dir = None
        else:
            self._workspace_base_dir = workspace_base_dir

    @property
    def python_version(self) -> str:
        """Python version used by sandboxes in this pool."""
        return self._python_version

    # -------------------------------------------------------------------------
    # Abstract method implementations (backend-specific logic)
    # -------------------------------------------------------------------------

    async def _create_sandboxes(self) -> List[PodmanHPCSandbox]:
        """
        Create and start all Podman-HPC container sandboxes.

        Pulls the image (auto-migrates to shared storage on HPC) and creates
        persistent containers in parallel.

        Returns:
            List of started PodmanHPCSandbox instances
        """
        # Create shared exec semaphore (prevents podman deadlock)
        self._exec_semaphore = asyncio.Semaphore(self._max_concurrent_ops)
        logger.info(
            f"Podman exec semaphore initialized: max_concurrent_ops={self._max_concurrent_ops}"
        )

        # Pull image (podman-hpc pull auto-migrates to shared storage)
        logger.info(
            f"Pulling image {self._image} (may take a moment for HPC migration)..."
        )
        proc = await asyncio.create_subprocess_exec(
            "podman-hpc",
            "pull",
            self._image,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            # Fix: previously a failed pull was silently ignored and only
            # surfaced later as confusing container-start errors. The pull
            # may still be non-fatal (image already cached), so warn rather
            # than raise.
            logger.warning(
                f"podman-hpc pull {self._image} exited with code "
                f"{proc.returncode}: {stderr.decode('utf-8', errors='replace').strip()}"
            )

        # If using bind mounts, create base directory
        if self._workspace_base_dir:
            Path(self._workspace_base_dir).mkdir(parents=True, exist_ok=True)
            logger.info(f"Bind mount enabled: {self._workspace_base_dir}")
        else:
            logger.info("Bind mount disabled, using tar-based I/O")

        # Create and start sandboxes in parallel
        container_prefix = _get_container_name_prefix()

        async def _create_and_start(i: int) -> PodmanHPCSandbox:
            container_name = f"{container_prefix}-{i}"

            # Create per-sandbox host directory if using bind mounts
            workspace_host_dir = None
            if self._workspace_base_dir:
                workspace_host_dir = f"{self._workspace_base_dir}/sandbox-{i}"
                Path(workspace_host_dir).mkdir(parents=True, exist_ok=True)

            sandbox = PodmanHPCSandbox(
                container_name=container_name,
                image=self._image,
                config=self._config,
                python_version=self._python_version,
                exec_semaphore=self._exec_semaphore,  # Shared across all sandboxes
                workspace_host_dir=workspace_host_dir,
            )
            await sandbox.start()
            return sandbox

        sandboxes = await asyncio.gather(
            *[_create_and_start(i) for i in range(self._n_workers)]
        )

        logger.info(f"Podman-HPC sandbox pool ready ({self._n_workers} workers)")
        return sandboxes

    async def _stop_sandbox(self, sandbox: PodmanHPCSandbox) -> None:
        """
        Stop and remove a single Podman container.

        Called during shutdown and when replacing a failed sandbox.
        Handles errors gracefully (logs warnings, doesn't raise).

        Args:
            sandbox: The sandbox to stop
        """
        try:
            await sandbox.stop()
        except Exception as e:
            logger.warning(f"Failed to stop Podman container: {e}")

    async def _create_replacement_sandbox(self) -> Optional[PodmanHPCSandbox]:
        """
        Create a single replacement sandbox for a failed one.

        Creates a new container with the same configuration and starts it.

        Returns:
            New PodmanHPCSandbox instance, or None if creation fails
        """
        try:
            container_prefix = _get_container_name_prefix()
            # Fix: use nanosecond resolution and a single value for both the
            # container name and host dir. int(time.time()) could collide if
            # two replacements were created within the same second, and the
            # original computed the timestamp twice (name and dir could
            # disagree across a second boundary).
            unique_suffix = time.time_ns()
            container_name = f"{container_prefix}-replacement-{unique_suffix}"

            # Create per-sandbox host directory if using bind mounts
            workspace_host_dir = None
            if self._workspace_base_dir:
                workspace_host_dir = (
                    f"{self._workspace_base_dir}/sandbox-replacement-{unique_suffix}"
                )
                Path(workspace_host_dir).mkdir(parents=True, exist_ok=True)

            sandbox = PodmanHPCSandbox(
                container_name=container_name,
                image=self._image,
                config=self._config,
                python_version=self._python_version,
                exec_semaphore=self._exec_semaphore,  # Use shared semaphore
                workspace_host_dir=workspace_host_dir,
            )
            await sandbox.start()
            logger.info(f"Created replacement Podman sandbox: {container_name}")
            return sandbox
        except Exception as e:
            logger.error(f"Failed to create replacement Podman sandbox: {e}")
            return None

    async def shutdown(self) -> None:
        """
        Shutdown pool and clean up resources.

        Stops all sandboxes and removes workspace directories if using bind mounts.
        """
        # Call parent shutdown to stop sandboxes
        await super().shutdown()

        # Clean up host workspace directories
        if self._workspace_base_dir:
            workspace_path = Path(self._workspace_base_dir)
            if workspace_path.exists():
                try:
                    shutil.rmtree(self._workspace_base_dir, ignore_errors=True)
                    logger.info(
                        f"Cleaned up workspace directory: {self._workspace_base_dir}"
                    )
                except Exception as e:
                    logger.warning(f"Failed to clean up workspace directory: {e}")

    # -------------------------------------------------------------------------
    # Helper methods
    # -------------------------------------------------------------------------

    @staticmethod
    def _parse_python_version(image: str) -> str:
        """Extract the Python version (e.g. "3.11") from an image name.

        Handles common patterns: python:3.11-slim, python:3.11,
        ghcr.io/.../python:3.11. Falls back to "3.11" when no version
        can be parsed.
        """
        match = re.search(r"python:(\d+\.\d+)", image)
        if match:
            return match.group(1)
        return "3.11"  # Default fallback
diff --git a/src/ludic/envs/code_exec/pool.py b/src/ludic/envs/code_exec/pool.py
new file mode 100644
index 0000000..8a8cde4
--- /dev/null
+++ b/src/ludic/envs/code_exec/pool.py
@@ -0,0 +1,483 @@
+"""
+Base sandbox pool with background reset pattern.
+
+Provides shared pool management logic for Docker, Podman, and other backends.
+The background reset pattern ensures that sandbox cleanup happens off the
+critical path, maximizing throughput for rollout generation.
+"""
+
+from __future__ import annotations
+
import asyncio
import logging
import time
from abc import ABC, abstractmethod
from typing import Dict, Generic, List, Optional, Set, TypeVar

from .cache import LRUCache
from .sandbox import Sandbox
from .types import BatchTestResult, SandboxPoolExhaustedError
+
+logger = logging.getLogger(__name__)
+
+# Type variable for sandbox implementations
+S = TypeVar("S", bound=Sandbox)
+
+
class BaseSandboxPool(ABC, Generic[S]):
    """
    Abstract base class for sandbox pools with background reset.

    Provides queue-based checkout/release with sandboxes reset in background
    tasks (off critical path). Includes LRU caching, pending task tracking,
    and error handling for failed resets.

    Subclasses must implement: _create_sandboxes(), _stop_sandbox(), python_version.

    Background Reset Pattern:
        Released sandboxes are reset asynchronously and returned to the queue.
        checkout() receives already-clean sandboxes instantly, hiding reset latency.
        Failed resets discard the sandbox and optionally create a replacement.
    """

    def __init__(
        self,
        n_workers: int = 4,
        cache_size: int = 10000,
        auto_replace_failed: bool = True,
        max_consecutive_failures: int = 5,
        max_concurrent_ops: int = 8,
    ):
        """
        Initialize the pool.

        Args:
            n_workers: Number of sandboxes to create
            cache_size: Maximum entries in the execution cache
            auto_replace_failed: If True, create new sandbox when reset fails
            max_consecutive_failures: Maximum consecutive reset failures before raising
                SandboxPoolExhaustedError (circuit breaker threshold)
            max_concurrent_ops: Maximum concurrent sandbox operations (resets, exec
                calls). Prevents podman/docker deadlock with too many simultaneous calls.
        """
        self._n_workers = n_workers
        self._cache = LRUCache(max_size=cache_size)
        self._auto_replace_failed = auto_replace_failed
        self._max_consecutive_failures = max_consecutive_failures
        self._max_concurrent_ops = max_concurrent_ops

        self._sandboxes: List[S] = []
        self._queue: Optional[asyncio.Queue[S]] = None
        self._pending_resets: Set[asyncio.Task] = set()
        self._started = False
        self._shutting_down = False
        self._consecutive_failures = 0
        # Fix: created in start(), but initialized here so attribute access
        # (e.g. checkout() timeout diagnostics) can never raise
        # AttributeError on a pool whose start() was not called.
        self._ops_semaphore: Optional[asyncio.Semaphore] = None

    # -------------------------------------------------------------------------
    # Abstract methods (must be implemented by subclasses)
    # -------------------------------------------------------------------------

    @property
    @abstractmethod
    def python_version(self) -> str:
        """Python version used by sandboxes in this pool."""
        ...

    @abstractmethod
    async def _create_sandboxes(self) -> List[S]:
        """
        Create all sandbox instances.

        Called by start(). Should create n_workers sandboxes, start them,
        and return the list. This is where backend-specific logic lives
        (Docker container creation, Podman-HPC setup, etc.).

        Returns:
            List of started sandbox instances
        """
        ...

    @abstractmethod
    async def _stop_sandbox(self, sandbox: S) -> None:
        """
        Stop and cleanup a single sandbox.

        Called during shutdown and when replacing a failed sandbox.
        Should handle errors gracefully (log warnings, don't raise).

        Args:
            sandbox: The sandbox to stop
        """
        ...

    async def _create_replacement_sandbox(self) -> Optional[S]:
        """
        Create a single replacement sandbox.

        Called when a sandbox fails to reset and auto_replace_failed is True.
        Default implementation returns None (no replacement). Override in
        subclass if dynamic sandbox creation is supported.

        Returns:
            New sandbox instance, or None if replacement not supported
        """
        return None

    # -------------------------------------------------------------------------
    # Pool lifecycle
    # -------------------------------------------------------------------------

    @property
    def available(self) -> int:
        """Number of sandboxes currently available for checkout."""
        if self._queue is None:
            return 0
        return self._queue.qsize()

    @property
    def cache_stats(self) -> Dict[str, int]:
        """Cache statistics (hits, misses, size, max_size)."""
        return self._cache.stats

    @property
    def pending_resets(self) -> int:
        """Number of background reset tasks currently running."""
        return len(self._pending_resets)

    async def start(self) -> None:
        """
        Initialize the pool.

        Creates all sandboxes and makes them available for checkout.
        Idempotent - calling multiple times has no effect.
        """
        if self._started:
            return

        # Limits concurrent background reset TASKS (admission control)
        # Prevents podman/docker deadlock with too many simultaneous operations
        self._ops_semaphore = asyncio.Semaphore(self._max_concurrent_ops)
        logger.info(
            f"Pool starting: n_workers={self._n_workers}, "
            f"max_concurrent_ops={self._max_concurrent_ops}"
        )

        # Create sandboxes (backend-specific)
        self._sandboxes = await self._create_sandboxes()

        # Create queue and populate with all sandboxes
        self._queue = asyncio.Queue()
        for sandbox in self._sandboxes:
            await self._queue.put(sandbox)

        self._started = True
        logger.info(
            f"Pool started: {len(self._sandboxes)} sandboxes ready, "
            f"queue_size={self._queue.qsize()}"
        )

    async def shutdown(self) -> None:
        """
        Tear down all sandboxes and release resources.

        Waits for all pending reset tasks to complete before stopping
        sandboxes, ensuring clean shutdown without orphaned tasks.
        """
        if not self._started:
            return

        self._shutting_down = True

        # Wait for all pending reset tasks to complete
        if self._pending_resets:
            logger.debug(f"Waiting for {len(self._pending_resets)} pending resets...")
            await asyncio.gather(*self._pending_resets, return_exceptions=True)

        # Stop all sandboxes
        for sandbox in self._sandboxes:
            await self._stop_sandbox(sandbox)

        self._sandboxes.clear()
        self._started = False
        self._queue = None
        self._shutting_down = False

    async def drain_pending_resets(self, timeout_s: float = 60.0) -> int:
        """
        Wait for all pending reset tasks to complete.

        Call this before switching between high-concurrency phases
        (e.g., before eval after training step) to ensure all sandboxes
        are available in the queue.

        Uses asyncio.wait() instead of wait_for(gather()) to avoid
        cancelling tasks on timeout (which would destroy sandboxes).

        Args:
            timeout_s: Maximum time to wait for resets to complete

        Returns:
            Number of resets that completed
        """
        if not self._pending_resets:
            logger.debug("Drain called but no pending resets")
            return 0

        # Snapshot the current tasks (set may change during await)
        tasks = list(self._pending_resets)
        count = len(tasks)
        logger.info(
            f"Draining {count} pending resets... "
            f"queue: {self.available}/{self._n_workers}"
        )

        start = time.time()

        # Use wait() instead of wait_for(gather()) - doesn't cancel on timeout
        done, pending = await asyncio.wait(tasks, timeout=timeout_s)

        elapsed = time.time() - start

        if pending:
            logger.warning(
                f"Drain timeout after {elapsed:.1f}s! "
                f"Completed: {len(done)}, still pending: {len(pending)}, "
                f"queue: {self.available}/{self._n_workers}"
            )
        else:
            logger.info(
                f"Drain complete in {elapsed:.1f}s: "
                f"{len(done)} resets finished, "
                f"queue: {self.available}/{self._n_workers}"
            )

        return len(done)

    # -------------------------------------------------------------------------
    # Checkout / Release with background reset
    # -------------------------------------------------------------------------

    async def checkout(self, timeout_s: float = 30.0) -> S:
        """
        Get exclusive access to a sandbox.

        The returned sandbox is guaranteed to be in a clean state (reset
        was performed in the background after the previous release).

        Waits on the queue for a sandbox to become available. Background
        resets are rate-limited by semaphore to prevent backend deadlock.

        Args:
            timeout_s: Maximum time to wait for a sandbox

        Returns:
            Exclusive sandbox handle

        Raises:
            RuntimeError: If pool not started
            TimeoutError: If no sandbox available within timeout
        """
        if not self._started or self._queue is None:
            raise RuntimeError("Pool not started. Call start() first.")

        start_time = time.monotonic()
        deadline = start_time + timeout_s
        attempt = 0

        while True:
            remaining = deadline - time.monotonic()
            attempt += 1

            if remaining <= 0:
                # Detailed timeout diagnostics (reads the semaphore's private
                # _value; diagnostic-only, never used for control flow)
                semaphore_free = self._ops_semaphore._value if self._ops_semaphore else 0
                logger.error(
                    f"CHECKOUT TIMEOUT after {timeout_s}s! "
                    f"Pool: {self._n_workers}, available: {self.available}, "
                    f"pending_resets: {self.pending_resets}, "
                    f"semaphore: {semaphore_free}/{self._max_concurrent_ops} free, "
                    f"attempts: {attempt}"
                )
                raise TimeoutError(
                    f"No sandbox available after {timeout_s}s. "
                    f"Pool size: {self._n_workers}, available: {self.available}, "
                    f"pending resets: {self.pending_resets}"
                )

            # Log if we're waiting with empty queue
            if self._queue.empty() and attempt == 1:
                logger.info(
                    f"Checkout waiting: queue empty, "
                    f"pending_resets: {self.pending_resets}"
                )

            try:
                sandbox = await asyncio.wait_for(
                    self._queue.get(),
                    timeout=min(remaining, 5.0),  # Short timeout to recheck
                )
                wait_time = time.monotonic() - start_time
                if wait_time > 1.0:
                    logger.info(
                        f"Checkout OK after {wait_time:.2f}s wait, "
                        f"queue now: {self._queue.qsize()}/{self._n_workers}"
                    )
                return sandbox
            except asyncio.TimeoutError:
                # Log periodic status during long waits
                elapsed = time.monotonic() - start_time
                logger.warning(
                    f"Checkout still waiting after {elapsed:.1f}s: "
                    f"available: {self.available}, pending: {self.pending_resets}"
                )
                continue

    async def release(self, sandbox: S) -> None:
        """
        Return a sandbox to the pool.

        The sandbox is reset in a background task, then returned to the
        available queue. This makes release() return immediately without
        blocking the caller.

        Args:
            sandbox: The sandbox to release (must have been obtained via checkout)

        Raises:
            RuntimeError: If pool not started
        """
        if not self._started or self._queue is None:
            raise RuntimeError("Pool not started")

        if self._shutting_down:
            # During shutdown, don't spawn new tasks
            return

        # Spawn background reset task
        task = asyncio.create_task(
            self._background_reset(sandbox),
            name=f"sandbox-reset-{id(sandbox)}",
        )
        self._pending_resets.add(task)
        task.add_done_callback(self._pending_resets.discard)

    async def _background_reset(self, sandbox: S) -> None:
        """
        Reset sandbox and return to queue (runs in background).

        Uses semaphore to limit concurrent reset operations (prevents
        podman deadlock with too many simultaneous exec calls).

        On success, the sandbox is returned to the available queue.
        On failure, the sandbox is discarded and optionally replaced.
        """
        sandbox_id = id(sandbox) % 10000  # Short ID for logging
        wait_start = time.time()

        # Limit concurrent ops to prevent podman/docker deadlock
        async with self._ops_semaphore:
            wait_elapsed = time.time() - wait_start
            if wait_elapsed > 0.1:
                logger.debug(
                    f"[SB-{sandbox_id}] Semaphore acquired after {wait_elapsed:.2f}s wait"
                )

            reset_start = time.time()
            try:
                await sandbox.reset()
                reset_elapsed = time.time() - reset_start
                total_elapsed = time.time() - wait_start

                if self._queue is not None and not self._shutting_down:
                    await self._queue.put(sandbox)
                    logger.debug(
                        f"[SB-{sandbox_id}] Reset OK: {reset_elapsed:.2f}s reset, "
                        f"{total_elapsed:.2f}s total. "
                        f"Queue now: {self._queue.qsize()}/{self._n_workers}"
                    )
            except Exception as e:
                reset_elapsed = time.time() - reset_start
                logger.error(
                    f"[SB-{sandbox_id}] Reset FAILED after {reset_elapsed:.2f}s: {e}"
                )
                await self._handle_reset_failure(sandbox, e)

    async def _handle_reset_failure(self, sandbox: S, error: Exception) -> None:
        """
        Handle a sandbox that failed to reset.

        Logs the error, removes the sandbox from the pool, and optionally
        creates a replacement. Implements circuit breaker pattern to detect
        systemic failures.

        Raises:
            SandboxPoolExhaustedError: If consecutive failures exceed threshold
        """
        # Increment failure counter
        self._consecutive_failures += 1

        logger.warning(
            f"Sandbox reset failed: {error}. Discarding sandbox. "
            f"Consecutive failures: {self._consecutive_failures}/{self._max_consecutive_failures}"
        )

        # Check circuit breaker threshold
        if self._consecutive_failures >= self._max_consecutive_failures:
            logger.error(
                f"Circuit breaker triggered: {self._consecutive_failures} consecutive "
                f"sandbox reset failures. Pool is exhausted."
            )
            # NOTE(review): this raise propagates out of a fire-and-forget
            # background task whose done-callback only discards it, so callers
            # of checkout()/release() never see the exception — the error log
            # above is the only visible signal. Confirm whether exhaustion
            # should instead be surfaced to checkout() callers.
            raise SandboxPoolExhaustedError(
                f"Sandbox pool exhausted after {self._consecutive_failures} consecutive "
                f"reset failures. This indicates a systemic issue requiring operator intervention."
            )

        # Remove from tracked sandboxes
        if sandbox in self._sandboxes:
            self._sandboxes.remove(sandbox)

        # Try to stop the failed sandbox
        try:
            await self._stop_sandbox(sandbox)
        except Exception as stop_error:
            logger.warning(f"Failed to stop broken sandbox: {stop_error}")

        # Optionally create replacement
        if self._auto_replace_failed and not self._shutting_down:
            try:
                replacement = await self._create_replacement_sandbox()
                if replacement is not None:
                    self._sandboxes.append(replacement)
                    if self._queue is not None:
                        await self._queue.put(replacement)
                    # Reset failure counter on successful replacement
                    self._consecutive_failures = 0
                    logger.info(
                        "Created replacement sandbox after reset failure. "
                        "Consecutive failure counter reset."
                    )
            except Exception as create_error:
                logger.warning(f"Failed to create replacement sandbox: {create_error}")

    # -------------------------------------------------------------------------
    # Cache interface
    # -------------------------------------------------------------------------

    def get_cached(
        self,
        code_hash: str,
        tests_hash: str,
    ) -> Optional[BatchTestResult]:
        """Check cache for result (sync, thread-safe)."""
        return self._cache.get(code_hash, tests_hash)

    def put_cached(
        self,
        code_hash: str,
        tests_hash: str,
        result: BatchTestResult,
    ) -> None:
        """Store result in cache (sync, thread-safe)."""
        self._cache.put(code_hash, tests_hash, result)
diff --git a/src/ludic/envs/code_exec/runners.py b/src/ludic/envs/code_exec/runners.py
new file mode 100644
index 0000000..6f0f93d
--- /dev/null
+++ b/src/ludic/envs/code_exec/runners.py
@@ -0,0 +1,620 @@
+"""
+Code runners for executing code against test cases.
+
+This module defines the CodeRunner protocol and concrete implementations
+for different test execution strategies (stdin/stdout, function calls, etc.).
+
+The runner is responsible for:
+ 1. Orchestrating compilation and execution via a Sandbox
+ 2. Running code against multiple TestCases
+ 3. Using an OutputVerifier to compare results
+ 4. Building rich TestResult and BatchTestResult objects
+"""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import json
+import logging
+from typing import List, Optional, Protocol, Set, runtime_checkable
+
+from .adapters.base import OutputVerifier
+from .sandbox import Sandbox
+from .types import (
+ BatchExecutionSpec,
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+ TestResult,
+)
+
+logger = logging.getLogger(__name__)
+
+
def compute_hash(content: str) -> str:
    """
    Return the first 16 hex characters of SHA256(content).

    Used as a cache key to identify code and test sets. 16 hex chars
    encode 64 bits, keeping the collision probability below 1e-10 for
    realistically sized datasets.

    Args:
        content: String to hash

    Returns:
        First 16 characters of the SHA256 hex digest
    """
    digest = hashlib.sha256(content.encode()).hexdigest()
    return digest[:16]
+
+
def hash_tests(tests: List[TestCase]) -> str:
    """
    Compute a stable 16-character hash of a list of test cases.

    Serializes each test's (input, expected) pair to canonical JSON
    (sorted keys; non-serializable values coerced via str) so the result
    is deterministic across runs, then takes the first 16 hex chars of
    the SHA256 digest.

    Args:
        tests: List of test cases to hash

    Returns:
        16-character hash string
    """
    pairs = [(case.input, case.expected) for case in tests]
    canonical = json.dumps(pairs, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode()).hexdigest()[:16]
+
+
@runtime_checkable
class CodeRunner(Protocol):
    """
    Protocol for running code against test cases.

    A runner orchestrates the interaction between a Sandbox and test cases,
    using an OutputVerifier to determine if each test passes. It handles
    compilation, execution, error recovery, and early stopping.

    Implementations should be stateless and reusable across multiple
    test runs. All state is passed explicitly via arguments.

    Note: because this is @runtime_checkable, isinstance() checks only
    verify that a run_tests attribute exists, not its signature.
    """

    async def run_tests(
        self,
        sandbox: Sandbox,
        code: str,
        tests: List[TestCase],
        *,
        verifier: OutputVerifier,
        stop_on_first_failure: bool = False,
        compile_first: bool = True,
    ) -> BatchTestResult:
        """
        Run code against all test cases and return aggregated results.

        Args:
            sandbox: Sandbox to execute code in (must be checked out)
            code: Source code to test
            tests: List of test cases to run
            verifier: Verifier to compare actual vs expected output
            stop_on_first_failure: If True, skip remaining tests after first failure
            compile_first: If True, compile once before running tests

        Returns:
            BatchTestResult with individual test results and metadata
        """
        ...
+
+
+class StdinStdoutRunner:
+ """
+ Runner for APPS-style stdin/stdout testing.
+
+ This runner executes code that reads from stdin and writes to stdout,
+ comparing the output against expected values. This is the standard
+ format for competitive programming problems (Codeforces, APPS, etc.).
+
+ Each test case's `input` field is passed as stdin, and the `expected`
+ field is compared against stdout using the provided verifier.
+
+ Design notes:
+ - Default timeout is 5.0s for efficiency (per user specification)
+ - Compilation is checked first by default to get early failure signal
+ - All operations are async to avoid blocking the event loop
+ - Rich error details in TestResult.comparison_details
+ """
+
+ def __init__(
+ self,
+ default_timeout_s: float = 5.0,
+ memory_limit_mb: Optional[int] = 256,
+ use_batch_execution: bool = True,
+ ) -> None:
+ """
+ Initialize the runner with default resource limits.
+
+ Args:
+ default_timeout_s: Default execution timeout per test (seconds).
+ Tests can override via metadata["timeout_s"].
+ memory_limit_mb: Memory limit for execution (None = no limit)
+ use_batch_execution: If True and sandbox supports it, use batched
+ execution to reduce semaphore acquisitions.
+ """
+ self._default_timeout_s = default_timeout_s
+ self._memory_limit_mb = memory_limit_mb
+ self._use_batch_execution = use_batch_execution
+
+ async def run_tests(
+ self,
+ sandbox: Sandbox,
+ code: str,
+ tests: List[TestCase],
+ *,
+ verifier: OutputVerifier,
+ stop_on_first_failure: bool = False,
+ compile_first: bool = True,
+ ) -> BatchTestResult:
+ """
+ Run stdin/stdout tests against code.
+
+ Implementation steps:
+ 1. Compute code_hash and tests_hash for caching
+ 2. If compile_first=True, compile code and fail fast if it fails
+ 3. For each test:
+ - Execute with test.input as stdin
+ - Compare stdout against test.expected using verifier
+ - Build TestResult with full metadata
+ 4. If stop_on_first_failure=True, mark remaining tests NOT_RUN
+ 5. Return BatchTestResult
+
+ Args:
+ sandbox: Sandbox to execute code in (must be checked out)
+ code: Source code to test
+ tests: List of test cases (input/expected are stdin/stdout strings)
+ verifier: Verifier to compare stdout vs expected
+ stop_on_first_failure: If True, skip remaining tests after first failure
+ compile_first: If True, compile once before running tests
+
+ Returns:
+ BatchTestResult with results for each test
+ """
+ import time
+
+ run_start = time.perf_counter()
+
+ # Compute hashes for caching
+ code_hash = compute_hash(code)
+ tests_hash_val = hash_tests(tests)
+
+ # Use batch execution if enabled and sandbox supports it
+ has_batch = hasattr(sandbox, "execute_batch")
+ logger.debug(
+ f"run_tests: use_batch={self._use_batch_execution}, "
+ f"has_execute_batch={has_batch}, num_tests={len(tests)}"
+ )
+
+ if self._use_batch_execution and has_batch:
+ result = await self._run_tests_batched(
+ sandbox=sandbox,
+ code=code,
+ tests=tests,
+ verifier=verifier,
+ stop_on_first_failure=stop_on_first_failure,
+ compile_first=compile_first,
+ code_hash=code_hash,
+ tests_hash=tests_hash_val,
+ )
+ elapsed_ms = (time.perf_counter() - run_start) * 1000
+ logger.debug(
+ f"Batch execution completed: {len(tests)} tests in {elapsed_ms:.1f}ms, "
+ f"passed={result.passed_count}/{result.total_count}"
+ )
+ return result
+
+ # Non-batch execution
+ # Step 1: Compile first if requested
+ compile_result: Optional[CompileResult] = None
+ if compile_first:
+ compile_result = await sandbox.compile(
+ code,
+ timeout_s=self._default_timeout_s,
+ )
+
+ # If compilation failed, all tests fail without execution
+ if not compile_result.success:
+ return self._create_all_failed_batch(
+ tests=tests,
+ code_hash=code_hash,
+ tests_hash=tests_hash_val,
+ compile_result=compile_result,
+ reason="compilation_failed",
+ )
+
+ # Step 2: Run tests (in parallel when possible)
+ if stop_on_first_failure:
+ # Sequential execution with early stopping
+ results: List[TestResult] = []
+ for test_case in tests:
+ # Get timeout for this test (allow per-test override)
+ timeout_s = test_case.metadata.get("timeout_s", self._default_timeout_s)
+ memory_limit = test_case.metadata.get(
+ "memory_limit_mb", self._memory_limit_mb
+ )
+
+ # Execute the test
+ test_result = await self._run_single_test(
+ sandbox=sandbox,
+ code=code,
+ test_case=test_case,
+ verifier=verifier,
+ timeout_s=timeout_s,
+ memory_limit_mb=memory_limit,
+ skip_compile=compile_first, # Skip if we already compiled
+ )
+
+ results.append(test_result)
+
+ # Stop on first failure
+ if not test_result.passed:
+ # Mark remaining tests as NOT_RUN
+ for remaining_test in tests[len(results) :]:
+ not_run_result = self._create_not_run_result(
+ test_case=remaining_test,
+ code_hash=code_hash,
+ )
+ results.append(not_run_result)
+ break
+ else:
+ # Parallel execution with asyncio.gather
+ async def run_test_with_metadata(test_case: TestCase) -> TestResult:
+ timeout_s = test_case.metadata.get("timeout_s", self._default_timeout_s)
+ memory_limit = test_case.metadata.get(
+ "memory_limit_mb", self._memory_limit_mb
+ )
+ return await self._run_single_test(
+ sandbox=sandbox,
+ code=code,
+ test_case=test_case,
+ verifier=verifier,
+ timeout_s=timeout_s,
+ memory_limit_mb=memory_limit,
+ skip_compile=compile_first, # Skip if we already compiled
+ )
+
+ # Run all tests in parallel
+ results = await asyncio.gather(
+ *[run_test_with_metadata(test) for test in tests]
+ )
+
+ return BatchTestResult(
+ results=list(results),
+ code_hash=code_hash,
+ tests_hash=tests_hash_val,
+ )
+
+ async def _run_single_test(
+ self,
+ sandbox: Sandbox,
+ code: str,
+ test_case: TestCase,
+ verifier: OutputVerifier,
+ timeout_s: float,
+ memory_limit_mb: Optional[int],
+ skip_compile: bool = False,
+ ) -> TestResult:
+ """
+ Run a single test case.
+
+ Args:
+ sandbox: Sandbox to execute in
+ code: Source code
+ test_case: Test to run
+ verifier: Output verifier
+ timeout_s: Execution timeout
+ memory_limit_mb: Memory limit
+ skip_compile: If True, skip compilation (assumes already compiled)
+
+ Returns:
+ TestResult for this test
+ """
+ # Execute code with test input
+ execution = await sandbox.execute(
+ code=code,
+ stdin=str(test_case.input), # Ensure input is string
+ skip_compile=skip_compile,
+ timeout_s=timeout_s,
+ memory_limit_mb=memory_limit_mb,
+ )
+
+ # If execution failed (didn't compile or runtime error), test fails
+ if not execution.succeeded:
+ return TestResult(
+ test_case=test_case,
+ passed=False,
+ actual=execution.stdout,
+ execution=execution,
+ comparison_details=self._get_execution_failure_details(execution),
+ )
+
+ # Execution succeeded, compare output
+ actual_output = execution.stdout
+ expected_output = str(test_case.expected)
+
+ passed, comparison_details = verifier.verify(actual_output, expected_output)
+
+ return TestResult(
+ test_case=test_case,
+ passed=passed,
+ actual=actual_output,
+ execution=execution,
+ comparison_details=comparison_details,
+ )
+
+ async def _run_tests_batched(
+ self,
+ sandbox: Sandbox,
+ code: str,
+ tests: List[TestCase],
+ verifier: OutputVerifier,
+ stop_on_first_failure: bool,
+ compile_first: bool,
+ code_hash: str,
+ tests_hash: str,
+ ) -> BatchTestResult:
+ """
+ Run tests using batch execution API with crash resilience.
+
+        This method uses the sandbox's execute_batch() to run all tests
+        in a single sandbox invocation (e.g., one podman exec), reducing
+        semaphore acquisitions from 2N (two per test) to 2 per batch.
+
+ Args:
+ sandbox: Sandbox with execute_batch() method
+ code: Source code to test
+ tests: List of test cases
+ verifier: Output verifier for comparing results
+ stop_on_first_failure: If True, stop after first failure
+ compile_first: If True, compile before running tests
+ code_hash: Pre-computed hash of code
+ tests_hash: Pre-computed hash of tests
+
+ Returns:
+ BatchTestResult with results for each test
+ """
+ spec = BatchExecutionSpec(
+ code=code,
+ tests=tests,
+ compile_first=compile_first,
+ timeout_s=self._default_timeout_s,
+ stop_on_first_failure=stop_on_first_failure,
+ )
+
+ results: List[TestResult] = []
+ compile_result: Optional[CompileResult] = None
+ received_done = False
+ received_test_ids: Set[str] = set()
+
+ # Build lookup for test cases by ID
+ test_by_id = {t.id: t for t in tests}
+
+ try:
+ async for result in sandbox.execute_batch(spec):
+ if isinstance(result, CompileResult):
+ compile_result = result
+ if not result.success:
+ # Compilation failed - return batch with all tests failed
+ return self._create_all_failed_batch(
+ tests=tests,
+ code_hash=code_hash,
+ tests_hash=tests_hash,
+ compile_result=compile_result,
+ reason="compilation_failed",
+ )
+ elif isinstance(result, ExecutionResult):
+ # This is a test result - find the matching test case
+ # The execute_batch implementation tags results with test_id
+ # in the cache_key field
+ test_id = result.cache_key or ""
+ received_test_ids.add(test_id)
+
+ test_case = test_by_id.get(test_id)
+ if test_case is None:
+ logger.warning(
+ f"Received result for unknown test_id: {test_id}"
+ )
+ continue
+
+ # Build TestResult from ExecutionResult
+ if not result.succeeded:
+ # Execution failed
+ test_result = TestResult(
+ test_case=test_case,
+ passed=False,
+ actual=result.stdout,
+ execution=result,
+ comparison_details=self._get_execution_failure_details(
+ result
+ ),
+ )
+ else:
+ # Execution succeeded, compare output
+ actual_output = result.stdout
+ expected_output = str(test_case.expected)
+ passed, comparison_details = verifier.verify(
+ actual_output, expected_output
+ )
+ test_result = TestResult(
+ test_case=test_case,
+ passed=passed,
+ actual=actual_output,
+ execution=result,
+ comparison_details=comparison_details,
+ )
+ results.append(test_result)
+ elif isinstance(result, dict) and result.get("type") == "done":
+ received_done = True
+ break
+
+ except Exception as e:
+ # Stream broke unexpectedly (OOM, container killed, etc.)
+ logger.warning(f"Batch execution stream broke: {e}")
+
+ # Handle missing tests (stream truncated before "done")
+ if not received_done:
+ for test in tests:
+ if test.id not in received_test_ids:
+ # Create SANDBOX_ERROR result for missing tests
+ execution = ExecutionResult(
+ compile_result=compile_result
+ or CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SANDBOX_ERROR,
+ stdout="",
+ stderr="Batch execution terminated unexpectedly",
+ exit_code=None,
+ )
+ results.append(
+ TestResult(
+ test_case=test,
+ passed=False,
+ actual="",
+ execution=execution,
+ comparison_details="Sandbox crashed before this test completed",
+ )
+ )
+
+ return BatchTestResult(
+ results=results,
+ code_hash=code_hash,
+ tests_hash=tests_hash,
+ )
+
+ def _get_execution_failure_details(self, execution: ExecutionResult) -> str:
+ """
+ Generate human-readable details for execution failures.
+
+ Args:
+ execution: The failed execution result
+
+ Returns:
+ Explanation of why execution failed
+ """
+ # Compilation failure
+ if not execution.compiled:
+ compile_msg = execution.compile_result.error_message or "Unknown error"
+ if execution.compile_result.error_line is not None:
+ return f"Compilation failed at line {execution.compile_result.error_line}: {compile_msg}"
+ return f"Compilation failed: {compile_msg}"
+
+ # Runtime failure
+ if execution.run_status == RunStatus.TIMEOUT:
+ return f"Execution timed out after {execution.run_duration_ms:.0f}ms"
+
+ if execution.run_status == RunStatus.MEMORY_EXCEEDED:
+ return "Memory limit exceeded"
+
+ if execution.run_status == RunStatus.RUNTIME_ERROR:
+ stderr = execution.stderr.strip()
+ if stderr:
+ # Show first few lines of stderr for debugging
+ stderr_lines = stderr.split("\n")
+ preview = "\n".join(stderr_lines[:5])
+ if len(stderr_lines) > 5:
+ preview += f"\n... ({len(stderr_lines) - 5} more lines)"
+ return f"Runtime error:\n{preview}"
+ return f"Runtime error (exit code {execution.exit_code})"
+
+ # Other failure
+ return f"Execution failed with status: {execution.run_status}"
+
+ def _create_all_failed_batch(
+ self,
+ tests: List[TestCase],
+ code_hash: str,
+ tests_hash: str,
+ compile_result: CompileResult,
+ reason: str,
+ ) -> BatchTestResult:
+ """
+ Create a BatchTestResult where all tests failed due to compilation error.
+
+ Args:
+ tests: All test cases
+ code_hash: Hash of the code
+ tests_hash: Hash of the tests
+ compile_result: The failed compilation result
+ reason: Reason for batch failure
+
+ Returns:
+ BatchTestResult with all tests marked as failed
+ """
+ results: List[TestResult] = []
+
+ for test_case in tests:
+ # Create ExecutionResult with the compile failure
+ execution = ExecutionResult(
+ compile_result=compile_result,
+ run_status=None, # Never ran
+ stdout="",
+ stderr="",
+ exit_code=None,
+ compile_duration_ms=compile_result.duration_ms,
+ run_duration_ms=0.0,
+ total_duration_ms=compile_result.duration_ms,
+ )
+
+ test_result = TestResult(
+ test_case=test_case,
+ passed=False,
+ actual="",
+ execution=execution,
+ comparison_details=self._get_execution_failure_details(execution),
+ )
+ results.append(test_result)
+
+ return BatchTestResult(
+ results=results,
+ code_hash=code_hash,
+ tests_hash=tests_hash,
+ )
+
+ def _create_not_run_result(
+ self,
+ test_case: TestCase,
+ code_hash: str,
+ ) -> TestResult:
+ """
+ Create a TestResult for a test that was skipped.
+
+ Args:
+ test_case: The test case that was skipped
+ code_hash: Hash of the code (for metadata)
+
+ Returns:
+ TestResult marked as NOT_RUN
+ """
+ # Create a minimal ExecutionResult indicating the test wasn't run
+ execution = ExecutionResult(
+ compile_result=CompileResult(
+ status=CompileStatus.SUCCESS # Compilation already succeeded
+ ),
+ run_status=RunStatus.NOT_RUN,
+ stdout="",
+ stderr="",
+ exit_code=None,
+ )
+
+ return TestResult(
+ test_case=test_case,
+ passed=False,
+ actual="",
+ execution=execution,
+ comparison_details="Test skipped (stop_on_first_failure=True)",
+ )
diff --git a/src/ludic/envs/code_exec/sandbox.py b/src/ludic/envs/code_exec/sandbox.py
new file mode 100644
index 0000000..d2b2d81
--- /dev/null
+++ b/src/ludic/envs/code_exec/sandbox.py
@@ -0,0 +1,239 @@
+"""
+Sandbox protocols for isolated code execution.
+
+These protocols define the contract for sandbox implementations.
+The actual implementations (Docker, subprocess, etc.) live in separate modules.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, Optional, Protocol, runtime_checkable
+
+from .types import BatchTestResult, CompileResult, ExecutionResult
+
+
+@runtime_checkable
+class Sandbox(Protocol):
+ """
+ Async handle to a single isolated execution environment.
+
+ Invariants:
+ - A sandbox is exclusive to one env instance at a time
+ - reset() clears all state from previous executions
+ - All operations are async to avoid blocking the event loop
+
+ Lifecycle:
+ 1. Obtained via SandboxPool.checkout()
+ 2. reset() called to ensure clean state
+ 3. compile() and/or execute() called as needed
+ 4. Returned via SandboxPool.release()
+
+ Implementations should ensure:
+ - Network isolation (no external access)
+ - Resource limits (CPU, memory)
+ - Timeout enforcement
+ - Filesystem isolation between uses
+ """
+
+ @property
+ def python_version(self) -> str:
+ """Python version in this sandbox (e.g., '3.11')."""
+ ...
+
+ async def reset(self) -> None:
+ """
+ Clear filesystem, kill processes, restore to clean state.
+
+ Must be called before first use and is automatically called
+ by SandboxPool.release().
+ """
+ ...
+
+ async def compile(
+ self,
+ code: str,
+ *,
+ timeout_s: float = 5.0,
+ ) -> CompileResult:
+ """
+ Syntax-check / compile code without executing.
+
+ For Python: runs py_compile or ast.parse to catch syntax errors.
+ For compiled languages: runs the compiler.
+
+ Args:
+ code: Source code to compile/check
+ timeout_s: Maximum time for compilation
+
+ Returns:
+ CompileResult with status and error details if failed
+ """
+ ...
+
+ async def execute(
+ self,
+ code: str,
+ *,
+ stdin: str = "",
+ skip_compile: bool = False,
+ timeout_s: float = 10.0,
+ memory_limit_mb: Optional[int] = None,
+ env_vars: Optional[Dict[str, str]] = None,
+ ) -> ExecutionResult:
+ """
+ Execute code and return rich results.
+
+ Implicitly compiles first if not already compiled (unless skip_compile=True).
+ The compile result is included in the returned ExecutionResult.
+
+ Args:
+ code: Source code to execute
+ stdin: Input to feed to the process via stdin
+ skip_compile: If True, skip compilation step (assumes code already compiled)
+ timeout_s: Maximum execution time (excluding compilation)
+ memory_limit_mb: Memory limit override (None uses sandbox default)
+ env_vars: Additional environment variables
+
+ Returns:
+ ExecutionResult with compile status, output, timing, etc.
+ """
+ ...
+
+
+@runtime_checkable
+class SandboxPool(Protocol):
+ """
+ Async pool of reusable sandboxes with caching.
+
+ The pool manages:
+ 1. Sandbox lifecycle (start/stop containers, processes, etc.)
+ 2. Checkout/release of exclusive sandbox handles
+ 3. Execution cache (code+tests -> result)
+
+ Lifecycle:
+ 1. start() - Initialize pool (start containers, etc.)
+ 2. checkout() - Get exclusive sandbox access
+ 3. release() - Return sandbox to pool
+ 4. shutdown() - Tear down all sandboxes
+
+ The pool should be started once at application startup and shared
+ across all CodeExecEnv instances via factory closure injection.
+
+ Caching:
+ The pool maintains an LRU cache keyed by (code_hash, tests_hash).
+ This avoids redundant execution when the same code is submitted
+ for the same tests (common in GRPO where multiple generations
+ are evaluated against the same problem).
+ """
+
+ @property
+ def python_version(self) -> str:
+ """Python version used by sandboxes in this pool."""
+ ...
+
+ @property
+ def available(self) -> int:
+ """Number of sandboxes currently available for checkout."""
+ ...
+
+ @property
+ def cache_stats(self) -> Dict[str, int]:
+ """
+ Cache statistics.
+
+ Returns dict with keys:
+ - hits: number of cache hits
+ - misses: number of cache misses
+ - size: current cache size
+ - max_size: maximum cache size
+ """
+ ...
+
+ async def start(self) -> None:
+ """
+ Initialize the pool.
+
+ This starts all sandboxes (containers, processes, etc.).
+ Should be called once before any checkout() calls.
+        Idempotent - calls after the first successful start are no-ops.
+ """
+ ...
+
+ async def checkout(self, timeout_s: float = 30.0) -> Sandbox:
+ """
+ Get exclusive access to a sandbox.
+
+ Blocks until a sandbox is available or timeout is reached.
+ The returned sandbox is guaranteed to be in a clean state.
+
+ Args:
+ timeout_s: Maximum time to wait for a sandbox
+
+ Returns:
+ Exclusive Sandbox handle
+
+ Raises:
+ TimeoutError: If no sandbox available within timeout
+ """
+ ...
+
+ async def release(self, sandbox: Sandbox) -> None:
+ """
+ Return a sandbox to the pool.
+
+ The sandbox is automatically reset before being made available
+ to other callers.
+
+ Args:
+ sandbox: The sandbox to release (must have been obtained via checkout)
+ """
+ ...
+
+ async def shutdown(self) -> None:
+ """
+ Tear down all sandboxes and release resources.
+
+ After shutdown(), the pool cannot be used again without calling start().
+ """
+ ...
+
+ # ----- Cache interface -----
+
+ def get_cached(
+ self,
+ code_hash: str,
+ tests_hash: str,
+ ) -> Optional[BatchTestResult]:
+ """
+ Check if we have a cached result for this code+tests pair.
+
+ This is a synchronous method for use from env_step().
+ Thread-safe.
+
+ Args:
+ code_hash: Hash of the submitted code
+ tests_hash: Hash of the test cases
+
+ Returns:
+ Cached BatchTestResult if found, None otherwise
+ """
+ ...
+
+ def put_cached(
+ self,
+ code_hash: str,
+ tests_hash: str,
+ result: BatchTestResult,
+ ) -> None:
+ """
+ Cache a result for future lookups.
+
+ This is a synchronous method for use from env_step().
+ Thread-safe. Uses LRU eviction when cache is full.
+
+ Args:
+ code_hash: Hash of the submitted code
+ tests_hash: Hash of the test cases
+ result: The BatchTestResult to cache
+ """
+ ...
diff --git a/src/ludic/envs/code_exec/types.py b/src/ludic/envs/code_exec/types.py
new file mode 100644
index 0000000..806559b
--- /dev/null
+++ b/src/ludic/envs/code_exec/types.py
@@ -0,0 +1,258 @@
+"""
+Core types for code execution environments.
+
+These types capture rich metadata about code compilation and execution,
+providing RL-relevant signals for reward shaping and analysis.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+
+class SandboxPoolExhaustedError(Exception):
+ """
+ Raised when sandbox pool experiences too many consecutive failures.
+
+ This indicates a systemic issue with sandbox creation/reset that
+ requires operator intervention.
+ """
+
+ pass
+
+
+class CompileStatus(Enum):
+ """Status of code compilation/syntax checking."""
+
+ SUCCESS = "success"
+ SYNTAX_ERROR = "syntax_error"
+ IMPORT_ERROR = "import_error"
+ TIMEOUT = "timeout"
+ UNKNOWN_ERROR = "unknown_error"
+
+
+class RunStatus(Enum):
+ """Status of code execution."""
+
+ SUCCESS = "success"
+ RUNTIME_ERROR = "runtime_error"
+ TIMEOUT = "timeout"
+ MEMORY_EXCEEDED = "memory_exceeded"
+ KILLED = "killed"
+ NOT_RUN = "not_run" # e.g., skipped due to earlier failure
+ SANDBOX_ERROR = "sandbox_error" # sandbox crashed, not user code
+
+
+@dataclass
+class CompileResult:
+ """
+ Result of compiling/syntax-checking code.
+
+ For Python, this typically uses py_compile or ast.parse to catch
+ syntax errors before execution.
+ """
+
+ status: CompileStatus
+ error_message: Optional[str] = None
+ error_line: Optional[int] = None
+ error_column: Optional[int] = None
+ duration_ms: float = 0.0
+
+ @property
+ def success(self) -> bool:
+ return self.status == CompileStatus.SUCCESS
+
+
+@dataclass
+class ExecutionResult:
+ """
+ Rich result of running code in a sandbox.
+
+ All fields are RL-relevant metadata that can be used for:
+ - Reward shaping (compile errors vs runtime errors vs wrong answer)
+ - Curriculum learning (filter by execution characteristics)
+ - Analysis (understanding failure modes)
+
+ This is the atomic unit returned by sandbox.execute().
+ """
+
+ # Compilation phase
+ compile_result: CompileResult
+
+ # Execution phase (only meaningful if compilation succeeded)
+ run_status: Optional[RunStatus] = None
+ stdout: str = ""
+ stderr: str = ""
+ exit_code: Optional[int] = None
+ return_value: Optional[str] = None # for function-based testing
+
+ # Timing (all in milliseconds)
+ compile_duration_ms: float = 0.0
+ run_duration_ms: float = 0.0
+ total_duration_ms: float = 0.0
+
+ # Resource usage (optional, depends on sandbox implementation)
+ peak_memory_bytes: Optional[int] = None
+ cpu_time_ms: Optional[float] = None
+
+ # Cache info
+ cache_hit: bool = False
+ cache_key: Optional[str] = None
+
+ @property
+ def compiled(self) -> bool:
+ """True if code compiled successfully."""
+ return self.compile_result.success
+
+ @property
+ def succeeded(self) -> bool:
+ """True if code compiled and ran without errors."""
+ return self.compiled and self.run_status == RunStatus.SUCCESS
+
+ @property
+ def timed_out(self) -> bool:
+ """True if either compilation or execution timed out."""
+ return (
+ self.compile_result.status == CompileStatus.TIMEOUT
+ or self.run_status == RunStatus.TIMEOUT
+ )
+
+
+@dataclass
+class TestCase:
+ """
+ A single test case.
+
+ The interpretation of `input` and `expected` depends on the CodeRunner:
+ - stdin/stdout: input is stdin string, expected is stdout string
+ - function call: input is (args, kwargs), expected is return value
+ - pytest: input is test code, expected is None (pass/fail from exit code)
+ """
+
+ __test__ = False # Prevent pytest from collecting this as a test class
+
+ input: Any
+ expected: Any
+ id: str = ""
+ weight: float = 1.0 # for weighted partial credit
+ metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class BatchExecutionSpec:
+ """
+ Specification for executing multiple tests in a single batch.
+
+ Used by execute_batch() to run all tests with minimal semaphore acquisitions.
+ The batch runner receives this as a manifest and executes tests sequentially
+ inside the container, streaming results back as JSONL.
+ """
+
+ code: str
+ tests: List[TestCase]
+ compile_first: bool = True
+ timeout_s: float = 5.0
+ stop_on_first_failure: bool = True
+
+
+@dataclass
+class TestResult:
+ """Result of running a single test case."""
+
+ __test__ = False # Prevent pytest from collecting this as a test class
+
+ test_case: TestCase
+ passed: bool
+ actual: Any
+ execution: ExecutionResult
+ comparison_details: Optional[str] = None # explains why comparison failed
+
+ @property
+ def compiled(self) -> bool:
+ """True if code compiled for this test."""
+ return self.execution.compiled
+
+ @property
+ def ran(self) -> bool:
+ """True if code actually executed (not skipped)."""
+ return self.execution.run_status not in (None, RunStatus.NOT_RUN)
+
+
+@dataclass
+class BatchTestResult:
+ """
+ Result of running all tests for a code submission.
+
+ Aggregates individual TestResults and provides convenience properties
+ for computing rewards and analyzing results.
+ """
+
+ results: List[TestResult]
+ code_hash: str
+ tests_hash: str
+
+ @property
+ def passed_count(self) -> int:
+ """Number of tests that passed."""
+ return sum(1 for r in self.results if r.passed)
+
+ @property
+ def total_count(self) -> int:
+ """Total number of tests."""
+ return len(self.results)
+
+ @property
+ def all_passed(self) -> bool:
+ """True if all tests passed."""
+ return self.passed_count == self.total_count and self.total_count > 0
+
+ @property
+ def pass_rate(self) -> float:
+ """Fraction of tests that passed (0.0 to 1.0)."""
+ if self.total_count == 0:
+ return 0.0
+ return self.passed_count / self.total_count
+
+ @property
+ def first_failure(self) -> Optional[TestResult]:
+ """The first test that failed, or None if all passed."""
+ for r in self.results:
+ if not r.passed:
+ return r
+ return None
+
+ @property
+ def compile_failed(self) -> bool:
+ """True if code failed to compile (before any tests ran)."""
+ if not self.results:
+ return False
+ # If compilation failed, all tests will have the same compile failure
+ return not self.results[0].compiled
+
+ @property
+ def total_execution_ms(self) -> float:
+ """Total execution time across all tests."""
+ return sum(r.execution.total_duration_ms for r in self.results)
+
+ @property
+ def total_compile_ms(self) -> float:
+ """Total compilation time (usually same across tests if compiled once)."""
+ if not self.results:
+ return 0.0
+ # Compilation typically happens once, take max to be safe
+ return max(r.execution.compile_duration_ms for r in self.results)
+
+ @property
+ def total_run_ms(self) -> float:
+ """Total runtime across all tests (excluding compilation)."""
+ return sum(r.execution.run_duration_ms for r in self.results)
+
+ def get_failures(self) -> List[TestResult]:
+ """All tests that failed."""
+ return [r for r in self.results if not r.passed]
+
+ def get_successes(self) -> List[TestResult]:
+ """All tests that passed."""
+ return [r for r in self.results if r.passed]
diff --git a/src/ludic/eval/cli.py b/src/ludic/eval/cli.py
index 1deaee2..2fa9061 100644
--- a/src/ludic/eval/cli.py
+++ b/src/ludic/eval/cli.py
@@ -22,7 +22,7 @@
SamplingParams,
ReturnSpec,
)
-from ludic.interaction import SingleAgentSyncProtocol
+from ludic.interaction import SingleAgentProtocol
from ludic.parsers import ParseResult
from ludic.training.batching.rollout_engine import RolloutEngine
@@ -88,14 +88,14 @@ def build_single_agent_engine(
) -> RolloutEngine:
make_ctx = context_factory or (lambda sp: FullDialog(system_prompt=sp))
- def protocol_factory() -> SingleAgentSyncProtocol:
+ def protocol_factory() -> SingleAgentProtocol:
agent = Agent(
client=client,
model=model,
ctx=make_ctx(system_prompt),
parser=parser,
)
- return SingleAgentSyncProtocol(
+ return SingleAgentProtocol(
agent=agent,
stop_on_parse_error=stop_on_parse_error,
)
diff --git a/src/ludic/inference/vllm_server.py b/src/ludic/inference/vllm_server.py
index a985a21..1f9b5f4 100644
--- a/src/ludic/inference/vllm_server.py
+++ b/src/ludic/inference/vllm_server.py
@@ -108,9 +108,9 @@ def init_communicator(self, host: str, port: int, world_size: int) -> None:
# --- DEBUG: Print internal vLLM parameter names ---
# This executes on the worker process. We use Rank 0 to avoid duplicates.
if self.pynccl_comm.rank == 0:
- print("\n" + "="*60)
+ print("\n" + "=" * 60)
print("🔍 [DEBUG] vLLM Internal Parameter Names (Worker Rank 0)")
- print("="*60)
+ print("=" * 60)
try:
# Access the underlying torch model
model_instance = self.model_runner.model
@@ -121,7 +121,7 @@ def init_communicator(self, host: str, port: int, world_size: int) -> None:
print(f"Total parameters found: {count}")
except Exception as e:
print(f"⚠️ Could not print parameter names: {e}")
- print("="*60 + "\n")
+ print("=" * 60 + "\n")
# --------------------------------------------------
def update_named_param(self, name: str, dtype: str, shape: Sequence[int]) -> None:
@@ -230,7 +230,7 @@ def update_state(self, batch_update: Optional[BatchUpdate]) -> None:
self.req_state.pop(ridx, None)
# 2) Handle additions
- for (req_idx, params, prompt_ids, output_ids) in batch_update.added:
+ for req_idx, params, prompt_ids, output_ids in batch_update.added:
assert isinstance(params, SamplingParams)
extra_args = getattr(params, "extra_args", None)
@@ -248,7 +248,7 @@ def update_state(self, batch_update: Optional[BatchUpdate]) -> None:
}
# 3) Handle moves
- for (src, dst, direction) in batch_update.moved:
+ for src, dst, direction in batch_update.moved:
if direction == MoveDirectionality.UNIDIRECTIONAL:
state = self.req_state.pop(src, None)
if state is not None:
@@ -371,9 +371,7 @@ async def health() -> dict[str, str]:
@app.get("/get_world_size")
async def get_world_size() -> dict[str, int]:
- return {
- "world_size": args.tensor_parallel_size * args.data_parallel_size
- }
+ return {"world_size": args.tensor_parallel_size * args.data_parallel_size}
@app.get("/runtime_version")
async def runtime_version() -> dict[str, int]:
@@ -390,9 +388,7 @@ async def init_communicator(request: Request) -> dict[str, str]:
world_size = data.get("world_size")
create_background_task(
- engine.collective_rpc(
- "init_communicator", args=(host, port, world_size)
- )
+ engine.collective_rpc("init_communicator", args=(host, port, world_size))
)
return {"status": "ok"}
@@ -433,18 +429,18 @@ async def update_param_batch(request: Request) -> dict[str, str]:
"""
data = await request.json()
metadata = data.get("metadata", []) # List of {name, dtype, shape}
-
+
# --- DEBUG: Verify what the server received ---
- print("\n" + "="*80)
+ print("\n" + "=" * 80)
print(f"📥 [SERVER DEBUG] Received Batch Metadata (Total: {len(metadata)})")
- print("="*80)
+ print("=" * 80)
for i, m in enumerate(metadata):
# Print only first 10 to avoid spamming logs, or all if short
if i < 10:
print(f" • {m.get('name')} | {m.get('shape')}")
if len(metadata) > 10:
- print(f" ... (+{len(metadata)-10} more)")
- print("="*80 + "\n")
+ print(f" ... (+{len(metadata) - 10} more)")
+ print("=" * 80 + "\n")
# ----------------------------------------------
# Check if an explicit version was provided by the Trainer
@@ -462,7 +458,7 @@ async def do_update_batch() -> None:
# Reset cache and bump version after full batch
await engine.reset_prefix_cache()
-
+
global RUNTIME_VERSION
async with RUNTIME_VERSION_LOCK:
if forced_version is not None:
@@ -499,7 +495,7 @@ async def do_update() -> None:
await engine.collective_rpc(
"update_named_param", args=(name, dtype, shape)
)
-
+
global RUNTIME_VERSION
async with RUNTIME_VERSION_LOCK:
if requested_version is not None:
@@ -578,7 +574,9 @@ def main() -> None:
# vLLM can silently override sampling params using the model's Hugging Face
# `generation_config` unless `--generation-config vllm` is set. Defaulting
# to `vllm` makes Ludic's SamplingParams the source of truth.
- if not any(a == "--generation-config" or a.startswith("--generation-config=") for a in argv):
+ if not any(
+ a == "--generation-config" or a.startswith("--generation-config=") for a in argv
+ ):
argv = [*argv, "--generation-config", "vllm"]
args = parser.parse_args(argv)
assert args is not None
diff --git a/src/ludic/interaction/__init__.py b/src/ludic/interaction/__init__.py
index a9f1cf4..a826ea6 100644
--- a/src/ludic/interaction/__init__.py
+++ b/src/ludic/interaction/__init__.py
@@ -3,12 +3,12 @@
from .base import InteractionProtocol
from .info import merge_step_info
from .multi_agent import MultiAgentProtocol
-from .single_agent import SingleAgentSyncProtocol
+from .single_agent import SingleAgentProtocol
from .step_collector import TraceCollector
__all__ = [
"InteractionProtocol",
- "SingleAgentSyncProtocol",
+ "SingleAgentProtocol",
"MultiAgentProtocol",
"TraceCollector",
"merge_step_info",
diff --git a/src/ludic/interaction/multi_agent.py b/src/ludic/interaction/multi_agent.py
index 3992b9c..680f205 100644
--- a/src/ludic/interaction/multi_agent.py
+++ b/src/ludic/interaction/multi_agent.py
@@ -30,6 +30,12 @@ class MultiAgentProtocol(InteractionProtocol):
logged for the failing agent (reward=parse_result.reward, info
includes parse_error=True). The failing agent's context is updated
with the synthetic observation for the next turn.
+
+ Async environment support:
+ This protocol does NOT currently support async environments.
+ It uses the synchronous env.reset() and env.step() methods.
+ For async multi-agent environments, this protocol would need
+ async detection similar to SingleAgentProtocol.
"""
def __init__(self, agents: Dict[str, Agent]):
diff --git a/src/ludic/interaction/single_agent.py b/src/ludic/interaction/single_agent.py
index d7fe6cc..6cd3f98 100644
--- a/src/ludic/interaction/single_agent.py
+++ b/src/ludic/interaction/single_agent.py
@@ -1,4 +1,6 @@
from __future__ import annotations
+
+import inspect
from typing import Optional, List
from ludic.envs.env import LudicEnv
@@ -8,15 +10,48 @@
from .base import InteractionProtocol
from .info import merge_step_info
-class SingleAgentSyncProtocol(InteractionProtocol):
+
+def _has_async_env_methods(env: LudicEnv) -> tuple[bool, bool]:
"""
- Implements the standard single-agent, synchronous interaction loop.
-
+ Detect if environment has async env_reset/env_step methods.
+
+ WARNING: If this returns (True, True), you MUST use the async methods
+ directly (env.env_reset(), env.env_step()) rather than the sync wrappers
+ (env.reset(), env.step()). Calling sync wrappers on an async env will
+ return coroutine objects instead of results.
+
+ This is used to support envs like CodeExecEnv that have async methods
+ while maintaining backward compatibility with sync envs.
+
+ Returns:
+ Tuple of (has_async_reset, has_async_step)
+ """
+ has_async_reset = (
+ hasattr(env, "env_reset")
+ and inspect.iscoroutinefunction(env.env_reset)
+ )
+ has_async_step = (
+ hasattr(env, "env_step")
+ and inspect.iscoroutinefunction(env.env_step)
+ )
+ return has_async_reset, has_async_step
+
+class SingleAgentProtocol(InteractionProtocol):
+ """
+ Implements the standard single-agent interaction loop.
+
This protocol consumes a LudicEnv but ASSUMES it has exactly
one agent and that this agent is active every step.
-
+
It works perfectly with any env inheriting from SingleAgentEnv.
+ Async env support:
+ This protocol automatically detects envs with async `env_reset` and
+ `env_step` methods (e.g., CodeExecEnv). For such envs, the protocol
+ calls these methods directly and awaits them, bypassing the sync
+ wrappers in SingleAgentEnv. This provides full backward compatibility
+ with sync envs while supporting async envs transparently.
+
Parser failures:
If the agent's parser returns ParseResult.action=None, the protocol
does not call env.step(). Instead it logs a synthetic Step with
@@ -69,15 +104,22 @@ async def run(
agent_ids = env.agent_ids
if len(agent_ids) != 1:
raise ValueError(
- f"SingleAgentSyncProtocol requires a LudicEnv with "
+ f"SingleAgentProtocol requires a LudicEnv with "
f"exactly one agent, but found {len(agent_ids)}."
)
agent_id = agent_ids[0]
+ # Check for async env methods (e.g., CodeExecEnv)
+ has_async_reset, has_async_step = _has_async_env_methods(env)
+
# 2. --- Reset Env ---
- # env.reset() returns a dict
- obs_info_dict = env.reset(seed=env_seed)
- obs, info = obs_info_dict[agent_id]
+ # For async envs, call env_reset directly and await it.
+ # For sync envs, use the standard reset() wrapper.
+ if has_async_reset:
+ obs, info = await env.env_reset(seed=env_seed) # type: ignore[union-attr]
+ else:
+ obs_info_dict = env.reset(seed=env_seed)
+ obs, info = obs_info_dict[agent_id]
# 3. --- Reset Agent & Feed First Obs ---
# Choose system prompt: prefer the context's default if set, else env suggestion.
@@ -149,12 +191,14 @@ async def run(
parsed_action = parse_result.action
parser_reward = parse_result.reward
- # Send action to env in the required dict format
- actions_dict = {agent_id: parsed_action}
- outcomes_dict = env.step(actions_dict)
-
- # Unwrap the outcome for our agent
- env_outcome = outcomes_dict[agent_id]
+ # For async envs, call env_step directly and await it.
+ # For sync envs, use the standard step() wrapper.
+ if has_async_step:
+ env_outcome = await env.env_step(parsed_action) # type: ignore[union-attr]
+ else:
+ actions_dict = {agent_id: parsed_action}
+ outcomes_dict = env.step(actions_dict)
+ env_outcome = outcomes_dict[agent_id]
# Combine parser and env rewards
total_reward = env_outcome.reward + parser_reward
diff --git a/src/ludic/training/algorithm.py b/src/ludic/training/algorithm.py
index 720d447..2d112fd 100644
--- a/src/ludic/training/algorithm.py
+++ b/src/ludic/training/algorithm.py
@@ -14,9 +14,19 @@
ClippedSurrogateLoss,
TokenClippedSurrogateLoss,
CISPOLoss,
+ SAPOLoss,
+ GMPOLoss,
MaskedCausalLMCrossEntropyLoss,
+ CompositeLoss,
+ LossTerm,
+ TokenKLLoss,
+)
+from ludic.training.credit_assignment import (
+ MonteCarloReturn,
+ GroupNormalizedReturn,
+ HybridNormalizedReturn,
+ ConstantCredit,
)
-from ludic.training.credit_assignment import MonteCarloReturn, GroupNormalizedReturn, ConstantCredit
Batch = Mapping[str, Tensor]
@@ -47,9 +57,19 @@ def compute_loss(
self,
model: nn.Module,
batch: Batch,
+ *,
+ cast_logits_to_fp32: bool = False,
) -> tuple[Tensor, Dict[str, Any]]:
"""
Runs the forward pass once and delegates to the Loss object.
+
+ Args:
+ model: The trainable model.
+ batch: Collated batch tensors (input_ids, attention_mask, etc.).
+ cast_logits_to_fp32: If True, cast logits to FP32 before loss computation.
+ This improves importance sampling ratio stability for ratio-based
+ objectives (GRPO, CISPO, etc.) by reducing precision errors in
+ exp(log_ratio). Recommended by ScaleRL paper (arXiv:2510.13786).
"""
# --- Run the forward pass ---
input_ids = batch["input_ids"]
@@ -60,6 +80,10 @@ def compute_loss(
)
logits: Logits = outputs.logits
+ # ScaleRL: FP32 logits prevent IS ratio precision issues in exp(logp_new - logp_old)
+ if cast_logits_to_fp32:
+ logits = logits.float()
+
# Pass the resulting logits to the loss function
return self.loss.compute(logits, batch)
@@ -282,6 +306,63 @@ def make_grpo(
)
+def make_dr_grpo(
+ *,
+ group_size: int,
+ positive_only: bool = False,
+ clip_eps_low: float = 0.2,
+ clip_eps_high: float = 0.27,
+ length_normalize: bool = False,
+ ratio_clip: Optional[float] = None,
+ drop_zero_weight: bool = False,
+ drop_zero_weight_eps: float = 1e-4,
+ name: str = "dr_grpo",
+) -> RLAlgorithm:
+ """
+ Dr. GRPO (GRPO Done Right): removes per-response length normalization and
+ per-group std normalization while keeping the GRPO-style clipped surrogate.
+
+ - Credit assignment: group-mean baseline only (no std normalization)
+ - Loss: token-level PPO-style clipped surrogate (Token-TIS)
+
+ This corresponds to the unbiased GRPO variant described in
+ "Understanding R1-Zero-Like Training: A Critical Perspective".
+
+ Args:
+ group_size: Number of rollouts per group.
+ positive_only: If True, clip negative advantages to zero.
+ clip_eps_low: Lower PPO clipping epsilon for the surrogate objective.
+ clip_eps_high: Upper PPO clipping epsilon for the surrogate objective.
+ length_normalize: If True, normalizes by number of action tokens.
+ This reintroduces length normalization and deviates from Dr. GRPO.
+ ratio_clip: Optional upper bound C for truncation (min(r, C)).
+ name: Algorithm name for logging/metrics.
+ """
+ credit_assigner: CreditAssigner = GroupNormalizedReturn(
+ group_size=group_size,
+ normalize_adv=False,
+ positive_only=positive_only,
+ )
+ loss: Loss = TokenClippedSurrogateLoss(
+ clip_eps_low=clip_eps_low,
+ clip_eps_high=clip_eps_high,
+ length_normalize=length_normalize,
+ ratio_clip=ratio_clip,
+ )
+ preprocess_fns = []
+ if drop_zero_weight:
+ preprocess_fns.append(lambda batch: drop_zero_weight_samples(batch, eps=drop_zero_weight_eps))
+ preprocess_fns.append(validate_actor_logps)
+ preprocess = compose_preprocess(*preprocess_fns)
+
+ return RLAlgorithm(
+ name=name,
+ credit_assigner=credit_assigner,
+ loss=loss,
+ preprocess=preprocess,
+ )
+
+
def make_gspo(
*,
group_size: int,
@@ -400,6 +481,204 @@ def make_cispo(
)
+def make_sapo(
+ *,
+ group_size: int,
+ group_normalize_adv: bool = True,
+ positive_only: bool = False,
+ tau_pos: float = 1.0,
+ tau_neg: float = 1.05,
+ length_normalize: bool = False,
+ drop_zero_weight: bool = False,
+ drop_zero_weight_eps: float = 1e-4,
+ name: str = "sapo",
+) -> RLAlgorithm:
+ """
+ SAPO (Soft Adaptive Policy Optimization) preset.
+
+ SAPO replaces hard clipping with a smooth, temperature-controlled sigmoid gate
+ that adaptively attenuates off-policy updates while preserving learning signals.
+ The soft gate implements a continuous trust region that is both sequence-coherent
+ and token-adaptive.
+
+ Core mechanism:
+ Instead of hard clipping: min(r * A, clip(r, 1-ε, 1+ε) * A)
+ SAPO uses soft gate: f(r) * A, where f(r) = (4/τ) * σ(τ(r - 1))
+
+ The sigmoid gate σ(τ(r - 1)) peaks at r=1 (on-policy) and decays smoothly as
+ r deviates, providing gradual attenuation rather than abrupt cutoff.
+
+ Asymmetric temperatures:
+ - τ_pos: temperature for positive advantages (increase token logit)
+ - τ_neg: temperature for negative advantages (decrease token logit)
+
+ Setting τ_neg > τ_pos makes negative gradients decay faster, improving stability.
+ This is motivated by the observation that negative updates diffuse to many
+ unsampled tokens in the vocabulary, introducing more noise than positive updates.
+
+ Advantages over hard clipping methods:
+ - vs GRPO: smooth token-level scaling instead of hard cutoff
+ - vs GSPO: token-adaptive (preserves signal from near-on-policy tokens even
+ when sequence has outliers)
+ - Maintains sequence-level coherence under mild conditions (small steps,
+ low token variance)
+
+ Args:
+ group_size: Number of rollouts per group for advantage normalization.
+ group_normalize_adv: Whether to normalize advantages within each group.
+ positive_only: If True, clip negative advantages to zero.
+ tau_pos: Temperature for positive advantages. Default: 1.0 (paper setting).
+ tau_neg: Temperature for negative advantages. Default: 1.05 (paper setting).
+ Higher values → faster decay → more conservative.
+ length_normalize: Whether to normalize by sequence length.
+ drop_zero_weight: Whether to drop zero-advantage samples.
+ drop_zero_weight_eps: Epsilon for zero-weight detection.
+ name: Algorithm name for logging.
+
+ Note: Rollouts must carry `group_id` in their metadata and each group
+ must have exactly `group_size` members. Use GRPORequestStrategy for
+ request expansion.
+
+ Reference: "Soft Adaptive Policy Optimization" (arXiv:2511.20347v2)
+ https://arxiv.org/abs/2511.20347
+
+ Usage example:
+ ```python
+ from ludic.training import make_sapo, GRPORequestStrategy
+
+ # Create SAPO algorithm
+ algo = make_sapo(group_size=4)
+
+ # Use with GRPO request expansion
+ request_strategy = GRPORequestStrategy(group_size=4)
+ ```
+ """
+ credit_assigner: CreditAssigner = GroupNormalizedReturn(
+ group_size=group_size,
+ normalize_adv=group_normalize_adv,
+ positive_only=positive_only,
+ )
+ loss: Loss = SAPOLoss(
+ tau_pos=tau_pos,
+ tau_neg=tau_neg,
+ length_normalize=length_normalize,
+ )
+ preprocess_fns = []
+ if drop_zero_weight:
+ preprocess_fns.append(lambda batch: drop_zero_weight_samples(batch, eps=drop_zero_weight_eps))
+ preprocess_fns.append(validate_actor_logps)
+ preprocess = compose_preprocess(*preprocess_fns)
+
+ return RLAlgorithm(
+ name=name,
+ credit_assigner=credit_assigner,
+ loss=loss,
+ preprocess=preprocess,
+ )
+
+
+def make_gmpo(
+ *,
+ group_size: int,
+ group_normalize_adv: bool = True,
+ positive_only: bool = False,
+ clip_eps_low: float = 0.4,
+ clip_eps_high: float = 0.4,
+ length_normalize: bool = True,
+ ratio_clip: Optional[float] = None,
+ drop_zero_weight: bool = False,
+ drop_zero_weight_eps: float = 1e-4,
+ name: str = "gmpo",
+) -> RLAlgorithm:
+ """
+ GMPO (Geometric-Mean Policy Optimization) preset.
+
+ GMPO stabilizes GRPO by using the geometric mean of token-level importance
+ ratios instead of the arithmetic mean. This makes the objective less sensitive
+ to outliers and results in more stable policy updates with fewer extreme
+ importance sampling ratios.
+
+ Key advantages over GRPO:
+ 1. More robust to outlier tokens (geometric mean vs arithmetic mean)
+ 2. More stable importance sampling ratios during training
+ 3. Supports wider clipping ranges (e.g., (e^-0.4, e^0.4) vs (0.8, 1.2))
+ 4. Better exploration due to higher entropy maintenance
+ 5. More stable gradients and lower KL divergence from reference policy
+
+ Objective:
+ J_GMPO = E[ (∏_t min(ρ_t * A, clip(ρ_t, e^-ε_low, e^ε_high) * A))^(1/|o|) * sgn(A) ]
+
+ where:
+ - ρ_t = π_new(a_t|s_t) / π_old(a_t|s_t) is the token-level importance ratio
+ - A is the advantage (group-normalized)
+ - |o| is the sequence length
+ - Clipping is performed at the token level in log-space
+
+ Implementation differences from GRPO:
+ - Uses geometric mean: (∏_t ρ_t)^(1/|o|) instead of (1/|o|) Σ_t ρ_t
+ - All operations performed in log-space for numerical stability
+ - Token-level clipping (not sequence-level as in DeepSeek-R1)
+ - Wider default clipping range: (e^-0.4, e^0.4) ≈ (0.67, 1.49)
+
+ Args:
+ group_size: Number of rollouts per group for advantage normalization.
+ group_normalize_adv: Whether to normalize advantages within each group.
+ Recommended: True (follows GRPO and paper experiments).
+ positive_only: If True, clip negative advantages to zero.
+ clip_eps_low: Lower clipping epsilon in log-space. Default 0.4 means
+ clipping to e^-0.4 ≈ 0.67. Paper uses (e^-0.4, e^0.4).
+ clip_eps_high: Upper clipping epsilon in log-space. Default 0.4 means
+ clipping to e^0.4 ≈ 1.49.
+ length_normalize: Whether to normalize by sequence length (1/|o|).
+ This is critical for GMPO stability. Default: True.
+ ratio_clip: Optional upper bound for geometric mean ratio truncation.
+ drop_zero_weight: Whether to drop zero-advantage samples before training.
+ drop_zero_weight_eps: Epsilon for zero-weight detection.
+ name: Algorithm name for logging/metrics.
+
+ Note: Rollouts must carry `group_id` in their metadata and each group
+ must have exactly `group_size` members. Use GRPORequestStrategy for
+ request expansion.
+
+ Reference: "GMPO: Geometric-Mean Policy Optimization" (arXiv:2507.20673v3)
+ https://arxiv.org/abs/2507.20673
+
+ Usage example:
+ ```python
+ from ludic.training import make_gmpo, GRPORequestStrategy
+
+ # Create GMPO algorithm
+ algo = make_gmpo(group_size=4)
+
+ # Use with GRPO request expansion
+ request_strategy = GRPORequestStrategy(group_size=4)
+ ```
+ """
+ credit_assigner: CreditAssigner = GroupNormalizedReturn(
+ group_size=group_size,
+ normalize_adv=group_normalize_adv,
+ positive_only=positive_only,
+ )
+ loss: Loss = GMPOLoss(
+ clip_eps_low=clip_eps_low,
+ clip_eps_high=clip_eps_high,
+ length_normalize=length_normalize,
+ ratio_clip=ratio_clip,
+ )
+ preprocess_fns = []
+ if drop_zero_weight:
+ preprocess_fns.append(lambda batch: drop_zero_weight_samples(batch, eps=drop_zero_weight_eps))
+ preprocess_fns.append(validate_actor_logps)
+ preprocess = compose_preprocess(*preprocess_fns)
+
+ return RLAlgorithm(
+ name=name,
+ credit_assigner=credit_assigner,
+ loss=loss,
+ preprocess=preprocess,
+ )
+
+
# ---------------------------------------------------------------------------
# SFT (Supervised Fine-Tuning / Behavioral Cloning)
# ---------------------------------------------------------------------------
@@ -450,3 +729,105 @@ def make_sft(
credit_assigner=credit_assigner,
loss=loss,
)
+
+
+# ---------------------------------------------------------------------------
+# ScaleRL (CISPO + Hybrid Normalization)
+# ---------------------------------------------------------------------------
+
+
+def make_scalerl(
+ *,
+ group_size: int,
+ positive_only: bool = False,
+ clip_eps_low: float = 0.20,
+ clip_eps_high: float = 0.28,
+ length_normalize: bool = True,
+ kl_coeff: float = 0.0,
+ drop_zero_weight_eps: float = 1e-4,
+ name: str = "scalerl",
+) -> RLAlgorithm:
+ """
+ ScaleRL recipe: CISPO loss + hybrid advantage normalization + zero-weight filtering.
+
+ This combines the key sample-efficiency improvements from the ScaleRL paper:
+
+ 1. **HybridNormalizedReturn**: Group-mean centering + batch-std scaling.
+ More robust than pure group-level normalization because it avoids
+ std=0 explosions in low-variance groups (easy prompts).
+
+ 2. **CISPOLoss**: Truncated IS-weight policy gradient that preserves
+ gradient contributions from rare tokens (crucial for reflective
+ reasoning behaviors like "Wait", "However", "Recheck").
+
+ 3. **Drop zero-weight samples**: After credit assignment, drop samples with
+ near-zero weight to reduce no-op updates.
+
+ 4. **FP32 logits** (via TrainerConfig.cast_logits_to_fp32):
+ Recommended for IS ratio stability. Not controlled by this preset—
+ set in TrainerConfig.
+
+ Args:
+ group_size: Number of rollouts per group (required for credit assignment).
+ positive_only: If True, clip negative advantages to zero (REINFORCE-only).
+ clip_eps_low: Lower CISPO clipping bound. Default 0.20 per context-notes.md.
+ clip_eps_high: Upper CISPO clipping bound. Default 0.28 per context-notes.md.
+ length_normalize: Whether to normalize by number of action tokens.
+ kl_coeff: Coefficient for optional token-level KL penalty.
+ Set > 0 for additional stability. Typical: 0.01-0.1. Default 0.0.
+ drop_zero_weight_eps: Epsilon for zero-weight sample detection.
+ name: Algorithm name for logging/metrics.
+
+ Note: Rollouts must carry `group_id` in their metadata and each group
+ must have exactly `group_size` members. Use GRPORequestStrategy for
+ request expansion.
+
+ References:
+ - ScaleRL: arXiv:2510.13786
+ - DAPO (zero-weight filtering): arXiv:2503.14476
+ - MiniMax-M1 (CISPO): arXiv:2506.13585
+ """
+ # HybridNormalizedReturn: group-mean baseline + batch-std scaling
+ credit_assigner: CreditAssigner = HybridNormalizedReturn(
+ group_size=group_size,
+ positive_only=positive_only,
+ )
+
+ # CISPO loss with asymmetric clipping
+ cispo_loss: Loss = CISPOLoss(
+ clip_eps_low=clip_eps_low,
+ clip_eps_high=clip_eps_high,
+ length_normalize=length_normalize,
+ )
+
+ # Optionally add token-level KL penalty for stability
+ if kl_coeff > 0:
+ kl_loss = TokenKLLoss(coeff=kl_coeff, length_normalize=length_normalize)
+ loss: Loss = CompositeLoss(
+ terms=[
+ LossTerm(name="cispo", loss=cispo_loss, weight=1.0),
+ LossTerm(name="kl", loss=kl_loss, weight=1.0),
+ ]
+ )
+ else:
+ loss = cispo_loss
+
+ # Build preprocessing pipeline (order matters)
+ preprocess_fns = []
+
+ # 1. Drop individual zero-weight samples (after credit assignment)
+ preprocess_fns.append(
+ lambda batch: drop_zero_weight_samples(batch, eps=drop_zero_weight_eps)
+ )
+
+ # 2. Validate actor logprobs (required for CISPO ratio computation)
+ preprocess_fns.append(validate_actor_logps)
+
+ preprocess = compose_preprocess(*preprocess_fns)
+
+ return RLAlgorithm(
+ name=name,
+ credit_assigner=credit_assigner,
+ loss=loss,
+ preprocess=preprocess,
+ )
diff --git a/src/ludic/training/batching/synced_batching.py b/src/ludic/training/batching/synced_batching.py
index f4e5377..e55e285 100644
--- a/src/ludic/training/batching/synced_batching.py
+++ b/src/ludic/training/batching/synced_batching.py
@@ -1,4 +1,6 @@
from __future__ import annotations
+import logging
+import time
from typing import Callable, List, Optional
from ludic.training.types import (
@@ -10,6 +12,8 @@
)
from .rollout_engine import RolloutEngine
+logger = logging.getLogger(__name__)
+
class RolloutBatchSource(BatchSource):
"""
@@ -53,7 +57,13 @@ async def next_batch(self) -> SAWBatch:
Pull requests -> Generate (blocking) -> Return Batch.
"""
requests = self._requests_fn()
- return await self._engine.generate_batch(
+ n_requests = len(requests)
+ logger.info(
+ f"Generating batch: {n_requests} rollouts with concurrency={self._concurrency}"
+ )
+ start_time = time.monotonic()
+
+ batch = await self._engine.generate_batch(
requests=requests,
max_steps=self._max_steps,
credit_assigner=self._credit_assigner,
@@ -61,3 +71,10 @@ async def next_batch(self) -> SAWBatch:
concurrency=self._concurrency,
sample_filter=self._sample_filter,
)
+
+ elapsed = time.monotonic() - start_time
+ logger.info(
+ f"Batch complete: {len(batch.items)} samples from {n_requests} rollouts "
+        f"in {elapsed:.1f}s ({n_requests / max(elapsed, 1e-9):.1f} rollouts/s)"
+ )
+ return batch
diff --git a/src/ludic/training/config.py b/src/ludic/training/config.py
index a2a82fb..b703e39 100644
--- a/src/ludic/training/config.py
+++ b/src/ludic/training/config.py
@@ -1,5 +1,17 @@
-from dataclasses import dataclass
-from typing import Optional
+from dataclasses import dataclass, field
+from typing import Any, Optional, Union
+
+
+def _extract_pad_token_id(tokenizer: Any) -> int:
+ """Extract pad_token_id from a tokenizer, with eos_token_id fallback."""
+ if (pad := getattr(tokenizer, "pad_token_id", None)) is not None:
+ return pad
+ if (eos := getattr(tokenizer, "eos_token_id", None)) is not None:
+ return eos
+ raise ValueError(
+ "Tokenizer has no pad_token_id or eos_token_id. "
+ "Set tokenizer.pad_token_id explicitly before passing to TrainerConfig."
+ )
@dataclass
@@ -10,6 +22,16 @@ class TrainerConfig:
This is *purely* about optimization / model device / collation.
Rollout and batch-generation config live in BatchSource / Orchestrator.
+ ==========================
+ Required
+ ==========================
+
+ - pad_token_id:
+ Token ID used when padding sequences during SAW collation.
+ Pass your tokenizer directly and the pad_token_id will be
+ extracted automatically (with eos_token_id as fallback).
+ You can also pass an int if you know the exact token ID.
+
==========================
Model / Optimization
==========================
@@ -31,27 +53,27 @@ class TrainerConfig:
- max_seq_len:
Max token length for any single sample. Trainer raises if exceeded.
-
+
- micro_token_budget:
Max padded tokens per micro-batch (roughly batch_size * max_seq_len).
Trainer splits macro-batches into micro-batches that fit this budget.
Must be >= max_seq_len.
-
+
- sync_every_steps:
- Frequency (in macro-steps) at which to push updated policy
+ Frequency (in macro-steps) at which to push updated policy
weights to the Agent's runtime (e.g., vLLM). Set to 0 to disable
syncing (e.g., pure offline/local training).
- mixed_precision_dtype:
- Optional string to configure FSDP's mixed precision policy.
+ Optional string to configure FSDP's mixed precision policy.
Use "bf16" or "fp16". If None, defaults to full precision (fp32).
- ==========================
- Collation
- ==========================
-
- - pad_token_id:
- Used when padding sequences during SAW collation.
+ - cast_logits_to_fp32:
+ If True, cast model logits to FP32 before loss computation.
+ Critical for importance sampling stability in ratio-based RL objectives
+ (GRPO, CISPO, etc.) where BF16 precision errors compound in exp(log_ratio).
+ Follows ScaleRL paper's "FP32 at LM head" recommendation.
+ See: arXiv:2510.13786 (ScaleRL)
==========================
Distributed
@@ -90,6 +112,9 @@ class TrainerConfig:
Optional per-call timeout for eval rollouts.
"""
+ # ----- required (no default) ------------------
+ pad_token_id: Union[int, Any] # int or tokenizer-like object
+
# ----- model / optimization -------------------
model_device: str = "cuda"
runtime_device: Optional[str] = None
@@ -106,6 +131,7 @@ class TrainerConfig:
micro_token_budget: int = 8192
sync_every_steps: int = 1
mixed_precision_dtype: Optional[str] = "bf16"
+ cast_logits_to_fp32: bool = False # ScaleRL: FP32 logits for IS ratio stability
# PipelineRL specific settings
max_lag: Optional[int] = None # Drop batches older than N steps
@@ -113,12 +139,13 @@ class TrainerConfig:
profile_memory: bool = False
log_every: int = 1
- # ----- collation ------------------------------
- pad_token_id: int = 0
-
# ----- evaluation -----------------------------
eval_at_start: bool = False
eval_every_n_steps: Optional[int] = None
eval_concurrency: int = 32
eval_max_steps: int = 1
eval_timeout_s: Optional[float] = None
+
+ def __post_init__(self) -> None:
+ if not isinstance(self.pad_token_id, int):
+ self.pad_token_id = _extract_pad_token_id(self.pad_token_id)
diff --git a/src/ludic/training/credit_assignment.py b/src/ludic/training/credit_assignment.py
index c17c351..9418618 100644
--- a/src/ludic/training/credit_assignment.py
+++ b/src/ludic/training/credit_assignment.py
@@ -97,6 +97,113 @@ def compute(
return out
+@dataclass
+class HybridNormalizedReturn:
+ """
+ ScaleRL-style advantage normalization: group-mean baseline, batch-std scaling.
+
+ Formula: A_i = (R_i - mean(R_group)) / (std(A_batch) + eps)
+
+ This is more robust than pure group-level normalization (GroupNormalizedReturn)
+ because:
+ 1. Avoids std=0 explosions in low-variance groups (easy prompts)
+ 2. Provides consistent advantage scale across diverse prompts
+ 3. Recommended by ScaleRL and "Tricks or Traps Part I" papers
+
+ The key insight: use group-level *centering* (baseline = group mean) but
+ batch-level *scaling* (divide by batch std). This combines GRPO's per-prompt
+ baseline with robust global scaling.
+
+ Contract:
+ - Rollouts must have `group_id` in `rollout.meta["request_meta"]["group_id"]`.
+ - Each group must have exactly `group_size` rollouts.
+ - Raises ValueError if either condition is violated.
+
+ Args:
+ group_size: Number of rollouts per group.
+ eps: Small constant for numerical stability in std division.
+ positive_only: If True, clip negative advantages to 0.
+
+ Reference: ScaleRL (arXiv:2510.13786), Tricks or Traps Part I (arXiv:2508.08221)
+ """
+
+ group_size: int
+ eps: float = 1e-8
+ positive_only: bool = False
+
+ def __post_init__(self):
+ if self.group_size <= 0:
+ raise ValueError(f"group_size must be positive, got {self.group_size}")
+
+ def compute(
+ self,
+ rollouts: List[Rollout],
+ ) -> Dict[RolloutStepKey, float]:
+
+ out: Dict[RolloutStepKey, float] = {}
+
+ # Group by group_id from request meta
+ groups: Dict[str, List[Rollout]] = defaultdict(list)
+ for r in rollouts:
+ group_id = r.meta.get("request_meta", {}).get("group_id")
+ if group_id is None:
+ raise ValueError(
+ f"Rollout {r.id} missing group_id in meta['request_meta']. "
+ "HybridNormalizedReturn requires each rollout to have a group_id."
+ )
+ groups[group_id].append(r)
+
+ # Phase 1: Compute group-centered advantages (A_i = R_i - mean(R_group))
+ # Store (rollout, advantage) pairs for batch-level normalization
+ all_advantages: List[float] = []
+ rollout_advantages: List[tuple[Rollout, float]] = []
+
+ for group_id, group_rollouts in groups.items():
+ # Validate group size
+ actual_size = len(group_rollouts)
+ if actual_size != self.group_size:
+ raise ValueError(
+ f"Group size mismatch for group_id={group_id}: "
+ f"expected {self.group_size}, got {actual_size}."
+ )
+
+ # Get total reward for each rollout in the group
+ rewards = torch.tensor(
+ [r.total_reward for r in group_rollouts],
+ dtype=torch.float32,
+ )
+
+ # Group-level centering: A_i = R_i - mean(R_group)
+ baseline = rewards.mean()
+ advantages = rewards - baseline
+
+ for i, r in enumerate(group_rollouts):
+ adv = advantages[i].item()
+ all_advantages.append(adv)
+ rollout_advantages.append((r, adv))
+
+ # Phase 2: Batch-level std normalization
+ if len(all_advantages) == 0:
+ return out
+
+ all_adv_tensor = torch.tensor(all_advantages, dtype=torch.float32)
+ batch_std = all_adv_tensor.std(unbiased=False)
+
+ # Normalize all advantages by batch std
+ for rollout, raw_adv in rollout_advantages:
+ adv = raw_adv / (batch_std.item() + self.eps)
+
+ if self.positive_only:
+ adv = max(adv, 0.0)
+
+ # Assign same advantage to all steps in the rollout
+ for step in rollout.steps:
+ key: RolloutStepKey = (rollout.id, step.index)
+ out[key] = adv
+
+ return out
+
+
@dataclass
class MonteCarloReturn:
"""
diff --git a/src/ludic/training/hardware.py b/src/ludic/training/hardware.py
new file mode 100644
index 0000000..e1cad63
--- /dev/null
+++ b/src/ludic/training/hardware.py
@@ -0,0 +1,251 @@
+"""
+GPU hardware detection and Flash Attention configuration utilities.
+
+This module provides utilities for:
+- Detecting GPU architecture (Hopper, Ampere, etc.)
+- Selecting optimal attention implementation based on hardware
+- Configuring PyTorch SDPA backends for Flash Attention
+
+Usage:
+ from ludic.training.hardware import configure_flash_attention
+
+ # In training script, after device detection:
+ attn_impl = configure_flash_attention(device="cuda", disable_flash_attn=False)
+ model = AutoModelForCausalLM.from_pretrained(..., attn_implementation=attn_impl)
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Literal, Optional
+
+import torch
+
+logger = logging.getLogger(__name__)
+
+# GPU architecture compute capability mapping
+# See: https://developer.nvidia.com/cuda-gpus
+GPU_ARCHITECTURES = {
+ (9, 0): "hopper", # H100, H200, GH200
+ (8, 9): "ada", # RTX 4090, L40
+ (8, 6): "ampere", # RTX 3090, A10
+ (8, 0): "ampere", # A100
+ (7, 5): "turing", # RTX 2080, T4
+ (7, 0): "volta", # V100
+}
+
+AttentionImpl = Literal["flash_attention_3", "flash_attention_2", "sdpa", "eager"]
+
+
+def detect_gpu_architecture() -> Optional[str]:
+ """
+ Detect the GPU architecture from CUDA compute capability.
+
+ Returns:
+        Architecture name: "hopper", "ada", "ampere", "turing", "volta", "older", or None
+ if no CUDA GPU is available.
+ """
+ if not torch.cuda.is_available():
+ return None
+
+ try:
+ capability = torch.cuda.get_device_capability()
+ arch = GPU_ARCHITECTURES.get(capability)
+ if arch is None:
+ # Unknown architecture, try to infer from major version
+ major = capability[0]
+ if major >= 9:
+ arch = "hopper"
+ elif major >= 8:
+ arch = "ampere"
+ else:
+ arch = "older"
+ return arch
+ except Exception as e:
+ logger.warning(f"Failed to detect GPU architecture: {e}")
+ return None
+
+
+def get_cuda_version() -> Optional[tuple[int, int]]:
+ """
+ Get the CUDA runtime version.
+
+ Returns:
+ Tuple of (major, minor) version, or None if CUDA unavailable.
+ """
+ if not torch.cuda.is_available():
+ return None
+
+ try:
+ version = torch.version.cuda
+ if version is None:
+ return None
+ parts = version.split(".")
+ return (int(parts[0]), int(parts[1]))
+ except Exception as e:
+ logger.warning(f"Failed to get CUDA version: {e}")
+ return None
+
+
+def _check_flash_attn_3_available() -> bool:
+ """
+ Check if Flash Attention 3 is available for HuggingFace Transformers.
+
+ HuggingFace Transformers checks for flash_attention_3 support via:
+ importlib.util.find_spec("flash_attn_3")
+
+ This requires either:
+ 1. The flash_attn_3 package installed (pip install flash_attn_3)
+ 2. Building flash-attn from the hopper/ subdirectory
+ 3. Using HuggingFace 'kernels' package (pip install kernels)
+
+ Returns True only if HuggingFace will accept flash_attention_3.
+ """
+ import importlib.util
+
+ # Check what HuggingFace Transformers actually checks
+ if importlib.util.find_spec("flash_attn_3") is not None:
+ logger.info("flash_attn_3 package found - FA3 available")
+ return True
+
+ # Also check for flash_attn_interface (alternative FA3 installation)
+ if importlib.util.find_spec("flash_attn_interface") is not None:
+ logger.info("flash_attn_interface found - FA3 may be available")
+ # Note: This might not work with all HF Transformers versions
+ # as they specifically check for flash_attn_3, not flash_attn_interface
+ return False # Be conservative - HF checks for flash_attn_3 specifically
+
+ logger.debug("FA3 not available (flash_attn_3 package not found)")
+ return False
+
+
+def get_optimal_attention_impl(
+ *,
+ disable_flash_attn: bool = False,
+) -> AttentionImpl:
+ """
+ Determine the optimal attention implementation for the current hardware.
+
+ Selection logic:
+ - Hopper (H100/H200) + CUDA >= 12.3 + flash-attn >= 2.7: flash_attention_3
+ - Ampere/Ada + CUDA >= 11.6 + flash-attn installed: flash_attention_2
+ - Otherwise: sdpa (PyTorch native, still uses flash kernels when possible)
+
+ Args:
+ disable_flash_attn: If True, skip flash attention and use SDPA.
+
+ Returns:
+ Attention implementation string for HuggingFace models:
+ "flash_attention_3", "flash_attention_2", "sdpa", or "eager"
+ """
+ if disable_flash_attn:
+ logger.info("Flash Attention disabled by user request, using SDPA")
+ return "sdpa"
+
+ arch = detect_gpu_architecture()
+ cuda_version = get_cuda_version()
+
+ # Check if flash_attn is available
+ try:
+ import flash_attn
+ flash_attn_available = True
+ flash_attn_version = getattr(flash_attn, "__version__", "unknown")
+ except ImportError:
+ flash_attn_available = False
+ flash_attn_version = None
+
+ if not flash_attn_available:
+ logger.info(f"flash-attn not installed, using SDPA (arch={arch})")
+ return "sdpa"
+
+ # Flash Attention 3: Hopper-only (H100/H200) with CUDA >= 12.3
+ # Achieves 1.5-2x speedup over FA2, 75% H100 utilization
+ # Ref: https://arxiv.org/abs/2407.08608
+ if arch == "hopper" and cuda_version and cuda_version >= (12, 3):
+ if _check_flash_attn_3_available():
+ logger.info(
+ f"Using flash_attention_3 (arch={arch}, cuda={cuda_version}, "
+ f"flash_attn={flash_attn_version})"
+ )
+ return "flash_attention_3"
+
+ # Flash Attention 2: Ampere+ with CUDA >= 11.6
+ if arch in ("hopper", "ampere", "ada") and cuda_version and cuda_version >= (11, 6):
+ logger.info(
+ f"Using flash_attention_2 (arch={arch}, cuda={cuda_version}, "
+ f"flash_attn={flash_attn_version})"
+ )
+ return "flash_attention_2"
+
+ # Fallback to SDPA (PyTorch native, also uses flash kernels when possible)
+ logger.info(f"Using SDPA (arch={arch}, cuda={cuda_version})")
+ return "sdpa"
+
+
+def configure_flash_attention(
+ device: str = "cuda",
+ *,
+ disable_flash_attn: bool = False,
+) -> AttentionImpl:
+ """
+ Configure Flash Attention for optimal performance.
+
+ This function:
+ 1. Enables PyTorch's Flash SDP backend (if available)
+ 2. Returns the optimal attention implementation for HuggingFace models
+
+ Args:
+ device: Target device ("cuda" or "cpu")
+ disable_flash_attn: If True, disable flash attention entirely.
+
+ Returns:
+ Attention implementation string to pass to model.from_pretrained().
+
+ Example:
+ attn_impl = configure_flash_attention("cuda")
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ attn_implementation=attn_impl,
+ )
+ """
+ if device != "cuda" or not torch.cuda.is_available():
+ logger.info("No CUDA device, using eager attention")
+ return "eager"
+
+ # Enable Flash SDP backend in PyTorch (uses flash kernels for F.scaled_dot_product_attention)
+ if not disable_flash_attn:
+ try:
+ torch.backends.cuda.enable_flash_sdp(True)
+ logger.debug("Enabled torch.backends.cuda.flash_sdp")
+ except Exception as e:
+ logger.warning(f"Could not enable flash_sdp: {e}")
+
+ return get_optimal_attention_impl(disable_flash_attn=disable_flash_attn)
+
+
+def log_hardware_info() -> None:
+ """Log GPU hardware information for debugging."""
+ if not torch.cuda.is_available():
+ logger.info("No CUDA GPU available")
+ return
+
+ try:
+ device_name = torch.cuda.get_device_name()
+ capability = torch.cuda.get_device_capability()
+ arch = detect_gpu_architecture()
+ cuda_version = get_cuda_version()
+
+ logger.info(
+ f"GPU: {device_name} (sm_{capability[0]}{capability[1]}, {arch}), "
+            f"CUDA: {'.'.join(map(str, cuda_version)) if cuda_version else 'N/A'}"
+ )
+
+ # Check flash_attn
+ try:
+ import flash_attn
+ logger.info(f"flash-attn version: {flash_attn.__version__}")
+ except ImportError:
+ logger.info("flash-attn: not installed")
+
+ except Exception as e:
+ logger.warning(f"Could not log hardware info: {e}")
diff --git a/src/ludic/training/loss.py b/src/ludic/training/loss.py
index 18fe078..ebc708f 100644
--- a/src/ludic/training/loss.py
+++ b/src/ludic/training/loss.py
@@ -1,6 +1,8 @@
from __future__ import annotations
from dataclasses import dataclass
+from contextlib import contextmanager
+from contextvars import ContextVar
import logging
import os
from beartype.typing import Any, Dict, Mapping, Protocol, Tuple, List, Optional
@@ -31,6 +33,177 @@ def _no_op(fn):
)
+# ---------------------------------------------------------------------------
+# Shared context for memory-efficient loss composition
+# ---------------------------------------------------------------------------
+
+
+_shared_context_var: ContextVar[Optional["SharedContext"]] = ContextVar(
+ "ludic_shared_context",
+ default=None,
+)
+
+
+def _get_shared_context(
+ logits: Logits,
+ *,
+ batch: Optional[Batch] = None,
+ input_ids: Optional[TokenIds] = None,
+ action_mask: Optional[Mask] = None,
+) -> Optional["SharedContext"]:
+ shared = _shared_context_var.get()
+ if shared is None:
+ return None
+ if shared.logits is not logits:
+ return None
+ if batch is not None and shared.batch is not batch:
+ return None
+ if input_ids is not None and shared.batch.get("input_ids") is not input_ids:
+ return None
+ if action_mask is not None and shared.batch.get("action_mask") is not action_mask:
+ return None
+ return shared
+
+
+@contextmanager
+def _use_shared_context(shared: "SharedContext"):
+ token = _shared_context_var.set(shared)
+ try:
+ yield
+ finally:
+ _shared_context_var.reset(token)
+
+
+class SharedContext:
+ """
+ Lazy-computed shared tensors for memory-efficient loss composition.
+
+ When multiple losses are combined via CompositeLoss, each typically needs
+ the same expensive intermediate tensors (e.g., token_logp from log_softmax).
+ Without sharing, each loss computes these independently, creating separate
+ autograd graphs that store duplicate [B, T, V] activations for backward.
+
+ SharedContext solves this by computing expensive tensors ONCE on first access
+ and caching them for subsequent uses. All losses receive the same tensor
+ objects, sharing a single autograd graph.
+
+ Memory savings example (7B model, V=32K, B=8, T=4096):
+ - Without sharing (2 losses): 2× [B, T, V] ≈ 4GB activations
+ - With sharing (2 losses): 1× [B, T, V] ≈ 2GB activations
+
+ Usage:
+ # CompositeLoss installs a SharedContext so helpers can reuse cached tensors.
+ with _use_shared_context(SharedContext(logits, batch)):
+ token_logp = compute_token_logp(logits, batch["input_ids"])
+
+ Note: Properties that depend on batch["actor_logps"] will raise KeyError
+ if that key is missing. This is intentional - not all loss combinations
+ need actor logprobs.
+ """
+
+ __slots__ = ("logits", "batch", "_cache")
+
+ def __init__(self, logits: Logits, batch: Batch) -> None:
+ self.logits = logits
+ self.batch = batch
+ self._cache: Dict[str, Tensor] = {}
+
+ @property
+ def input_ids(self) -> TokenIds:
+ """Token IDs from batch (not cached, just a convenience accessor)."""
+ return self.batch["input_ids"]
+
+ @property
+ def action_mask(self) -> Mask:
+ """Action mask from batch (not cached, just a convenience accessor)."""
+ return self.batch["action_mask"]
+
+ @property
+ def token_logp(self) -> Float[Tensor, "B T-1"]:
+ """
+ Per-token log probabilities: log π(a_t|s_t) for each position.
+
+ THIS IS THE EXPENSIVE OPERATION - calls selective_log_softmax which
+ requires storing [B, T, V] activations for backward. Caching this
+ is the primary memory optimization.
+ """
+ if "token_logp" not in self._cache:
+ self._cache["token_logp"] = _compute_token_logp_raw(self.logits, self.input_ids)
+ return self._cache["token_logp"]
+
+ @property
+ def token_mask(self) -> Float[Tensor, "B T-1"]:
+ """Action mask aligned with token_logp (shifted by 1 for next-token prediction)."""
+ if "token_mask" not in self._cache:
+ self._cache["token_mask"] = self.action_mask[:, 1:].to(
+ self.token_logp.dtype
+ )
+ return self._cache["token_mask"]
+
+ @property
+ def token_counts(self) -> Float[Tensor, "B"]:
+ """Number of action tokens per sample (for length normalization)."""
+ if "token_counts" not in self._cache:
+ self._cache["token_counts"] = self.token_mask.sum(dim=-1).clamp(min=1.0)
+ return self._cache["token_counts"]
+
+ @property
+ def actor_logps_shifted(self) -> Float[Tensor, "B T-1"]:
+ """
+ Behavior policy log probs aligned with token_logp.
+
+ Raises:
+ KeyError: If batch["actor_logps"] is not present.
+ """
+ if "actor_logps_shifted" not in self._cache:
+ if "actor_logps" not in self.batch:
+ raise KeyError(
+ "SharedContext.actor_logps_shifted requires batch['actor_logps']. "
+ "Ensure your rollouts include actor_logps for ratio-based objectives."
+ )
+ actor_logps = self.batch["actor_logps"]
+ if actor_logps.shape != self.input_ids.shape:
+ raise ValueError(
+ f"actor_logps shape {tuple(actor_logps.shape)} does not match input_ids "
+ f"{tuple(self.input_ids.shape)}."
+ )
+ self._cache["actor_logps_shifted"] = actor_logps[:, 1:]
+ return self._cache["actor_logps_shifted"]
+
+ @property
+ def log_ratio(self) -> Float[Tensor, "B T-1"]:
+ """Log importance ratio: log(π_new/π_old) per token."""
+ if "log_ratio" not in self._cache:
+ self._cache["log_ratio"] = self.token_logp - self.actor_logps_shifted
+ return self._cache["log_ratio"]
+
+ @property
+ def ratio(self) -> Float[Tensor, "B T-1"]:
+ """Importance ratio: π_new/π_old per token."""
+ if "ratio" not in self._cache:
+ self._cache["ratio"] = torch.exp(self.log_ratio)
+ return self._cache["ratio"]
+
+ def logp_action(self, *, length_normalize: bool = False) -> Float[Tensor, "B"]:
+ """
+ Sequence-level log probability (sum over action tokens).
+
+ Unlike token_logp, this is a cheap derivation that doesn't require
+ additional [B, T, V] storage. The length_normalize flag controls
+ whether to divide by number of action tokens.
+
+ Args:
+ length_normalize: If True, return mean log prob instead of sum.
+
+ Returns:
+ [B] tensor of per-sample log probabilities.
+ """
+ masked_logp = (self.token_logp * self.token_mask).sum(dim=-1)
+ if length_normalize:
+ return masked_logp / self.token_counts
+ return masked_logp
+
+
class Loss(Protocol):
"""
Generic loss: given model outputs (logits) and a collated batch, return
@@ -46,11 +219,13 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Tenso
# We define this as a standalone helper so torch.compile can cache it cleanly.
# dynamic=True is critical for varying sequence lengths (preventing recompilation).
@jaxtyped(typechecker=typechecker)
-@torch.compile(dynamic=True)
-def selective_log_softmax(logits: Logits, index: TokenIds) -> Float[Tensor, "B T"]:
+def _selective_log_softmax_impl(
+ logits: Logits,
+ index: TokenIds,
+) -> Float[Tensor, "B T"]:
"""
Fused kernel for log_softmax + gather.
-
+
Inductor (torch.compile) generates a kernel that computes the log_softmax
normalization term and selects the target token in a single pass.
This avoids materializing the massive [B, T, V] probability tensor in VRAM.
@@ -59,8 +234,32 @@ def selective_log_softmax(logits: Logits, index: TokenIds) -> Float[Tensor, "B T
logprobs = logits.log_softmax(dim=-1)
return torch.gather(logprobs, dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
+
+_USE_TORCH_COMPILE = os.getenv("LUDIC_DISABLE_TORCH_COMPILE", "0") != "1"
+_USE_COMPILED_SELECTIVE_LOG_SOFTMAX = _USE_TORCH_COMPILE
+if _USE_TORCH_COMPILE:
+ _selective_log_softmax_compiled = torch.compile(
+ _selective_log_softmax_impl, dynamic=True
+ )
+else:
+ _selective_log_softmax_compiled = _selective_log_softmax_impl
+
+
+def selective_log_softmax(logits: Logits, index: TokenIds) -> Float[Tensor, "B T"]:
+ global _USE_COMPILED_SELECTIVE_LOG_SOFTMAX
+ if _USE_COMPILED_SELECTIVE_LOG_SOFTMAX:
+ try:
+ return _selective_log_softmax_compiled(logits, index)
+ except Exception as exc:
+ logger.warning(
+ "torch.compile failed for selective_log_softmax, falling back to eager: %s",
+ exc,
+ )
+ _USE_COMPILED_SELECTIVE_LOG_SOFTMAX = False
+ return _selective_log_softmax_impl(logits, index)
+
@jaxtyped(typechecker=typechecker)
-def compute_logp_action(
+def _compute_logp_action_raw(
logits: Logits,
input_ids: TokenIds,
action_mask: Mask,
@@ -80,16 +279,18 @@ def compute_logp_action(
"""
if logits.ndim != 3:
raise ValueError(f"Expected logits [B, T, V], got {tuple(logits.shape)}")
-
+
if input_ids.shape != logits.shape[:2]:
- raise ValueError(f"Shape mismatch: input_ids {input_ids.shape} vs logits {logits.shape}")
+ raise ValueError(
+ f"Shape mismatch: input_ids {input_ids.shape} vs logits {logits.shape}"
+ )
# Shift for causal LM: logits[t] predicts input_ids[t+1]
if logits.size(1) < 2:
raise ValueError("Sequence too short to compute next-token logprobs.")
- logits_shifted = logits[:, :-1, :] # [B, T-1, V]
- target_ids = input_ids[:, 1:] # [B, T-1]
- action_mask_shifted = action_mask[:, 1:] # [B, T-1]
+ logits_shifted = logits[:, :-1, :] # [B, T-1, V]
+ target_ids = input_ids[:, 1:] # [B, T-1]
+ action_mask_shifted = action_mask[:, 1:] # [B, T-1]
# Use the compiled fused kernel on aligned targets
token_logp = selective_log_softmax(logits_shifted, target_ids)
@@ -106,7 +307,30 @@ def compute_logp_action(
@jaxtyped(typechecker=typechecker)
-def compute_token_logp(
+def compute_logp_action(
+ logits: Logits,
+ input_ids: TokenIds,
+ action_mask: Mask,
+ *,
+ length_normalize: bool = False,
+) -> Weights:
+ shared = _get_shared_context(
+ logits,
+ input_ids=input_ids,
+ action_mask=action_mask,
+ )
+ if shared is not None:
+ return shared.logp_action(length_normalize=length_normalize)
+ return _compute_logp_action_raw(
+ logits,
+ input_ids,
+ action_mask,
+ length_normalize=length_normalize,
+ )
+
+
+@jaxtyped(typechecker=typechecker)
+def _compute_token_logp_raw(
logits: Logits,
input_ids: TokenIds,
) -> Float[Tensor, "B T-1"]:
@@ -128,6 +352,19 @@ def compute_token_logp(
return selective_log_softmax(logits_shifted, target_ids)
+@jaxtyped(typechecker=typechecker)
+def compute_token_logp(
+ logits: Logits,
+ input_ids: TokenIds,
+) -> Float[Tensor, "B T-1"]:
+ shared = _get_shared_context(logits, input_ids=input_ids)
+ if shared is not None:
+ if "token_logp" not in shared._cache:
+ shared._cache["token_logp"] = _compute_token_logp_raw(logits, input_ids)
+ return shared._cache["token_logp"]
+ return _compute_token_logp_raw(logits, input_ids)
+
+
# ---------------------------------------------------------------------------
# REINFORCE family
# ---------------------------------------------------------------------------
@@ -155,7 +392,11 @@ class ReinforceLoss:
old_logp_key: str = "old_logp_action"
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
input_ids = batch["input_ids"] # [B, T]
action_mask = batch["action_mask"] # [B, T]
advantages = batch["weight"] # [B]
@@ -166,10 +407,11 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]
logp_action = compute_logp_action(
logits, input_ids, action_mask, length_normalize=self.length_normalize
) # [B]
+ token_counts = action_mask[:, 1:].sum(dim=-1).clamp(min=1.0)
old_logp = batch[self.old_logp_key] # [B]
if self.length_normalize:
- lengths = action_mask[:, 1:].to(old_logp.dtype).sum(dim=-1).clamp(min=1.0)
+ lengths = token_counts.to(old_logp.dtype)
old_logp = old_logp / lengths
log_ratio = logp_action - old_logp
@@ -214,7 +456,11 @@ class MaskedCausalLMCrossEntropyLoss:
length_normalize: bool = True
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
input_ids = batch["input_ids"] # [B, T]
action_mask = batch["action_mask"] # [B, T]
weights = batch.get("weight")
@@ -222,7 +468,9 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]
if logits.ndim != 3:
raise ValueError(f"Expected logits [B, T, V], got {tuple(logits.shape)}")
if input_ids.shape != logits.shape[:2]:
- raise ValueError(f"Shape mismatch: input_ids {input_ids.shape} vs logits {logits.shape}")
+ raise ValueError(
+ f"Shape mismatch: input_ids {input_ids.shape} vs logits {logits.shape}"
+ )
if logits.size(1) < 2:
raise ValueError("Sequence too short to compute next-token loss.")
@@ -281,21 +529,26 @@ class ReinforceBaselineLoss:
old_logp_key: str = "old_logp_action"
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
- input_ids = batch["input_ids"]
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
adv_raw = batch["weight"] # [B]
if self.old_logp_key not in batch:
raise KeyError(f"ReinforceBaselineLoss requires '{self.old_logp_key}' in batch.")
+ input_ids = batch["input_ids"]
logp_action = compute_logp_action(
logits, input_ids, action_mask, length_normalize=self.length_normalize
) # [B]
+ token_counts = action_mask[:, 1:].sum(dim=-1).clamp(min=1.0)
old_logp = batch[self.old_logp_key] # [B]
if self.length_normalize:
- lengths = action_mask[:, 1:].to(old_logp.dtype).sum(dim=-1).clamp(min=1.0)
+ lengths = token_counts.to(old_logp.dtype)
old_logp = old_logp / lengths
log_ratio = logp_action - old_logp
@@ -362,22 +615,30 @@ def __post_init__(self) -> None:
raise ValueError(f"ratio_clip must be positive, got {self.ratio_clip}")
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
- input_ids = batch["input_ids"]
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
advantages = batch["weight"] # [B]
if self.old_logp_key not in batch:
- raise KeyError(f"ClippedSurrogateLoss requires '{self.old_logp_key}' in batch.")
+ raise KeyError(
+ f"ClippedSurrogateLoss requires '{self.old_logp_key}' in batch."
+ )
+ input_ids = batch["input_ids"]
logp_action = compute_logp_action(
logits,
input_ids,
action_mask,
length_normalize=self.length_normalize,
) # [B]
+ token_counts = action_mask[:, 1:].sum(dim=-1).clamp(min=1.0)
+
old_logp = batch[self.old_logp_key] # [B]
if self.length_normalize:
- lengths = action_mask[:, 1:].to(old_logp.dtype).sum(dim=-1).clamp(min=1.0)
+ lengths = token_counts.to(old_logp.dtype)
old_logp = old_logp / lengths
log_ratio = logp_action - old_logp
@@ -395,9 +656,21 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]
obj = torch.min(unclipped, clipped)
loss = -obj.mean()
- ppo_clip_frac = (
- (ratio > 1.0 + self.clip_eps_high) | (ratio < 1.0 - self.clip_eps_low)
- ).float().mean()
+ # Token-weighted clip fraction: counts tokens in sequences where the
+ # clipped branch is active (sequence-level GSPO-style metric).
+ token_mask = action_mask[:, 1:].to(dtype=ratio.dtype)
+ token_counts = token_mask.sum(dim=-1).clamp(min=1.0)
+ adv_pos = advantages >= 0
+ seq_clipped = torch.where(
+ adv_pos,
+ ratio > 1.0 + self.clip_eps_high,
+ ratio < 1.0 - self.clip_eps_low,
+ )
+ total_tokens = token_counts.sum()
+ if total_tokens > 0:
+ ppo_clip_frac = (seq_clipped.to(token_counts.dtype) * token_counts).sum() / total_tokens
+ else:
+ ppo_clip_frac = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
if self.ratio_clip is not None:
ratio_clip_frac = (ratio >= self.ratio_clip).float().mean()
else:
@@ -453,14 +726,18 @@ def __post_init__(self) -> None:
raise ValueError(f"clip_eps_high must be non-negative, got {self.clip_eps_high}")
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
- input_ids = batch["input_ids"]
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
advantages = batch["weight"] # [B]
if "actor_logps" not in batch:
raise KeyError("CISPOLoss requires batch['actor_logps'] for importance sampling.")
+ input_ids = batch["input_ids"]
actor_logps = batch["actor_logps"] # [B, T]
if actor_logps.shape != input_ids.shape:
raise ValueError(
@@ -565,20 +842,23 @@ def __post_init__(self) -> None:
raise ValueError(f"ratio_clip must be positive, got {self.ratio_clip}")
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
- input_ids = batch["input_ids"]
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
advantages = batch["weight"]
if "actor_logps" not in batch:
raise KeyError("TokenClippedSurrogateLoss requires batch['actor_logps'] for token IS.")
+ input_ids = batch["input_ids"]
actor_logps = batch["actor_logps"]
if actor_logps.shape != input_ids.shape:
raise ValueError(
f"actor_logps shape {tuple(actor_logps.shape)} does not match input_ids "
f"{tuple(input_ids.shape)}."
)
-
token_logp = compute_token_logp(logits, input_ids) # [B, T-1]
token_mask = action_mask[:, 1:].to(token_logp.dtype)
token_counts = token_mask.sum(dim=-1).clamp(min=1.0)
@@ -640,11 +920,431 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]
return loss, stats
+@dataclass
+class SAPOLoss:
+ """
+ SAPO (Soft Adaptive Policy Optimization) loss.
+
+ SAPO replaces hard clipping with a smooth, temperature-controlled sigmoid gate
+ that adaptively attenuates off-policy updates while preserving useful learning
+ signals. Unlike hard clipping (GRPO) or sequence-level gates (GSPO), SAPO
+ applies a soft trust region at the token level that naturally yields sequence-level
+ coherence under mild conditions.
+
+ Core idea:
+ Instead of hard clipping: min(r * A, clip(r, 1-ε, 1+ε) * A)
+ SAPO uses soft gate: f(r) * A, where f(r) = (4/τ) * σ(τ(r - 1))
+
+ The sigmoid gate σ(τ(r - 1)) peaks at r=1 and decays smoothly as r deviates,
+ implementing a continuous trust region. The temperature τ controls decay rate:
+ larger τ → faster decay → more conservative updates.
+
+ Asymmetric temperatures:
+ - τ_pos for positive advantages (token logit should increase)
+ - τ_neg for negative advantages (token logit should decrease)
+
+ Setting τ_neg > τ_pos makes negative gradients decay faster, improving stability.
+ Rationale: Negative updates diffuse to many unsampled tokens in a large vocabulary,
+ introducing more noise than positive updates which focus on the sampled token.
+
+ Objective:
+ J_SAPO = E[ (1/|o|) Σ_t f(ρ_t) * A ]
+ where f(r) = (4/τ) * σ(τ(r - 1))
+ and τ = τ_pos if A > 0 else τ_neg
+
+ Gradient weight (from differentiating f):
+ w(r) = 4 * p(r) * (1 - p(r)), where p(r) = σ(τ(r - 1))
+ This peaks at r=1 with value 1 and decays smoothly.
+
+ Connection to other methods:
+ - Under mild conditions (small steps, low token variance), SAPO reduces to
+ sequence-level optimization like GSPO but with smooth gating
+ - Compared to GRPO's hard token clipping, SAPO provides smooth scaling
+ - Compared to GSPO's sequence-level hard clipping, SAPO is token-adaptive
+
+ Expects:
+ - batch["weight"]: A (advantages) [B]
+ - batch["actor_logps"]: token logps under behavior policy [B, T]
+ - input_ids / attention_mask / action_mask for π_new
+
+ Reference: "Soft Adaptive Policy Optimization" (arXiv:2511.20347v2)
+ https://arxiv.org/abs/2511.20347
+ """
+
+ tau_pos: float = 1.0 # Temperature for positive advantages
+ tau_neg: float = 1.05 # Temperature for negative advantages (higher for stability)
+ length_normalize: bool = False # Normalize by sequence length
+
+ def __post_init__(self) -> None:
+ if self.tau_pos <= 0 or self.tau_neg <= 0:
+ raise ValueError(
+ f"tau_pos/tau_neg must be positive, got {self.tau_pos}, {self.tau_neg}"
+ )
+
+ @jaxtyped(typechecker=typechecker)
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
+ action_mask = batch["action_mask"]
+ advantages = batch["weight"] # [B]
+
+ if "actor_logps" not in batch:
+ raise KeyError("SAPOLoss requires batch['actor_logps'] for token IS.")
+
+ input_ids = batch["input_ids"]
+ actor_logps = batch["actor_logps"]
+ if actor_logps.shape != input_ids.shape:
+ raise ValueError(
+ f"actor_logps shape {tuple(actor_logps.shape)} does not match input_ids "
+ f"{tuple(input_ids.shape)}."
+ )
+
+ # Compute token-level log probabilities
+ token_logp = compute_token_logp(logits, input_ids) # [B, T-1]
+ token_mask = action_mask[:, 1:].to(token_logp.dtype) # [B, T-1]
+ token_counts = token_mask.sum(dim=-1).clamp(min=1.0) # [B]
+ actor_logps_shifted = actor_logps[:, 1:] # [B, T-1]
+
+ # Compute importance ratios
+ log_ratio = token_logp - actor_logps_shifted # [B, T-1]
+ ratio = torch.exp(log_ratio) # [B, T-1]
+
+ # Select temperature based on advantage sign
+ # Use where to select between tau_pos and tau_neg without creating new tensors
+ adv_positive = advantages > 0 # [B]
+ tau_pos_val = self.tau_pos
+ tau_neg_val = self.tau_neg
+
+ # Compute soft gate for positive and negative advantages separately
+ # This allows kernel fusion since we're not creating tensors in the graph
+ ratio_minus_1 = ratio - 1.0 # [B, T-1]
+
+ # For positive advantages: f(r) = (4/τ_pos) * σ(τ_pos * (r - 1))
+ # For negative advantages: f(r) = (4/τ_neg) * σ(τ_neg * (r - 1))
+ sigmoid_arg_pos = tau_pos_val * ratio_minus_1 # [B, T-1]
+ sigmoid_arg_neg = tau_neg_val * ratio_minus_1 # [B, T-1]
+
+ gate_pos = torch.sigmoid(sigmoid_arg_pos) # [B, T-1]
+ gate_neg = torch.sigmoid(sigmoid_arg_neg) # [B, T-1]
+
+ soft_gate_pos = (4.0 / tau_pos_val) * gate_pos # [B, T-1]
+ soft_gate_neg = (4.0 / tau_neg_val) * gate_neg # [B, T-1]
+
+ # Select based on advantage sign (broadcast over tokens)
+ adv_positive_expanded = adv_positive.unsqueeze(-1) # [B, 1]
+ soft_gate = torch.where(adv_positive_expanded, soft_gate_pos, soft_gate_neg) # [B, T-1]
+
+ # Apply gate to advantages (broadcast advantages over tokens)
+ adv_expanded = advantages.unsqueeze(-1) # [B, 1]
+ gated_obj = soft_gate * adv_expanded * token_mask # [B, T-1]
+
+ # Aggregate over tokens
+ per_sample_obj = gated_obj.sum(dim=-1) # [B]
+ if self.length_normalize:
+ per_sample_obj = per_sample_obj / token_counts
+
+ loss = -per_sample_obj.mean()
+
+ # --- Stats computation ---
+ # Gradient weight: w(r) = 4 * p(r) * (1 - p(r))
+ # Select the correct gate based on advantage sign
+ gate_selected = torch.where(adv_positive_expanded, gate_pos, gate_neg) # [B, T-1]
+ grad_weight = 4.0 * gate_selected * (1.0 - gate_selected) # [B, T-1]
+
+ # Compute KL for monitoring
+ token_mismatch_kl = ratio - log_ratio - 1.0 # [B, T-1]
+
+ mask = token_mask > 0
+ if mask.any():
+ ratio_vals = ratio.masked_select(mask)
+ ratio_mean = ratio_vals.mean()
+ ratio_std = ratio_vals.std(unbiased=False)
+ mismatch_kl = token_mismatch_kl.masked_select(mask).mean()
+
+ # Average gradient weight (for monitoring soft gating)
+ grad_weight_vals = grad_weight.masked_select(mask)
+ grad_weight_mean = grad_weight_vals.mean()
+ grad_weight_std = grad_weight_vals.std(unbiased=False)
+ else:
+ ratio_mean = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
+ ratio_std = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
+ mismatch_kl = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
+ grad_weight_mean = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
+ grad_weight_std = torch.zeros((), device=ratio.device, dtype=ratio.dtype)
+
+ logp_action = (token_logp * token_mask).sum(dim=-1)
+ stats: Dict[str, Any] = {
+ "loss": loss.detach(),
+ "ratio_mean": ratio_mean.detach(),
+ "ratio_std": ratio_std.detach(),
+ "grad_weight_mean": grad_weight_mean.detach(),
+ "grad_weight_std": grad_weight_std.detach(),
+ "kl_actor_policy": mismatch_kl.detach(),
+ "adv_mean": advantages.mean().detach(),
+ "adv_std": advantages.std(unbiased=False).detach(),
+ "logp_mean": logp_action.mean().detach(),
+ "avg_action_tokens": token_counts.mean().detach(),
+ }
+ return loss, stats
+
+
+@dataclass
+class GMPOLoss:
+ """
+ GMPO (Geometric-Mean Policy Optimization) loss.
+
+ GMPO stabilizes GRPO by using the geometric mean of token-level importance ratios
+ instead of the arithmetic mean. This makes the objective less sensitive to outliers
+ and results in more stable policy updates.
+
+ Objective:
+        J_GMPO = E[ (∏_t min(ρ_t * A, clip(ρ_t, e^(-ε_low), e^(+ε_high)) * A))^(1/|o|) * sgn(A) ]
+
+ where:
+ - ρ_t = π_new(a_t|s_t) / π_old(a_t|s_t) is the token-level importance ratio
+ - A is the advantage (from batch["weight"])
+ - |o| is the sequence length (normalization factor)
+ - sgn(A) ensures correct optimization direction
+
+ Key differences from GRPO (TokenClippedSurrogateLoss):
+ 1. Uses geometric mean instead of arithmetic mean (more robust to outliers)
+ 2. Applies token-level clipping (not sequence-level)
+ 3. Supports wider clipping ranges (e.g., (e^-0.4, e^0.4) instead of (0.8, 1.2))
+ 4. Results in more stable importance sampling ratios during training
+
+ Implementation details:
+ - All operations performed in log-space for numerical stability
+ - Clipping applied at token level before geometric mean computation
+ - Normalization by sequence length (1/|o|) is critical for stability
+
+ Expects:
+ - batch["weight"]: A (advantages) [B]
+ - batch["actor_logps"]: token logps under behavior policy [B, T]
+ - input_ids / attention_mask / action_mask for π_new.
+
+ Reference: "GMPO: Geometric-Mean Policy Optimization" (arXiv:2507.20673v3)
+ Defaults follow the GMPO paper settings with wider clipping (e^-0.4, e^0.4).
+ """
+
+ clip_eps_low: float = 0.4 # In log-space: clip to e^-0.4 ≈ 0.67
+ clip_eps_high: float = 0.4 # In log-space: clip to e^0.4 ≈ 1.49
+ length_normalize: bool = True # 1/|o| normalization (critical for GMPO)
+ ratio_clip: Optional[float] = None
+
+ def __post_init__(self) -> None:
+ if self.clip_eps_low < 0 or self.clip_eps_high < 0:
+ raise ValueError(
+ f"clip_eps_low/high must be non-negative, got {self.clip_eps_low}, {self.clip_eps_high}"
+ )
+ if self.ratio_clip is not None and self.ratio_clip <= 0:
+ raise ValueError(f"ratio_clip must be positive, got {self.ratio_clip}")
+
+ @jaxtyped(typechecker=typechecker)
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
+ action_mask = batch["action_mask"]
+ advantages = batch["weight"] # [B]
+
+ if "actor_logps" not in batch:
+ raise KeyError("GMPOLoss requires batch['actor_logps'] for token IS.")
+
+ shared = _get_shared_context(logits, batch=batch)
+ if shared is not None:
+ token_logp = shared.token_logp
+ token_mask = shared.token_mask
+ token_counts = shared.token_counts
+ actor_logps_shifted = shared.actor_logps_shifted
+ else:
+ input_ids = batch["input_ids"]
+ actor_logps = batch["actor_logps"]
+ if actor_logps.shape != input_ids.shape:
+ raise ValueError(
+ f"actor_logps shape {tuple(actor_logps.shape)} does not match input_ids "
+ f"{tuple(input_ids.shape)}."
+ )
+
+ # Compute token-level log probabilities
+ token_logp = compute_token_logp(logits, input_ids) # [B, T-1]
+ token_mask = action_mask[:, 1:].to(token_logp.dtype) # [B, T-1]
+ token_counts = token_mask.sum(dim=-1).clamp(min=1.0) # [B]
+ actor_logps_shifted = actor_logps[:, 1:] # [B, T-1]
+
+ # Compute log importance ratios (in log-space for numerical stability)
+ log_ratio = token_logp - actor_logps_shifted # [B, T-1]
+
+ # Sign of advantage (for correct optimization direction)
+ sgn_adv = torch.sign(advantages).unsqueeze(-1) # [B, 1]
+
+ # Apply advantage sign to log ratios: sgn(A) * log(ρ_t)
+ sgn_log_ratio = sgn_adv * log_ratio # [B, T-1]
+
+ # Token-level clipping in log-space
+ # clip(sgn(A) * log(ρ_t), -ε_low, ε_high)
+ sgn_log_ratio_clipped = torch.clamp(
+ sgn_log_ratio,
+ -self.clip_eps_low,
+ self.clip_eps_high
+ ) # [B, T-1]
+
+ # Take min of unclipped and clipped (still in log-space, signed)
+ sgn_log_ratio_min = torch.min(sgn_log_ratio, sgn_log_ratio_clipped) # [B, T-1]
+
+ # Remove sign to get actual log ratios for geometric mean
+ log_ratio_min = sgn_adv * sgn_log_ratio_min # [B, T-1]
+
+ # Geometric mean: exp(sum(log(ρ_t)) / |o|) = exp(mean(log(ρ_t)))
+ # Only sum over valid tokens (token_mask == 1)
+ sum_log_ratio = (log_ratio_min * token_mask).sum(dim=-1) # [B]
+
+ if self.length_normalize:
+ # Normalize by sequence length: 1/|o| * sum(log(ρ_t))
+ geom_mean_log_ratio = sum_log_ratio / token_counts # [B]
+ else:
+ geom_mean_log_ratio = sum_log_ratio # [B]
+
+ # Convert back from log-space: ∏_t ρ_t^(1/|o|)
+ geom_mean_ratio = torch.exp(geom_mean_log_ratio) # [B]
+
+ # Optional ratio clipping (after geometric mean)
+ if self.ratio_clip is not None:
+ geom_mean_ratio = torch.clamp(geom_mean_ratio, max=self.ratio_clip)
+
+ # Objective: geom_mean_ratio * A (advantage sign already handled in clipping)
+ obj = geom_mean_ratio * advantages # [B]
+ loss = -obj.mean()
+
+ # --- Stats computation ---
+ # Compute raw ratios for monitoring (not used in loss)
+ ratio_raw = torch.exp(log_ratio) # [B, T-1]
+ token_mismatch_kl = ratio_raw - log_ratio - 1.0 # [B, T-1]
+
+ mask = token_mask > 0
+ if mask.any():
+ ratio_vals = ratio_raw.masked_select(mask)
+
+ # Clip fraction in original ratio space (for comparison with GRPO)
+ # Note: GMPO clips in log-space, so we convert bounds
+ lower_bound = torch.exp(torch.tensor(-self.clip_eps_low, device=ratio_vals.device))
+ upper_bound = torch.exp(torch.tensor(self.clip_eps_high, device=ratio_vals.device))
+ ppo_clip_frac = (
+ (ratio_vals > upper_bound) | (ratio_vals < lower_bound)
+ ).float().mean()
+
+ ratio_mean = ratio_vals.mean()
+ ratio_std = ratio_vals.std(unbiased=False)
+ mismatch_kl = token_mismatch_kl.masked_select(mask).mean()
+
+ if self.ratio_clip is not None:
+ ratio_clip_frac = (geom_mean_ratio >= self.ratio_clip).float().mean()
+ else:
+ ratio_clip_frac = torch.zeros((), device=ratio_vals.device, dtype=ratio_vals.dtype)
+ else:
+ ratio_mean = torch.zeros((), device=log_ratio.device, dtype=log_ratio.dtype)
+ ratio_std = torch.zeros((), device=log_ratio.device, dtype=log_ratio.dtype)
+ ppo_clip_frac = torch.zeros((), device=log_ratio.device, dtype=log_ratio.dtype)
+ ratio_clip_frac = torch.zeros((), device=log_ratio.device, dtype=log_ratio.dtype)
+ mismatch_kl = torch.zeros((), device=log_ratio.device, dtype=log_ratio.dtype)
+
+ logp_action = (token_logp * token_mask).sum(dim=-1)
+ stats: Dict[str, Any] = {
+ "loss": loss.detach(),
+ "ratio_mean": ratio_mean.detach(),
+ "ratio_std": ratio_std.detach(),
+ "geom_mean_ratio_mean": geom_mean_ratio.mean().detach(),
+ "geom_mean_ratio_std": geom_mean_ratio.std(unbiased=False).detach(),
+ "clip_frac": ppo_clip_frac.detach(),
+ "ratio_clip_frac": ratio_clip_frac.detach(),
+ "kl_actor_policy": mismatch_kl.detach(),
+ "adv_mean": advantages.mean().detach(),
+ "adv_std": advantages.std(unbiased=False).detach(),
+ "logp_mean": logp_action.mean().detach(),
+ "avg_action_tokens": token_counts.mean().detach(),
+ }
+ return loss, stats
+
+
# ---------------------------------------------------------------------------
# KL penalty and entropy bonus
# ---------------------------------------------------------------------------
+@dataclass
+class TokenKLLoss:
+ """
+ Token-level KL penalty between π_new and a reference policy.
+
+ Uses the standard policy-gradient surrogate estimate:
+
+        KL(π_new || π_old) ≈ E_a [ log π_new(a|s) - log π_old(a|s) ]   (a = sampled rollout tokens)
+
+ Computed over action tokens and averaged per sequence if length_normalize=True.
+ """
+
+ coeff: float = 1.0
+ old_logp_key: str = "actor_logps"
+ length_normalize: bool = True
+
+ @jaxtyped(typechecker=typechecker)
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
+ action_mask = batch["action_mask"]
+
+ if self.old_logp_key not in batch:
+ raise KeyError(f"TokenKLLoss requires batch['{self.old_logp_key}'].")
+
+ shared = _get_shared_context(logits, batch=batch)
+ if shared is not None:
+ token_logp = shared.token_logp
+ token_mask = shared.token_mask
+ token_counts = shared.token_counts
+ if self.old_logp_key == "actor_logps":
+ old_logps_shifted = shared.actor_logps_shifted
+ else:
+ input_ids = batch["input_ids"]
+ old_logps = batch[self.old_logp_key]
+ if old_logps.shape != input_ids.shape:
+ raise ValueError(
+ f"{self.old_logp_key} shape {tuple(old_logps.shape)} does not match input_ids "
+ f"{tuple(input_ids.shape)}."
+ )
+ old_logps_shifted = old_logps[:, 1:]
+ else:
+ input_ids = batch["input_ids"]
+ old_logps = batch[self.old_logp_key]
+ if old_logps.shape != input_ids.shape:
+ raise ValueError(
+ f"{self.old_logp_key} shape {tuple(old_logps.shape)} does not match input_ids "
+ f"{tuple(input_ids.shape)}."
+ )
+ token_logp = compute_token_logp(logits, input_ids)
+ token_mask = action_mask[:, 1:].to(token_logp.dtype)
+ token_counts = token_mask.sum(dim=-1).clamp(min=1.0)
+ old_logps_shifted = old_logps[:, 1:]
+
+ token_kl = (token_logp - old_logps_shifted) * token_mask
+ per_sample_kl = token_kl.sum(dim=-1)
+ if self.length_normalize:
+ per_sample_kl = per_sample_kl / token_counts
+
+ loss = self.coeff * per_sample_kl.mean()
+
+ stats: Dict[str, Any] = {
+ "loss": loss.detach(),
+ "kl_mean": per_sample_kl.mean().detach(),
+ "kl_std": per_sample_kl.std(unbiased=False).detach(),
+ "avg_action_tokens": token_counts.mean().detach(),
+ }
+ return loss, stats
+
+
@dataclass
class KLLoss:
"""
@@ -667,22 +1367,32 @@ class KLLoss:
length_normalize: bool = False
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
- input_ids = batch["input_ids"]
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
old_logp = batch[self.old_logp_key] # [B]
+ input_ids = batch["input_ids"]
logp_new = compute_logp_action(
logits,
input_ids,
action_mask,
length_normalize=self.length_normalize,
) # [B]
+ shared = _get_shared_context(logits, batch=batch)
+ if shared is not None:
+ token_counts = shared.token_counts
+ else:
+ token_counts = action_mask[:, 1:].sum(dim=-1).clamp(min=1.0)
+
if self.length_normalize:
- lengths = action_mask[:, 1:].to(old_logp.dtype).sum(dim=-1).clamp(min=1.0)
+ lengths = token_counts.to(old_logp.dtype)
old_logp = old_logp / lengths
- kl = logp_new - old_logp # [B]
+ kl = logp_new - old_logp # [B]
loss = self.coeff * kl.mean()
stats: Dict[str, Any] = {
@@ -709,7 +1419,11 @@ class EntropyBonus:
coeff: float = 0.01
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
action_mask = batch["action_mask"]
logprobs = torch.log_softmax(logits, dim=-1)
@@ -720,7 +1434,7 @@ def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]
mask = action_mask.to(token_entropy.dtype)
- masked_entropy = token_entropy * mask # [B, T]
+ masked_entropy = token_entropy * mask # [B, T]
# avoid divide-by-zero if mask is all zeros
denom = mask.sum()
if denom.item() == 0:
@@ -751,6 +1465,7 @@ class LossTerm:
- loss: loss object implementing Loss protocol
- weight: scalar multiplier applied to that loss
"""
+
name: str
loss: Loss
weight: float = 1.0
@@ -768,36 +1483,54 @@ class CompositeLoss:
"{name}/loss", "{name}/", ...
and a top-level "loss" key for the final combined loss.
-
- This class expects logits to be passed in, and it passes them
- down to all child terms.
+
+ Memory Efficiency:
+ CompositeLoss automatically creates a SharedContext to cache expensive
+ intermediate tensors (like token_logp from log_softmax). All child losses
+ can access the same SharedContext implicitly, sharing cached tensors and
+ avoiding duplicate autograd graphs.
+
+ Without SharedContext: N losses → N× [B, T, V] autograd activations
+ With SharedContext: N losses → 1× [B, T, V] autograd activations
+
+ Note:
+ SharedContext is made available implicitly during computation so
+ compute_logp_action/compute_token_logp can reuse cached tensors.
"""
terms: List[LossTerm]
@jaxtyped(typechecker=typechecker)
- def compute(self, logits: Logits, batch: Batch) -> Tuple[Tensor, Dict[str, Any]]:
+ def compute(
+ self,
+ logits: Logits,
+ batch: Batch,
+ ) -> Tuple[Tensor, Dict[str, Any]]:
if not self.terms:
raise ValueError("CompositeLoss.terms must be non-empty")
+ # Create shared context for memory-efficient tensor sharing.
+ shared = SharedContext(logits, batch)
+
total_loss: Tensor | None = None
stats: Dict[str, Any] = {}
- for term in self.terms:
- # Pass the pre-computed logits down to the child term
- raw_loss, term_stats = term.loss.compute(logits, batch)
- scaled_loss = term.weight * raw_loss
+ with _use_shared_context(shared):
+ for term in self.terms:
+ raw_loss, term_stats = term.loss.compute(logits, batch)
- if total_loss is None:
- total_loss = scaled_loss
- else:
- total_loss = total_loss + scaled_loss
+ scaled_loss = term.weight * raw_loss
+
+ if total_loss is None:
+ total_loss = scaled_loss
+ else:
+ total_loss = total_loss + scaled_loss
- # per-term stats
- stats[f"{term.name}/loss"] = raw_loss.detach()
- stats[f"{term.name}/weight"] = term.weight
- for k, v in term_stats.items():
- stats[f"{term.name}/{k}"] = v
+ # per-term stats
+ stats[f"{term.name}/loss"] = raw_loss.detach()
+ stats[f"{term.name}/weight"] = term.weight
+ for k, v in term_stats.items():
+ stats[f"{term.name}/{k}"] = v
assert total_loss is not None
stats["loss"] = total_loss.detach()
diff --git a/src/ludic/training/trainer.py b/src/ludic/training/trainer.py
index 8a01d8a..b312b03 100644
--- a/src/ludic/training/trainer.py
+++ b/src/ludic/training/trainer.py
@@ -70,8 +70,8 @@ def __init__(
model: nn.Module,
algo: RLAlgorithm,
batch_source: BatchSource,
+ cfg: TrainerConfig,
publisher: Optional[PolicyPublisher] = None,
- cfg: TrainerConfig = TrainerConfig(),
param_filter: Optional[Callable[[str, Tensor], bool]] = None,
enable_gradient_checkpointing: bool = False,
checkpointer: Optional[CheckpointManager] = None,
@@ -96,14 +96,14 @@ def __init__(
The SAWBatch is treated as a macro-batch and split into
micro-batches for gradient accumulation.
- publisher:
- Abstract interface to push weights to inference workers. If None, weight
- syncing is disabled.
-
cfg:
TrainerConfig for device, optimizer hyperparams, pad_token_id,
micro_token_budget, max_seq_len, and sync_every_steps.
+ publisher:
+ Abstract interface to push weights to inference workers. If None, weight
+ syncing is disabled.
+
param_filter:
Optional predicate (name, Tensor) -> bool deciding which
parameters get pushed into the runtime.
@@ -575,7 +575,11 @@ async def train_step(self) -> Dict[str, float]:
# ---- 2c) Loss + backward (scaled) --------------------------
pre_forward_alloc = self._reset_peak_memory(device) if profile_memory else None
try:
- loss, stats = self.algo.compute_loss(self.model, batch_tensors)
+ loss, stats = self.algo.compute_loss(
+ self.model,
+ batch_tensors,
+ cast_logits_to_fp32=self.cfg.cast_logits_to_fp32,
+ )
# Scale loss by micro-batch size to preserve macro-batch mean.
scaled_loss = loss * (item_count / total_items)
@@ -697,8 +701,6 @@ def _validate_invariants(self) -> None:
raise ValueError(
"Trainer evaluation requested (eval_at_start or eval_every_n_steps) but no evaluator was provided."
)
- if self.cfg.pad_token_id is None:
- raise ValueError("TrainerConfig.pad_token_id must be set for collation.")
if self.cfg.max_seq_len < 1:
raise ValueError("TrainerConfig.max_seq_len must be >= 1.")
if self.cfg.micro_token_budget <= 0:
diff --git a/tests/integration/test_code_exec_docker.py b/tests/integration/test_code_exec_docker.py
new file mode 100644
index 0000000..ed14ed6
--- /dev/null
+++ b/tests/integration/test_code_exec_docker.py
@@ -0,0 +1,615 @@
+"""
+Integration tests for Docker-based code execution sandbox.
+
+These tests require Docker to be running and will create/destroy containers.
+Run with: pytest -m integration tests/integration/test_code_exec_docker.py
+
+To skip GPU tests while running integration tests:
+ pytest -m "integration and not gpu"
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+pytestmark = [pytest.mark.integration]
+
+
+# Try to import docker - skip all tests if not available
+try:
+ import docker
+ from docker.errors import DockerException
+
+ # Try to connect to Docker daemon
+ try:
+ _client = docker.from_env()
+ _client.ping()
+ _client.close()
+ DOCKER_AVAILABLE = True
+ except (DockerException, Exception):
+ DOCKER_AVAILABLE = False
+except ImportError:
+ DOCKER_AVAILABLE = False
+
+
+skip_if_no_docker = pytest.mark.skipif(
+ not DOCKER_AVAILABLE,
+ reason="Docker daemon not available or docker package not installed",
+)
+
+
+# ---------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------
+
+
+@pytest.fixture
+async def sandbox_pool():
+ """Create and tear down a sandbox pool for testing."""
+ from ludic.envs.code_exec.docker_sandbox import DockerSandboxPool, DockerSandboxConfig
+
+ config = DockerSandboxConfig(
+ python_version="3.11",
+ memory_limit="128m",
+ cpu_quota=25000,
+ network_disabled=True,
+ )
+
+ pool = DockerSandboxPool(
+ n_workers=2,
+ config=config,
+ cache_size=100,
+ )
+
+ await pool.start()
+ yield pool
+ await pool.shutdown()
+
+
+@pytest.fixture
+async def sandbox(sandbox_pool):
+ """Get a single sandbox for testing."""
+ sandbox = await sandbox_pool.checkout()
+ yield sandbox
+ await sandbox_pool.release(sandbox)
+
+
+# ---------------------------------------------------------------------
+# DockerSandbox Tests
+# ---------------------------------------------------------------------
+
+
+@skip_if_no_docker
+class TestDockerSandboxCompile:
+ @pytest.mark.asyncio
+ async def test_compile_valid_code(self, sandbox):
+ """Valid Python code should compile successfully."""
+ from ludic.envs.code_exec.types import CompileStatus
+
+ code = """
+def hello():
+ return "Hello, World!"
+
+print(hello())
+"""
+ result = await sandbox.compile(code)
+
+ assert result.success is True
+ assert result.status == CompileStatus.SUCCESS
+ assert result.error_message is None
+ assert result.duration_ms > 0
+
+ @pytest.mark.asyncio
+ async def test_compile_syntax_error(self, sandbox):
+ """Syntax errors should be detected and reported."""
+ from ludic.envs.code_exec.types import CompileStatus
+
+ code = """
+def broken(
+ print("missing parenthesis")
+"""
+ result = await sandbox.compile(code)
+
+ assert result.success is False
+ assert result.status == CompileStatus.SYNTAX_ERROR
+ assert result.error_message is not None
+ assert "SyntaxError" in result.error_message or "syntax" in result.error_message.lower()
+
+ @pytest.mark.asyncio
+ async def test_compile_indentation_error(self, sandbox):
+ """Indentation errors should be detected."""
+ from ludic.envs.code_exec.types import CompileStatus
+
+ code = """
+def foo():
+print("bad indent")
+"""
+ result = await sandbox.compile(code)
+
+ assert result.success is False
+ assert result.status == CompileStatus.SYNTAX_ERROR
+
+
+@skip_if_no_docker
+class TestDockerSandboxExecute:
+ @pytest.mark.asyncio
+ async def test_execute_simple_print(self, sandbox):
+ """Simple print statement should produce output."""
+ from ludic.envs.code_exec.types import RunStatus
+
+ code = 'print("Hello from Docker!")'
+ result = await sandbox.execute(code)
+
+ assert result.compiled is True
+ assert result.succeeded is True
+ assert result.run_status == RunStatus.SUCCESS
+ assert "Hello from Docker!" in result.stdout.strip()
+ assert result.exit_code == 0
+
+ @pytest.mark.asyncio
+ async def test_execute_with_stdin(self, sandbox):
+ """Code should be able to read from stdin."""
+ from ludic.envs.code_exec.types import RunStatus
+
+ code = """
+import sys
+line = input()
+print(f"Got: {line}")
+"""
+ result = await sandbox.execute(code, stdin="test_input")
+
+ assert result.compiled is True
+        # NOTE: stdin is forwarded through `docker exec`, whose attach
+        # semantics vary by platform; only compilation is asserted here.
+
+ @pytest.mark.asyncio
+ async def test_execute_runtime_error(self, sandbox):
+ """Runtime errors should be captured."""
+ from ludic.envs.code_exec.types import RunStatus
+
+ code = """
+x = undefined_variable
+"""
+ result = await sandbox.execute(code)
+
+ assert result.compiled is True
+ assert result.succeeded is False
+ assert result.run_status == RunStatus.RUNTIME_ERROR
+ assert "NameError" in result.stderr or "undefined" in result.stderr.lower()
+
+ @pytest.mark.asyncio
+ async def test_execute_division_by_zero(self, sandbox):
+ """Division by zero should be a runtime error."""
+ from ludic.envs.code_exec.types import RunStatus
+
+ code = """
+result = 1 / 0
+"""
+ result = await sandbox.execute(code)
+
+ assert result.compiled is True
+ assert result.succeeded is False
+ assert result.run_status == RunStatus.RUNTIME_ERROR
+ assert "ZeroDivision" in result.stderr
+
+ @pytest.mark.asyncio
+ async def test_execute_timeout(self, sandbox):
+ """Infinite loops should timeout."""
+ from ludic.envs.code_exec.types import RunStatus
+
+ code = """
+while True:
+ pass
+"""
+ result = await sandbox.execute(code, timeout_s=1.0)
+
+ assert result.compiled is True
+ assert result.timed_out is True
+ assert result.run_status == RunStatus.TIMEOUT
+
+ @pytest.mark.asyncio
+ async def test_execute_returns_timing(self, sandbox):
+ """Execution should return timing information."""
+ code = """
+import time
+time.sleep(0.1)
+print("done")
+"""
+ result = await sandbox.execute(code)
+
+ assert result.compile_duration_ms > 0
+ assert result.run_duration_ms >= 100 # At least 100ms for sleep
+ assert result.total_duration_ms > 0
+
+
+@skip_if_no_docker
+class TestDockerSandboxReset:
+ @pytest.mark.asyncio
+ async def test_reset_clears_files(self, sandbox):
+ """Reset should clear workspace files."""
+ # Write a file
+ code1 = """
+with open('test_file.txt', 'w') as f:
+ f.write('hello')
+"""
+ await sandbox.execute(code1)
+
+ # Reset
+ await sandbox.reset()
+
+ # Try to read the file - should fail
+ code2 = """
+try:
+ with open('test_file.txt', 'r') as f:
+ print(f.read())
+except FileNotFoundError:
+ print("FILE_NOT_FOUND")
+"""
+ result = await sandbox.execute(code2)
+
+ assert "FILE_NOT_FOUND" in result.stdout
+
+
+# ---------------------------------------------------------------------
+# DockerSandboxPool Tests
+# ---------------------------------------------------------------------
+
+
+@skip_if_no_docker
+class TestDockerSandboxPool:
+ @pytest.mark.asyncio
+ async def test_pool_checkout_and_release(self, sandbox_pool):
+ """Should be able to checkout and release sandboxes."""
+ sandbox = await sandbox_pool.checkout()
+ assert sandbox is not None
+ assert sandbox_pool.available == 1 # One still available
+
+ await sandbox_pool.release(sandbox)
+ assert sandbox_pool.available == 2 # Both available again
+
+ @pytest.mark.asyncio
+ async def test_pool_concurrent_checkout(self, sandbox_pool):
+ """Multiple checkouts should work concurrently."""
+ sandbox1 = await sandbox_pool.checkout()
+ sandbox2 = await sandbox_pool.checkout()
+
+ assert sandbox1 is not sandbox2
+ assert sandbox_pool.available == 0
+
+ await sandbox_pool.release(sandbox1)
+ await sandbox_pool.release(sandbox2)
+ assert sandbox_pool.available == 2
+
+ @pytest.mark.asyncio
+ async def test_pool_checkout_timeout(self, sandbox_pool):
+ """Checkout should timeout when no sandboxes available."""
+ # Check out all sandboxes
+ sandbox1 = await sandbox_pool.checkout()
+ sandbox2 = await sandbox_pool.checkout()
+
+ # Third checkout should timeout
+ with pytest.raises(TimeoutError):
+ await sandbox_pool.checkout(timeout_s=0.5)
+
+ await sandbox_pool.release(sandbox1)
+ await sandbox_pool.release(sandbox2)
+
+ @pytest.mark.asyncio
+ async def test_pool_caching(self, sandbox_pool):
+ """Pool should cache execution results."""
+ from ludic.envs.code_exec.types import (
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+ TestResult,
+ )
+
+ # Create a mock result
+ test_result = TestResult(
+ test_case=TestCase(input="1", expected="2", id="t1"),
+ passed=True,
+ actual="2",
+ execution=ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ ),
+ )
+ batch_result = BatchTestResult(
+ results=[test_result],
+ code_hash="abc123",
+ tests_hash="xyz789",
+ )
+
+ # Cache it
+ sandbox_pool.put_cached("abc123", "xyz789", batch_result)
+
+ # Retrieve it
+ cached = sandbox_pool.get_cached("abc123", "xyz789")
+ assert cached is batch_result
+
+ # Check cache stats
+ stats = sandbox_pool.cache_stats
+ assert stats["hits"] == 1
+ assert stats["size"] == 1
+
+
+# ---------------------------------------------------------------------
+# StdinStdoutRunner Integration Tests
+# ---------------------------------------------------------------------
+
+
+@skip_if_no_docker
+class TestStdinStdoutRunnerIntegration:
+ @pytest.mark.asyncio
+ async def test_runner_all_pass(self, sandbox):
+ """Runner should correctly execute code and verify outputs."""
+ from ludic.envs.code_exec.runners import StdinStdoutRunner
+ from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+ from ludic.envs.code_exec.types import TestCase
+
+ code = """
+n = int(input())
+print(n * 2)
+"""
+ tests = [
+ TestCase(input="5", expected="10", id="t1"),
+ TestCase(input="10", expected="20", id="t2"),
+ TestCase(input="0", expected="0", id="t3"),
+ ]
+
+ runner = StdinStdoutRunner(default_timeout_s=5.0)
+ verifier = ExactMatchVerifier()
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code=code,
+ tests=tests,
+ verifier=verifier,
+ )
+
+ assert result.all_passed is True
+ assert result.passed_count == 3
+ assert result.total_count == 3
+
+ @pytest.mark.asyncio
+ async def test_runner_some_fail(self, sandbox):
+ """Runner should correctly identify failing tests."""
+ from ludic.envs.code_exec.runners import StdinStdoutRunner
+ from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+ from ludic.envs.code_exec.types import TestCase
+
+ # Code that only works for positive numbers
+ code = """
+n = int(input())
+if n < 0:
+ print("error")
+else:
+ print(n * 2)
+"""
+ tests = [
+ TestCase(input="5", expected="10", id="t1"), # Pass
+ TestCase(input="-5", expected="-10", id="t2"), # Fail
+ ]
+
+ runner = StdinStdoutRunner(default_timeout_s=5.0)
+ verifier = ExactMatchVerifier()
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code=code,
+ tests=tests,
+ verifier=verifier,
+ stop_on_first_failure=False,
+ )
+
+ assert result.all_passed is False
+ assert result.passed_count == 1
+ assert result.total_count == 2
+ assert result.results[0].passed is True
+ assert result.results[1].passed is False
+
+ @pytest.mark.asyncio
+ async def test_runner_compile_failure(self, sandbox):
+ """Runner should handle compilation failures gracefully."""
+ from ludic.envs.code_exec.runners import StdinStdoutRunner
+ from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+ from ludic.envs.code_exec.types import TestCase
+
+ code = """
+def broken(
+ print("syntax error")
+"""
+ tests = [
+ TestCase(input="1", expected="x", id="t1"),
+ TestCase(input="2", expected="y", id="t2"),
+ ]
+
+ runner = StdinStdoutRunner()
+ verifier = ExactMatchVerifier()
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code=code,
+ tests=tests,
+ verifier=verifier,
+ compile_first=True,
+ )
+
+ assert result.compile_failed is True
+ assert result.all_passed is False
+ assert result.passed_count == 0
+ # All tests should be marked as not compiled
+ for r in result.results:
+ assert r.compiled is False
+
+ @pytest.mark.asyncio
+ async def test_runner_stop_on_first_failure(self, sandbox):
+ """Runner should stop after first failure when configured."""
+ from ludic.envs.code_exec.runners import StdinStdoutRunner
+ from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+ from ludic.envs.code_exec.types import TestCase, RunStatus
+
+ code = """
+n = int(input())
+print("wrong" if n == 1 else "correct")
+"""
+ tests = [
+ TestCase(input="1", expected="correct", id="t1"), # Fails
+ TestCase(input="2", expected="correct", id="t2"), # Skipped
+ TestCase(input="3", expected="correct", id="t3"), # Skipped
+ ]
+
+ runner = StdinStdoutRunner()
+ verifier = ExactMatchVerifier()
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code=code,
+ tests=tests,
+ verifier=verifier,
+ stop_on_first_failure=True,
+ )
+
+ assert result.passed_count == 0
+ assert result.results[0].passed is False
+ assert result.results[0].ran is True
+ assert result.results[1].ran is False
+ assert result.results[1].execution.run_status == RunStatus.NOT_RUN
+ assert result.results[2].ran is False
+
+
+# ---------------------------------------------------------------------
+# End-to-End CodeExecEnv Tests
+# ---------------------------------------------------------------------
+
+
+@skip_if_no_docker
+class TestCodeExecEnvIntegration:
+ @pytest.mark.asyncio
+ async def test_env_full_workflow(self, sandbox_pool):
+ """Test complete workflow from reset to step."""
+ from ludic.envs.code_exec.env import CodeExecEnv, CodeExecConfig
+ from ludic.envs.code_exec.adapters.apps import APPSTestAdapter
+
+ sample = {
+ "problem_id": "test_add",
+ "question": "Write a program that reads two integers and prints their sum.",
+ "inputs": ["1 2", "10 20", "-5 5"],
+ "outputs": ["3", "30", "0"],
+ }
+
+ adapter = APPSTestAdapter()
+ config = CodeExecConfig(
+ timeout_per_test_s=5.0,
+ stop_on_first_failure=False,
+ compile_first=True,
+ )
+
+ env = CodeExecEnv(
+ sample=sample,
+ sandbox_pool=sandbox_pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ # Reset
+ obs, info = await env.env_reset()
+
+ assert "two integers" in obs.lower()
+ assert info["problem_id"] == "test_add"
+ assert info["num_tests"] == 3
+
+ # Submit correct code
+ correct_code = """
+a, b = map(int, input().split())
+print(a + b)
+"""
+ outcome = await env.env_step(correct_code)
+
+ assert outcome.terminated is True
+ assert outcome.reward == 1.0
+ assert outcome.info["all_passed"] is True
+ assert outcome.info["passed"] == 3
+ assert outcome.info["total"] == 3
+
+ @pytest.mark.asyncio
+ async def test_env_wrong_code(self, sandbox_pool):
+ """Test env with incorrect code submission."""
+ from ludic.envs.code_exec.env import CodeExecEnv, CodeExecConfig
+ from ludic.envs.code_exec.adapters.apps import APPSTestAdapter
+
+ sample = {
+ "problem_id": "test_double",
+ "question": "Write a program that reads an integer and prints it doubled.",
+ "inputs": ["5", "10"],
+ "outputs": ["10", "20"],
+ }
+
+ adapter = APPSTestAdapter()
+ config = CodeExecConfig(stop_on_first_failure=False)
+
+ env = CodeExecEnv(
+ sample=sample,
+ sandbox_pool=sandbox_pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+
+ # Submit wrong code (triples instead of doubles)
+ wrong_code = """
+n = int(input())
+print(n * 3)
+"""
+ outcome = await env.env_step(wrong_code)
+
+ assert outcome.terminated is True
+ assert outcome.reward == 0.0 # Binary reward, not all passed
+ assert outcome.info["all_passed"] is False
+ assert outcome.info["passed"] == 0
+
+ @pytest.mark.asyncio
+ async def test_env_partial_credit(self, sandbox_pool):
+ """Test env with partial credit enabled."""
+ from ludic.envs.code_exec.env import CodeExecEnv, CodeExecConfig
+ from ludic.envs.code_exec.adapters.apps import APPSTestAdapter
+
+ sample = {
+ "problem_id": "test_abs",
+ "question": "Write a program that reads an integer and prints its absolute value.",
+ "inputs": ["5", "-5", "0", "-10"],
+ "outputs": ["5", "5", "0", "10"],
+ }
+
+ adapter = APPSTestAdapter()
+ config = CodeExecConfig(
+ partial_credit=True,
+ stop_on_first_failure=False,
+ )
+
+ env = CodeExecEnv(
+ sample=sample,
+ sandbox_pool=sandbox_pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+
+ # Code that only works for non-negative numbers
+ partial_code = """
+n = int(input())
+print(n) # Wrong for negative numbers
+"""
+ outcome = await env.env_step(partial_code)
+
+ assert outcome.terminated is True
+ assert outcome.info["all_passed"] is False
+ assert outcome.info["passed"] == 2 # Only positive and zero pass
+ assert outcome.reward == pytest.approx(0.5) # 2/4 = 0.5
diff --git a/tests/integration/test_grpo_e2e.py b/tests/integration/test_grpo_e2e.py
index efe9311..9f31280 100644
--- a/tests/integration/test_grpo_e2e.py
+++ b/tests/integration/test_grpo_e2e.py
@@ -22,7 +22,7 @@
GroupNormalizedReturn,
)
from ludic.interaction.base import InteractionProtocol
-from ludic.interaction.single_agent import SingleAgentSyncProtocol
+from ludic.interaction.single_agent import SingleAgentProtocol
from tests._mocks import SeedableMockAgent
@@ -51,7 +51,9 @@ def suggested_sysprompt(self) -> Optional[str]:
def env_reset(self, *, seed: Optional[int] = None) -> Tuple[Observation, Info]:
self._t = 0
- self._obs = f"Start state for seed {seed}. Correct action is {self.correct_action}."
+ self._obs = (
+ f"Start state for seed {seed}. Correct action is {self.correct_action}."
+ )
return self._obs, {"seed": seed}
def env_step(self, action: str) -> StepOutcome:
@@ -110,7 +112,9 @@ def create_protocol() -> InteractionProtocol:
return SingleAgentSyncProtocol(agent=agent)
protocol_registry = {"grpo_protocol": create_protocol}
- engine = RolloutEngine(protocol_registry=protocol_registry, env_registry=env_registry)
+ engine = RolloutEngine(
+ protocol_registry=protocol_registry, env_registry=env_registry
+ )
def make_expanded_requests() -> List[RolloutRequest]:
inference = InferenceSpec(
diff --git a/tests/test_batch_execution.py b/tests/test_batch_execution.py
new file mode 100644
index 0000000..6b70344
--- /dev/null
+++ b/tests/test_batch_execution.py
@@ -0,0 +1,501 @@
+"""
+Unit tests for batch execution functionality.
+
+Tests the batch execution path in StdinStdoutRunner using mock sandboxes
+that implement execute_batch().
+"""
+
+import pytest
+from typing import AsyncIterator, Union
+
+from ludic.envs.code_exec.runners import StdinStdoutRunner
+from ludic.envs.code_exec.types import (
+ BatchExecutionSpec,
+ TestCase,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+)
+from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+
+
+# ---------------------------------------------------------------------
+# Mock Sandbox with execute_batch() support
+# ---------------------------------------------------------------------
+
+
+class MockBatchSandbox:
+ """
+ A mock sandbox that supports execute_batch() for testing the batched
+ execution path in StdinStdoutRunner.
+
+ Can be configured with:
+ - batch_results: List of results to yield from execute_batch()
+ - compile_success: Whether compilation succeeds
+ - break_after: If set, raise exception after yielding N results
+ """
+
+ def __init__(
+ self,
+ batch_results: list[Union[CompileResult, ExecutionResult, dict]] | None = None,
+ compile_success: bool = True,
+ break_after: int | None = None,
+ ):
+ self._batch_results = batch_results or []
+ self._compile_success = compile_success
+ self._break_after = break_after
+ self._python_version = "3.11"
+
+ # Track calls
+ self.execute_batch_calls: list[BatchExecutionSpec] = []
+
+ @property
+ def python_version(self) -> str:
+ return self._python_version
+
+ async def reset(self) -> None:
+ pass
+
+ async def compile(self, code: str, *, timeout_s: float = 5.0) -> CompileResult:
+ if self._compile_success:
+ return CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0)
+ return CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message="SyntaxError",
+ duration_ms=5.0,
+ )
+
+ async def execute(
+ self,
+ code: str,
+ *,
+ stdin: str = "",
+ skip_compile: bool = False,
+ timeout_s: float = 10.0,
+ memory_limit_mb: int | None = None,
+ env_vars: dict[str, str] | None = None,
+ ) -> ExecutionResult:
+ # Fallback for non-batch execution
+ return ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="",
+ stderr="",
+ exit_code=0,
+ )
+
+ async def execute_batch(
+ self,
+ spec: BatchExecutionSpec,
+ ) -> AsyncIterator[Union[CompileResult, ExecutionResult, dict]]:
+ """Yield pre-configured batch results."""
+ self.execute_batch_calls.append(spec)
+
+ count = 0
+ for result in self._batch_results:
+ if self._break_after is not None and count >= self._break_after:
+ raise RuntimeError("Simulated container crash")
+ yield result
+ count += 1
+
+
+def make_success_execution(test_id: str, stdout: str) -> ExecutionResult:
+ """Helper to create a successful ExecutionResult for a test."""
+ return ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout=stdout,
+ stderr="",
+ exit_code=0,
+ cache_key=test_id, # Used to identify which test this result is for
+ )
+
+
+def make_failure_execution(
+ test_id: str, status: RunStatus = RunStatus.RUNTIME_ERROR
+) -> ExecutionResult:
+ """Helper to create a failed ExecutionResult for a test."""
+ return ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=status,
+ stdout="",
+ stderr="Error occurred",
+ exit_code=1,
+ cache_key=test_id,
+ )
+
+
+# ---------------------------------------------------------------------
+# Batch Execution Tests
+# ---------------------------------------------------------------------
+
+
+class TestBatchExecution:
+ @pytest.mark.asyncio
+ async def test_batch_all_tests_pass(self):
+ """All tests pass through batch execution."""
+ batch_results = [
+ CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+ make_success_execution("t1", "expected1"),
+ make_success_execution("t2", "expected2"),
+ {"type": "done", "passed": 2, "failed": 0},
+ ]
+ sandbox = MockBatchSandbox(batch_results=batch_results)
+ runner = StdinStdoutRunner(use_batch_execution=True)
+ verifier = ExactMatchVerifier()
+
+ tests = [
+ TestCase(input="input1", expected="expected1", id="t1"),
+ TestCase(input="input2", expected="expected2", id="t2"),
+ ]
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code="print('hello')",
+ tests=tests,
+ verifier=verifier,
+ )
+
+ assert result.all_passed is True
+ assert result.passed_count == 2
+ assert result.total_count == 2
+ assert len(sandbox.execute_batch_calls) == 1
+
+ @pytest.mark.asyncio
+ async def test_batch_compile_failure(self):
+ """Compilation failure returns all tests as failed."""
+ batch_results = [
+ CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message="SyntaxError: invalid syntax",
+ error_line=1,
+ duration_ms=5.0,
+ ),
+ ]
+ sandbox = MockBatchSandbox(batch_results=batch_results)
+ runner = StdinStdoutRunner(use_batch_execution=True)
+ verifier = ExactMatchVerifier()
+
+ tests = [
+ TestCase(input="input1", expected="x", id="t1"),
+ TestCase(input="input2", expected="y", id="t2"),
+ ]
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code="invalid syntax",
+ tests=tests,
+ verifier=verifier,
+ )
+
+ assert result.compile_failed is True
+ assert result.all_passed is False
+ assert result.passed_count == 0
+ assert len(result.results) == 2
+
+ @pytest.mark.asyncio
+ async def test_batch_some_tests_fail(self):
+ """Mixed pass/fail through batch execution."""
+ batch_results = [
+ CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+ make_success_execution("t1", "correct"),
+ make_success_execution("t2", "wrong"), # Output doesn't match expected
+ {"type": "done", "passed": 1, "failed": 1},
+ ]
+ sandbox = MockBatchSandbox(batch_results=batch_results)
+ runner = StdinStdoutRunner(use_batch_execution=True)
+ verifier = ExactMatchVerifier()
+
+ tests = [
+ TestCase(input="input1", expected="correct", id="t1"),
+ TestCase(input="input2", expected="correct", id="t2"), # Will fail
+ ]
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code="code",
+ tests=tests,
+ verifier=verifier,
+ )
+
+ assert result.all_passed is False
+ assert result.passed_count == 1
+ assert result.total_count == 2
+ assert result.results[0].passed is True
+ assert result.results[1].passed is False
+
+ @pytest.mark.asyncio
+ async def test_batch_runtime_error(self):
+ """Runtime error in batch execution."""
+ batch_results = [
+ CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+ make_failure_execution("t1", RunStatus.RUNTIME_ERROR),
+ {"type": "done", "passed": 0, "failed": 1},
+ ]
+ sandbox = MockBatchSandbox(batch_results=batch_results)
+ runner = StdinStdoutRunner(use_batch_execution=True)
+ verifier = ExactMatchVerifier()
+
+ tests = [TestCase(input="input1", expected="output", id="t1")]
+
+ result = await runner.run_tests(
+ sandbox=sandbox,
+ code="raise Exception()",
+ tests=tests,
+ verifier=verifier,
+ )
+
+ assert result.passed_count == 0
+ assert result.results[0].passed is False
+ assert "Runtime error" in (result.results[0].comparison_details or "")
+
+    @pytest.mark.asyncio
+    async def test_batch_timeout(self):
+        """Timeout in batch execution.
+
+        A TIMEOUT run status streamed by the sandbox must count as a failure.
+        """
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+            make_failure_execution("t1", RunStatus.TIMEOUT),
+            {"type": "done", "passed": 0, "failed": 1},
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="while True: pass",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.passed_count == 0
+        assert result.results[0].passed is False
+
+    @pytest.mark.asyncio
+    async def test_batch_stop_on_first_failure_spec(self):
+        """Verify stop_on_first_failure is passed to BatchExecutionSpec.
+
+        Inspects the spec recorded by the mock sandbox rather than
+        relying on observable behavior.
+        """
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+            make_success_execution("t1", "output"),
+            {"type": "done", "passed": 1, "failed": 0},
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+            stop_on_first_failure=True,
+        )
+
+        # Exactly one batch call, and the flag made it into the spec.
+        assert len(sandbox.execute_batch_calls) == 1
+        spec = sandbox.execute_batch_calls[0]
+        assert spec.stop_on_first_failure is True
+
+    @pytest.mark.asyncio
+    async def test_batch_broken_stream_sandbox_error(self):
+        """Broken stream marks missing tests as SANDBOX_ERROR.
+
+        The mock sandbox cuts the stream after two items (compile result +
+        t1); t2 and t3 must be synthesized as SANDBOX_ERROR failures so the
+        caller still gets one result per test case.
+        """
+        # Stream breaks after compile result, before any test results
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+            make_success_execution("t1", "output1"),
+            # Stream breaks here - t2 and t3 never received
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results, break_after=2)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="output1", id="t1"),
+            TestCase(input="input2", expected="output2", id="t2"),
+            TestCase(input="input3", expected="output3", id="t3"),
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        # t1 should have succeeded, t2 and t3 should be SANDBOX_ERROR
+        assert len(result.results) == 3
+        assert result.results[0].passed is True
+        assert result.results[0].test_case.id == "t1"
+
+        # Find t2 and t3 results (order may vary due to dict iteration)
+        t2_result = next(r for r in result.results if r.test_case.id == "t2")
+        t3_result = next(r for r in result.results if r.test_case.id == "t3")
+
+        assert t2_result.passed is False
+        assert t2_result.execution.run_status == RunStatus.SANDBOX_ERROR
+        assert "Sandbox crashed" in (t2_result.comparison_details or "")
+
+        assert t3_result.passed is False
+        assert t3_result.execution.run_status == RunStatus.SANDBOX_ERROR
+
+    @pytest.mark.asyncio
+    async def test_batch_no_done_marker_adds_missing(self):
+        """Missing 'done' marker triggers fallback for unreceived tests.
+
+        Unlike the broken-stream case, the stream here ends cleanly but
+        without the terminal marker; unreceived tests must still be filled
+        in as SANDBOX_ERROR.
+        """
+        # No "done" marker, but some tests received
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+            make_success_execution("t1", "output1"),
+            # No "done" marker - stream ended unexpectedly
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="output1", id="t1"),
+            TestCase(input="input2", expected="output2", id="t2"),
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert len(result.results) == 2
+        assert result.results[0].passed is True
+
+        # t2 should be marked as SANDBOX_ERROR
+        t2_result = next(r for r in result.results if r.test_case.id == "t2")
+        assert t2_result.execution.run_status == RunStatus.SANDBOX_ERROR
+
+    @pytest.mark.asyncio
+    async def test_batch_disabled_falls_back_to_individual(self):
+        """With use_batch_execution=False, individual execution is used.
+
+        Only asserts that the batch API was never invoked; the individual
+        path itself is covered elsewhere.
+        """
+        sandbox = MockBatchSandbox()
+        runner = StdinStdoutRunner(use_batch_execution=False)
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="", id="t1")]
+
+        await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        # execute_batch should NOT be called
+        assert len(sandbox.execute_batch_calls) == 0
+
+    @pytest.mark.asyncio
+    async def test_batch_spec_contains_all_test_info(self):
+        """Verify BatchExecutionSpec contains all test information.
+
+        Checks that code, tests, compile/stop flags, and the runner's
+        default timeout are all forwarded into the spec.
+        """
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS),
+            make_success_execution("t1", "out"),
+            {"type": "done"},
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True, default_timeout_s=7.5)
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="my_input", expected="out", id="t1")]
+
+        await runner.run_tests(
+            sandbox=sandbox,
+            code="my_code",
+            tests=tests,
+            verifier=verifier,
+            compile_first=True,
+            stop_on_first_failure=False,
+        )
+
+        assert len(sandbox.execute_batch_calls) == 1
+        spec = sandbox.execute_batch_calls[0]
+
+        assert spec.code == "my_code"
+        assert len(spec.tests) == 1
+        assert spec.tests[0].id == "t1"
+        assert spec.tests[0].input == "my_input"
+        assert spec.compile_first is True
+        assert spec.stop_on_first_failure is False
+        # default_timeout_s on the runner becomes the spec timeout.
+        assert spec.timeout_s == 7.5
+
+    @pytest.mark.asyncio
+    async def test_batch_hashes_computed(self):
+        """Verify code_hash and tests_hash are computed for batch execution.
+
+        Only the shape of the hashes is checked (16 lowercase hex chars),
+        not their exact values.
+        """
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS),
+            make_success_execution("t1", "output"),
+            {"type": "done"},
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        code = "print('hello')"
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code=code,
+            tests=tests,
+            verifier=verifier,
+        )
+
+        # Verify hashes are present
+        assert len(result.code_hash) == 16
+        assert len(result.tests_hash) == 16
+        assert all(c in "0123456789abcdef" for c in result.code_hash)
+
+
+class TestBatchExecutionNotRunStatus:
+    """Tests for NOT_RUN status handling in batch execution."""
+
+    @pytest.mark.asyncio
+    async def test_not_run_tests_from_batch_stream(self):
+        """Tests marked as NOT_RUN in batch stream are handled correctly.
+
+        With stop_on_first_failure, the batch runner may emit a NOT_RUN
+        result for skipped tests; those must still appear as non-passing
+        entries in the aggregate result.
+        """
+        batch_results = [
+            CompileResult(status=CompileStatus.SUCCESS, duration_ms=10.0),
+            make_failure_execution("t1", RunStatus.RUNTIME_ERROR),
+            # t2 marked as not_run by batch_runner due to stop_on_first_failure
+            ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.NOT_RUN,
+                stdout="",
+                stderr="",
+                exit_code=None,
+                cache_key="t2",
+            ),
+            {"type": "done", "passed": 0, "failed": 1},
+        ]
+        sandbox = MockBatchSandbox(batch_results=batch_results)
+        runner = StdinStdoutRunner(use_batch_execution=True)
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="output1", id="t1"),
+            TestCase(input="input2", expected="output2", id="t2"),
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+            stop_on_first_failure=True,
+        )
+
+        assert len(result.results) == 2
+        assert result.results[0].passed is False
+        assert result.results[0].execution.run_status == RunStatus.RUNTIME_ERROR
+
+        t2_result = next(r for r in result.results if r.test_case.id == "t2")
+        assert t2_result.passed is False
+        assert t2_result.execution.run_status == RunStatus.NOT_RUN
diff --git a/tests/test_code_exec_adapters.py b/tests/test_code_exec_adapters.py
new file mode 100644
index 0000000..2d701bd
--- /dev/null
+++ b/tests/test_code_exec_adapters.py
@@ -0,0 +1,335 @@
+"""
+Unit tests for ludic.envs.code_exec.adapters
+
+Tests verifiers and test adapters.
+"""
+
+import pytest
+
+from ludic.envs.code_exec.adapters.base import (
+ ExactMatchVerifier,
+ WhitespaceNormalizedVerifier,
+ FloatTolerantVerifier,
+)
+from ludic.envs.code_exec.adapters.apps import (
+ APPSTestAdapter,
+ APPS_SYSTEM_PROMPT,
+)
+from ludic.envs.code_exec.types import TestCase
+
+
+# ---------------------------------------------------------------------
+# ExactMatchVerifier Tests
+# ---------------------------------------------------------------------
+
+
+class TestExactMatchVerifier:
+    """Behavior of ExactMatchVerifier: strict equality with configurable
+    whitespace stripping and case sensitivity, plus diagnostic details."""
+
+    def test_exact_match_passes(self):
+        verifier = ExactMatchVerifier()
+        passed, details = verifier.verify("hello", "hello")
+        assert passed is True
+        # No details are produced on success.
+        assert details is None
+
+    def test_mismatch_fails(self):
+        verifier = ExactMatchVerifier()
+        passed, details = verifier.verify("hello", "world")
+        assert passed is False
+        assert details is not None
+
+    def test_strips_whitespace_by_default(self):
+        verifier = ExactMatchVerifier()
+        passed, _ = verifier.verify("  hello  \n", "hello")
+        assert passed is True
+
+    def test_strip_disabled(self):
+        # With strip=False, trailing whitespace makes the match fail.
+        verifier = ExactMatchVerifier(strip=False)
+        passed, _ = verifier.verify("hello ", "hello")
+        assert passed is False
+
+    def test_case_sensitive_by_default(self):
+        verifier = ExactMatchVerifier()
+        passed, _ = verifier.verify("Hello", "hello")
+        assert passed is False
+
+    def test_case_insensitive(self):
+        verifier = ExactMatchVerifier(case_sensitive=False)
+        passed, _ = verifier.verify("HELLO", "hello")
+        assert passed is True
+
+    def test_length_mismatch_details(self):
+        # Details should report both lengths when they differ.
+        verifier = ExactMatchVerifier()
+        passed, details = verifier.verify("abc", "abcdef")
+        assert passed is False
+        assert "Length mismatch" in details
+        assert "3" in details
+        assert "6" in details
+
+    def test_first_diff_details(self):
+        # Equal lengths but differing content should pinpoint the first diff.
+        verifier = ExactMatchVerifier()
+        passed, details = verifier.verify("abc", "axc")
+        assert passed is False
+        assert "First diff" in details
+
+
+# ---------------------------------------------------------------------
+# WhitespaceNormalizedVerifier Tests
+# ---------------------------------------------------------------------
+
+
+class TestWhitespaceNormalizedVerifier:
+    """WhitespaceNormalizedVerifier should treat any run of whitespace
+    (spaces, tabs, newlines) as equivalent when comparing outputs."""
+
+    def test_normalizes_multiple_spaces(self):
+        verifier = WhitespaceNormalizedVerifier()
+        passed, _ = verifier.verify("hello  world", "hello world")
+        assert passed is True
+
+    def test_normalizes_newlines(self):
+        verifier = WhitespaceNormalizedVerifier()
+        passed, _ = verifier.verify("hello\n\nworld", "hello world")
+        assert passed is True
+
+    def test_normalizes_tabs(self):
+        verifier = WhitespaceNormalizedVerifier()
+        passed, _ = verifier.verify("hello\t\tworld", "hello world")
+        assert passed is True
+
+    def test_normalizes_mixed_whitespace(self):
+        # Leading/trailing and interior mixed whitespace all collapse.
+        verifier = WhitespaceNormalizedVerifier()
+        passed, _ = verifier.verify("  hello \n\t world  ", "hello world")
+        assert passed is True
+
+    def test_content_mismatch_fails(self):
+        # Normalization must not mask genuine content differences.
+        verifier = WhitespaceNormalizedVerifier()
+        passed, _ = verifier.verify("hello world", "hello mars")
+        assert passed is False
+
+
+# ---------------------------------------------------------------------
+# FloatTolerantVerifier Tests
+# ---------------------------------------------------------------------
+
+
+class TestFloatTolerantVerifier:
+    """FloatTolerantVerifier compares whitespace-separated tokens, using
+    numeric tolerance for parseable floats and exact match otherwise."""
+
+    def test_exact_float_match(self):
+        verifier = FloatTolerantVerifier()
+        passed, _ = verifier.verify("3.14159", "3.14159")
+        assert passed is True
+
+    def test_float_within_tolerance(self):
+        verifier = FloatTolerantVerifier(abs_tol=1e-6)
+        passed, _ = verifier.verify("3.141590001", "3.14159")
+        assert passed is True
+
+    def test_float_outside_tolerance(self):
+        # Difference of 0.01 exceeds abs_tol=1e-9.
+        verifier = FloatTolerantVerifier(abs_tol=1e-9)
+        passed, _ = verifier.verify("3.15", "3.14")
+        assert passed is False
+
+    def test_integer_match(self):
+        verifier = FloatTolerantVerifier()
+        passed, _ = verifier.verify("42", "42")
+        assert passed is True
+
+    def test_string_exact_match(self):
+        # Non-numeric tokens fall back to exact string comparison.
+        verifier = FloatTolerantVerifier()
+        passed, _ = verifier.verify("hello", "hello")
+        assert passed is True
+
+    def test_string_mismatch(self):
+        verifier = FloatTolerantVerifier()
+        passed, _ = verifier.verify("hello", "world")
+        assert passed is False
+
+    def test_multiple_tokens(self):
+        verifier = FloatTolerantVerifier(abs_tol=1e-6)
+        passed, _ = verifier.verify("1.0 2.0 3.0", "1.0 2.0 3.0")
+        assert passed is True
+
+    def test_multiple_tokens_within_tolerance(self):
+        verifier = FloatTolerantVerifier(abs_tol=0.01)
+        passed, _ = verifier.verify("1.001 2.002 3.003", "1.0 2.0 3.0")
+        assert passed is True
+
+    def test_token_count_mismatch(self):
+        verifier = FloatTolerantVerifier()
+        passed, details = verifier.verify("1 2", "1 2 3")
+        assert passed is False
+        assert "Token count mismatch" in details
+
+    def test_relative_tolerance(self):
+        verifier = FloatTolerantVerifier(rel_tol=0.01, abs_tol=0)
+        # 1% of 100 = 1, so 100.5 should match 100
+        passed, _ = verifier.verify("100.5", "100")
+        assert passed is True
+
+    def test_strips_whitespace(self):
+        verifier = FloatTolerantVerifier()
+        passed, _ = verifier.verify("  42  ", "42")
+        assert passed is True
+
+
+# ---------------------------------------------------------------------
+# APPSTestAdapter Tests
+# ---------------------------------------------------------------------
+
+
+class TestAPPSTestAdapter:
+    """APPSTestAdapter maps APPS-style dataset samples (question, inputs,
+    outputs, problem_id) to prompts and TestCase lists, with configurable
+    key names and deterministic test hashing."""
+
+    def test_get_prompt_extracts_question(self):
+        adapter = APPSTestAdapter()
+        sample = {
+            "question": "Write a function to add two numbers.",
+            "inputs": ["1 2"],
+            "outputs": ["3"],
+        }
+        prompt = adapter.get_prompt(sample)
+        assert prompt == "Write a function to add two numbers."
+
+    def test_get_prompt_with_custom_key(self):
+        # question_key overrides which sample field supplies the prompt.
+        adapter = APPSTestAdapter(question_key="problem_description")
+        sample = {
+            "problem_description": "Custom problem text",
+            "inputs": [],
+            "outputs": [],
+        }
+        prompt = adapter.get_prompt(sample)
+        assert prompt == "Custom problem text"
+
+    def test_get_problem_id(self):
+        adapter = APPSTestAdapter()
+        sample = {
+            "problem_id": "prob_123",
+            "question": "Q",
+            "inputs": [],
+            "outputs": [],
+        }
+        assert adapter.get_problem_id(sample) == "prob_123"
+
+    def test_get_problem_id_missing_returns_unknown(self):
+        # A missing id falls back to the sentinel "unknown" rather than raising.
+        adapter = APPSTestAdapter()
+        sample = {
+            "question": "Q",
+            "inputs": [],
+            "outputs": [],
+        }
+        assert adapter.get_problem_id(sample) == "unknown"
+
+    def test_get_problem_id_custom_key(self):
+        adapter = APPSTestAdapter(problem_id_key="id")
+        sample = {
+            "id": "custom_id",
+            "question": "Q",
+            "inputs": [],
+            "outputs": [],
+        }
+        assert adapter.get_problem_id(sample) == "custom_id"
+
+    def test_get_tests_single_test(self):
+        adapter = APPSTestAdapter()
+        sample = {
+            "question": "Q",
+            "inputs": ["1 2"],
+            "outputs": ["3"],
+        }
+        tests = adapter.get_tests(sample)
+        assert len(tests) == 1
+        assert tests[0].input == "1 2"
+        assert tests[0].expected == "3"
+        # Test ids are generated positionally: test_0, test_1, ...
+        assert tests[0].id == "test_0"
+
+    def test_get_tests_multiple_tests(self):
+        adapter = APPSTestAdapter()
+        sample = {
+            "question": "Q",
+            "inputs": ["1", "2", "3"],
+            "outputs": ["a", "b", "c"],
+        }
+        tests = adapter.get_tests(sample)
+        assert len(tests) == 3
+        assert tests[0].input == "1"
+        assert tests[0].expected == "a"
+        assert tests[0].id == "test_0"
+        assert tests[1].input == "2"
+        assert tests[1].expected == "b"
+        assert tests[1].id == "test_1"
+        assert tests[2].input == "3"
+        assert tests[2].expected == "c"
+        assert tests[2].id == "test_2"
+
+    def test_get_tests_mismatched_length_raises(self):
+        adapter = APPSTestAdapter()
+        sample = {
+            "question": "Q",
+            "inputs": ["1", "2", "3"],
+            "outputs": ["a", "b"],  # One less
+        }
+        with pytest.raises(ValueError) as exc_info:
+            adapter.get_tests(sample)
+        assert "Mismatched" in str(exc_info.value)
+
+    def test_get_tests_custom_keys(self):
+        adapter = APPSTestAdapter(inputs_key="test_inputs", outputs_key="test_outputs")
+        sample = {
+            "question": "Q",
+            "test_inputs": ["x"],
+            "test_outputs": ["y"],
+        }
+        tests = adapter.get_tests(sample)
+        assert len(tests) == 1
+        assert tests[0].input == "x"
+        assert tests[0].expected == "y"
+
+    def test_hash_tests_deterministic(self):
+        adapter = APPSTestAdapter()
+        tests = [
+            TestCase(input="1", expected="a", id="t1"),
+            TestCase(input="2", expected="b", id="t2"),
+        ]
+        hash1 = adapter.hash_tests(tests)
+        hash2 = adapter.hash_tests(tests)
+        assert hash1 == hash2
+        assert len(hash1) == 16  # 16 hex chars
+
+    def test_hash_tests_different_for_different_tests(self):
+        adapter = APPSTestAdapter()
+        tests1 = [TestCase(input="1", expected="a", id="t1")]
+        tests2 = [TestCase(input="2", expected="b", id="t1")]
+        hash1 = adapter.hash_tests(tests1)
+        hash2 = adapter.hash_tests(tests2)
+        assert hash1 != hash2
+
+    def test_hash_tests_order_matters(self):
+        # Hashing is order-sensitive: permuting the tests changes the hash.
+        adapter = APPSTestAdapter()
+        tests1 = [
+            TestCase(input="1", expected="a", id="t1"),
+            TestCase(input="2", expected="b", id="t2"),
+        ]
+        tests2 = [
+            TestCase(input="2", expected="b", id="t2"),
+            TestCase(input="1", expected="a", id="t1"),
+        ]
+        hash1 = adapter.hash_tests(tests1)
+        hash2 = adapter.hash_tests(tests2)
+        assert hash1 != hash2
+
+    def test_hash_tests_ignores_id(self):
+        """Hash should be based on input/expected, not id."""
+        adapter = APPSTestAdapter()
+        tests1 = [TestCase(input="1", expected="a", id="test_0")]
+        tests2 = [TestCase(input="1", expected="a", id="different_id")]
+        hash1 = adapter.hash_tests(tests1)
+        hash2 = adapter.hash_tests(tests2)
+        assert hash1 == hash2
+
+
+class TestAPPSSystemPrompt:
+ def test_system_prompt_exists(self):
+ assert APPS_SYSTEM_PROMPT is not None
+ assert len(APPS_SYSTEM_PROMPT) > 0
+
+ def test_system_prompt_mentions_python(self):
+ assert "Python" in APPS_SYSTEM_PROMPT or "python" in APPS_SYSTEM_PROMPT
+
+ def test_system_prompt_mentions_stdin(self):
+ assert "stdin" in APPS_SYSTEM_PROMPT
+
+ def test_system_prompt_mentions_stdout(self):
+ assert "stdout" in APPS_SYSTEM_PROMPT
diff --git a/tests/test_code_exec_async_protocol.py b/tests/test_code_exec_async_protocol.py
new file mode 100644
index 0000000..c45142a
--- /dev/null
+++ b/tests/test_code_exec_async_protocol.py
@@ -0,0 +1,465 @@
+"""
+Integration tests for async env support in SingleAgentProtocol.
+
+Tests that the protocol correctly detects and handles envs with async
+env_reset and env_step methods (like CodeExecEnv).
+"""
+
+from typing import Optional, Tuple
+
+import pytest
+
+from ludic.context.full_dialog import FullDialog
+from ludic.interaction.single_agent import SingleAgentProtocol, _has_async_env_methods
+from ludic.agents.base_agent import Agent
+from ludic.envs.single_agent_env import SingleAgentEnv
+from ludic.parsers import ParseResult
+from ludic.types import Info, Observation, StepOutcome
+from tests._mocks import MockClient
+
+
+# Simple pass-through parser for tests
+def _passthrough_parser(raw: str) -> ParseResult:
+    """Return the raw model text unchanged as the action, with zero reward
+    and no substitute observation (i.e. parsing always succeeds)."""
+    return ParseResult(action=raw, reward=0.0, obs=None)
+
+
+# ---------------------------------------------------------------------
+# Mock Async Env for Testing
+# ---------------------------------------------------------------------
+
+
+class MockAsyncEnv(SingleAgentEnv):
+    """
+    A mock async env that simulates CodeExecEnv behavior.
+
+    Has async env_reset and env_step methods, unlike standard sync envs.
+    Terminates with reward 1.0 when the agent submits `target_action`,
+    truncates with reward -0.1 after `max_steps` wrong attempts, and
+    otherwise keeps asking with reward -0.1.
+    """
+
+    def __init__(
+        self,
+        target_action: str = "correct_code",
+        max_steps: int = 3,
+    ):
+        super().__init__()
+        self._target_action = target_action
+        self._max_steps = max_steps
+        self._step_count = 0
+        self._obs = "Write code to solve the problem."
+
+        # Track calls for assertions
+        self.reset_calls = 0
+        self.step_calls = 0
+
+    @property
+    def suggested_sysprompt(self) -> Optional[str]:
+        # Fixed system prompt; tests assert the protocol installs it.
+        return "You are a code assistant."
+
+    async def env_reset(self, *, seed: Optional[int] = None) -> Tuple[Observation, Info]:
+        """Async reset method (like CodeExecEnv)."""
+        self.reset_calls += 1
+        self._step_count = 0
+        self._obs = "Write code to solve the problem."
+        return self._obs, {"problem_id": "test_problem", "async_env": True}
+
+    async def env_step(self, action: str) -> StepOutcome:
+        """Async step method (like CodeExecEnv)."""
+        self.step_calls += 1
+        self._step_count += 1
+
+        if action == self._target_action:
+            # Correct code - terminate with success
+            return StepOutcome(
+                obs="All tests passed!",
+                reward=1.0,
+                truncated=False,
+                terminated=True,
+                info={"all_passed": True, "step_count": self._step_count},
+            )
+        elif self._step_count >= self._max_steps:
+            # Max steps reached - truncate
+            return StepOutcome(
+                obs=f"Tests failed. Attempt {self._step_count}/{self._max_steps}.",
+                reward=-0.1,
+                truncated=True,
+                terminated=False,
+                info={"all_passed": False, "step_count": self._step_count},
+            )
+        else:
+            # Wrong code but more attempts allowed
+            return StepOutcome(
+                obs=f"Tests failed. Try again. Attempt {self._step_count}/{self._max_steps}.",
+                reward=-0.1,
+                truncated=False,
+                terminated=False,
+                info={"all_passed": False, "step_count": self._step_count},
+            )
+
+    def env_current_obs(self) -> Observation:
+        # Note: intentionally sync, matching the base-class interface.
+        return self._obs
+
+
+class MockSyncEnv(SingleAgentEnv):
+ """
+ A standard sync env for comparison testing.
+ Uses regular (non-async) env_reset and env_step.
+ """
+
+ def __init__(self, target_action: str = "correct"):
+ super().__init__()
+ self._target_action = target_action
+ self._obs = "Sync env observation"
+ self.reset_calls = 0
+ self.step_calls = 0
+
+ def env_reset(self, *, seed: Optional[int] = None) -> Tuple[Observation, Info]:
+ """Standard sync reset."""
+ self.reset_calls += 1
+ self._obs = "Sync env observation"
+ return self._obs, {"sync_env": True}
+
+ def env_step(self, action: str) -> StepOutcome:
+ """Standard sync step."""
+ self.step_calls += 1
+ terminated = action == self._target_action
+ return StepOutcome(
+ obs="Success" if terminated else "Wrong",
+ reward=1.0 if terminated else -0.1,
+ truncated=False,
+ terminated=terminated,
+ info={},
+ )
+
+ def env_current_obs(self) -> Observation:
+ return self._obs
+
+
+# ---------------------------------------------------------------------
+# Async Detection Tests
+# ---------------------------------------------------------------------
+
+
+class TestAsyncDetection:
+    """_has_async_env_methods must report (has_async_reset, has_async_step)
+    correctly for both coroutine-based and plain sync envs."""
+
+    def test_detects_async_reset(self):
+        env = MockAsyncEnv()
+        has_async_reset, has_async_step = _has_async_env_methods(env)
+        assert has_async_reset is True
+
+    def test_detects_async_step(self):
+        env = MockAsyncEnv()
+        has_async_reset, has_async_step = _has_async_env_methods(env)
+        assert has_async_step is True
+
+    def test_detects_sync_env(self):
+        # Sync env: both flags must be False (no false positives).
+        env = MockSyncEnv()
+        has_async_reset, has_async_step = _has_async_env_methods(env)
+        assert has_async_reset is False
+        assert has_async_step is False
+
+
+# ---------------------------------------------------------------------
+# Protocol Async Env Integration Tests
+# ---------------------------------------------------------------------
+
+
+class TestProtocolAsyncEnvIntegration:
+    """End-to-end checks that SingleAgentProtocol drives an async env:
+    reset/step are awaited, the env's system prompt is installed, and
+    multi-step episodes accumulate rewards correctly."""
+
+    @pytest.mark.asyncio
+    async def test_protocol_runs_async_env_successfully(self):
+        """Protocol should correctly run an async env and produce rollouts."""
+        env = MockAsyncEnv(target_action="correct_code")
+        agent = Agent(
+            client=MockClient(text="correct_code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=5)
+
+        assert len(rollouts) == 1
+        rollout = rollouts[0]
+
+        # Should terminate on first step with correct action
+        assert len(rollout.steps) == 1
+        assert rollout.steps[0].terminated is True
+        assert rollout.steps[0].reward == pytest.approx(1.0)
+        assert rollout.steps[0].info.get("all_passed") is True
+
+    @pytest.mark.asyncio
+    async def test_protocol_calls_async_reset(self):
+        """Protocol should call async env_reset and receive correct observation."""
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="wrong_code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        await protocol.run(env=env, max_steps=1)
+
+        # Verify async reset was called
+        assert env.reset_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_protocol_calls_async_step(self):
+        """Protocol should call async env_step with the parsed action."""
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="some_code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        await protocol.run(env=env, max_steps=1)
+
+        # Verify async step was called
+        assert env.step_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_protocol_uses_async_env_system_prompt(self):
+        """Protocol should use the async env's suggested_sysprompt."""
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        await protocol.run(env=env, max_steps=1)
+
+        # The agent context should have the system prompt from env
+        messages = agent._ctx.messages
+        # First message should be system prompt
+        assert any(
+            m.get("role") == "system" and "code assistant" in m.get("content", "").lower()
+            for m in messages
+        )
+
+    @pytest.mark.asyncio
+    async def test_async_env_multiple_steps(self):
+        """Test that async env works correctly over multiple steps."""
+        env = MockAsyncEnv(target_action="correct", max_steps=5)
+
+        # Agent says "wrong" first 2 times, then "correct"
+        call_count = 0
+
+        class CountingClient(MockClient):
+            # Switches its canned reply after two calls so the episode
+            # takes exactly three env steps.
+            def __init__(self):
+                super().__init__(text="wrong")
+
+            async def complete(self, *args, **kwargs):
+                nonlocal call_count
+                call_count += 1
+                if call_count < 3:
+                    self._text = "wrong"
+                else:
+                    self._text = "correct"
+                return await super().complete(*args, **kwargs)
+
+        agent = Agent(
+            client=CountingClient(),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=10)
+
+        assert len(rollouts) == 1
+        rollout = rollouts[0]
+
+        # Should have taken 3 steps to get correct answer
+        assert len(rollout.steps) == 3
+        assert rollout.steps[0].terminated is False
+        assert rollout.steps[1].terminated is False
+        assert rollout.steps[2].terminated is True
+
+        # Total reward: -0.1 + -0.1 + 1.0 = 0.8
+        assert rollout.total_reward == pytest.approx(0.8)
+
+
+# ---------------------------------------------------------------------
+# Backward Compatibility Tests
+# ---------------------------------------------------------------------
+
+
+class TestBackwardCompatibility:
+    """Adding async-env support must not break plain sync envs: the
+    protocol should keep calling their reset/step through the sync path."""
+
+    @pytest.mark.asyncio
+    async def test_sync_env_still_works(self):
+        """Sync envs should continue to work without changes."""
+        env = MockSyncEnv(target_action="correct")
+        agent = Agent(
+            client=MockClient(text="correct"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=5)
+
+        assert len(rollouts) == 1
+        rollout = rollouts[0]
+
+        assert len(rollout.steps) == 1
+        assert rollout.steps[0].terminated is True
+        assert rollout.steps[0].reward == pytest.approx(1.0)
+
+    @pytest.mark.asyncio
+    async def test_sync_env_reset_is_called(self):
+        """Sync env reset should be called through normal path."""
+        env = MockSyncEnv()
+        agent = Agent(
+            client=MockClient(text="wrong"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        await protocol.run(env=env, max_steps=1)
+
+        assert env.reset_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_sync_env_step_is_called(self):
+        """Sync env step should be called through normal path."""
+        env = MockSyncEnv()
+        agent = Agent(
+            client=MockClient(text="wrong"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        await protocol.run(env=env, max_steps=1)
+
+        assert env.step_calls == 1
+
+
+# ---------------------------------------------------------------------
+# Info Propagation Tests
+# ---------------------------------------------------------------------
+
+
+class TestAsyncEnvInfoPropagation:
+    """Observations and info dicts produced by async env_reset/env_step
+    must surface in the recorded rollout steps."""
+
+    @pytest.mark.asyncio
+    async def test_reset_info_accessible_in_rollout(self):
+        """Info from async env_reset should be accessible."""
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="correct_code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=1)
+
+        # The first step's prev_obs should be from reset
+        assert rollouts[0].steps[0].prev_obs == "Write code to solve the problem."
+
+    @pytest.mark.asyncio
+    async def test_step_info_propagated_to_rollout(self):
+        """Info from async env_step should be in the step info."""
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="correct_code"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=1)
+
+        step_info = rollouts[0].steps[0].info
+        assert step_info.get("all_passed") is True
+        assert step_info.get("step_count") == 1
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestAsyncEnvEdgeCases:
+    """Edge cases for async envs: protocol-side truncation, parser
+    integration, and parser-failure handling."""
+
+    @pytest.mark.asyncio
+    async def test_async_env_truncation_on_max_steps(self):
+        """Async env that never terminates should truncate at max_steps."""
+        # Env allows 100 steps, but the protocol caps the episode at 3.
+        env = MockAsyncEnv(target_action="impossible", max_steps=100)
+        agent = Agent(
+            client=MockClient(text="wrong"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=_passthrough_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=3)
+
+        rollout = rollouts[0]
+        assert len(rollout.steps) == 3
+        assert rollout.steps[-1].truncated is True
+        assert rollout.meta.get("episode_truncated") is True
+
+    @pytest.mark.asyncio
+    async def test_async_env_with_parser(self):
+        """Parser should work correctly with async envs."""
+        from ludic.parsers import xml_tag_parser
+
+        env = MockAsyncEnv(target_action="parsed_code")
+        agent = Agent(
+            client=MockClient(text="<code>parsed_code</code>"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=xml_tag_parser("code"),
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=5)
+
+        rollout = rollouts[0]
+        assert len(rollout.steps) == 1
+        assert rollout.steps[0].terminated is True
+        assert rollout.steps[0].info.get("parsed_action") == "parsed_code"
+
+    @pytest.mark.asyncio
+    async def test_async_env_parser_failure(self):
+        """Parser failures should be handled correctly with async envs."""
+        from ludic.parsers import ParseResult
+
+        def strict_parser(text: str) -> ParseResult:
+            # Only accepts "VALID:<action>"; everything else is a parse error.
+            if text.startswith("VALID:"):
+                return ParseResult(action=text[6:], reward=0.1, obs=None)
+            return ParseResult(action=None, reward=-0.5, obs="Invalid format")
+
+        env = MockAsyncEnv()
+        agent = Agent(
+            client=MockClient(text="invalid_format"),
+            model="mock",
+            ctx=FullDialog(),
+            parser=strict_parser,
+        )
+        protocol = SingleAgentProtocol(agent=agent)
+
+        rollouts = await protocol.run(env=env, max_steps=1)
+
+        rollout = rollouts[0]
+        assert len(rollout.steps) == 1
+        step = rollout.steps[0]
+
+        # Parser failure - no env step called
+        assert env.step_calls == 0
+        assert step.info.get("parse_error") is True
+        assert step.reward == pytest.approx(-0.5)
+        assert step.next_obs == "Invalid format"
diff --git a/tests/test_code_exec_cache.py b/tests/test_code_exec_cache.py
new file mode 100644
index 0000000..e04d929
--- /dev/null
+++ b/tests/test_code_exec_cache.py
@@ -0,0 +1,392 @@
+"""
+Unit tests for ludic.envs.code_exec.docker_sandbox.LRUCache
+
+Tests thread safety, eviction behavior, and statistics tracking.
+
+Note: Requires the `docker` package to be installed for LRUCache import.
+"""
+
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+
+from ludic.envs.code_exec.types import (
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+ TestResult,
+)
+
+# Try to import LRUCache - skip all tests if docker package not installed
+try:
+ from ludic.envs.code_exec.docker_sandbox import LRUCache
+except ImportError:
+ LRUCache = None # type: ignore[misc, assignment]
+
+pytestmark = pytest.mark.skipif(
+ LRUCache is None,
+ reason="docker package not installed (required for LRUCache)",
+)
+
+
+def _make_batch_result(passed_count: int = 1, total_count: int = 1) -> BatchTestResult:
+ """Helper to create a BatchTestResult with minimal boilerplate."""
+ results = []
+ for i in range(total_count):
+ passed = i < passed_count
+ results.append(
+ TestResult(
+ test_case=TestCase(input=f"input_{i}", expected="out", id=f"t{i}"),
+ passed=passed,
+ actual="out" if passed else "wrong",
+ execution=ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS if passed else RunStatus.RUNTIME_ERROR,
+ ),
+ )
+ )
+ return BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+
+
+# ---------------------------------------------------------------------
+# Basic Operations
+# ---------------------------------------------------------------------
+
+
+class TestLRUCacheBasicOperations:
+ def test_get_returns_none_for_missing_key(self):
+ cache = LRUCache(max_size=10)
+ result = cache.get("nonexistent_code", "nonexistent_tests")
+ assert result is None
+
+ def test_put_and_get(self):
+ cache = LRUCache(max_size=10)
+ batch_result = _make_batch_result()
+
+ cache.put("code1", "tests1", batch_result)
+ retrieved = cache.get("code1", "tests1")
+
+ assert retrieved is batch_result
+
+ def test_get_returns_none_after_different_key(self):
+ cache = LRUCache(max_size=10)
+ batch_result = _make_batch_result()
+
+ cache.put("code1", "tests1", batch_result)
+
+ # Different code hash
+ assert cache.get("code2", "tests1") is None
+ # Different tests hash
+ assert cache.get("code1", "tests2") is None
+ # Both different
+ assert cache.get("code2", "tests2") is None
+
+ def test_put_overwrites_existing_entry(self):
+ cache = LRUCache(max_size=10)
+ result1 = _make_batch_result(passed_count=1, total_count=2)
+ result2 = _make_batch_result(passed_count=2, total_count=2)
+
+ cache.put("code1", "tests1", result1)
+ cache.put("code1", "tests1", result2)
+
+ retrieved = cache.get("code1", "tests1")
+ assert retrieved is result2
+ assert retrieved.passed_count == 2
+
+
+# ---------------------------------------------------------------------
+# Eviction Behavior
+# ---------------------------------------------------------------------
+
+
+class TestLRUCacheEviction:
+ def test_evicts_oldest_when_full(self):
+ cache = LRUCache(max_size=3)
+
+ cache.put("code1", "tests", _make_batch_result())
+ cache.put("code2", "tests", _make_batch_result())
+ cache.put("code3", "tests", _make_batch_result())
+
+ # Cache is now full
+ assert cache.stats["size"] == 3
+
+ # Add one more - oldest (code1) should be evicted
+ cache.put("code4", "tests", _make_batch_result())
+
+ assert cache.stats["size"] == 3
+ assert cache.get("code1", "tests") is None # Evicted
+ assert cache.get("code2", "tests") is not None
+ assert cache.get("code3", "tests") is not None
+ assert cache.get("code4", "tests") is not None
+
+ def test_access_refreshes_entry_avoiding_eviction(self):
+ cache = LRUCache(max_size=3)
+
+ cache.put("code1", "tests", _make_batch_result())
+ cache.put("code2", "tests", _make_batch_result())
+ cache.put("code3", "tests", _make_batch_result())
+
+ # Access code1 to make it most recently used
+ cache.get("code1", "tests")
+
+ # Add new entry - code2 (now oldest accessed) should be evicted
+ cache.put("code4", "tests", _make_batch_result())
+
+ assert cache.get("code1", "tests") is not None # Still present
+ assert cache.get("code2", "tests") is None # Evicted
+ assert cache.get("code3", "tests") is not None
+ assert cache.get("code4", "tests") is not None
+
+ def test_put_refreshes_existing_entry(self):
+ cache = LRUCache(max_size=3)
+
+ cache.put("code1", "tests", _make_batch_result())
+ cache.put("code2", "tests", _make_batch_result())
+ cache.put("code3", "tests", _make_batch_result())
+
+ # Update code1 (makes it most recently used)
+ cache.put("code1", "tests", _make_batch_result())
+
+ # Add new entry - code2 should be evicted now
+ cache.put("code4", "tests", _make_batch_result())
+
+ assert cache.get("code1", "tests") is not None
+ assert cache.get("code2", "tests") is None # Evicted
+ assert cache.get("code3", "tests") is not None
+ assert cache.get("code4", "tests") is not None
+
+ def test_max_size_one(self):
+ cache = LRUCache(max_size=1)
+
+ cache.put("code1", "tests", _make_batch_result())
+ assert cache.get("code1", "tests") is not None
+
+ cache.put("code2", "tests", _make_batch_result())
+ assert cache.get("code1", "tests") is None
+ assert cache.get("code2", "tests") is not None
+
+
+# ---------------------------------------------------------------------
+# Statistics Tracking
+# ---------------------------------------------------------------------
+
+
+class TestLRUCacheStats:
+ def test_initial_stats(self):
+ cache = LRUCache(max_size=100)
+ stats = cache.stats
+
+ assert stats["hits"] == 0
+ assert stats["misses"] == 0
+ assert stats["size"] == 0
+ assert stats["max_size"] == 100
+
+ def test_hit_tracking(self):
+ cache = LRUCache(max_size=10)
+ cache.put("code", "tests", _make_batch_result())
+
+ # First hit
+ cache.get("code", "tests")
+ assert cache.stats["hits"] == 1
+ assert cache.stats["misses"] == 0
+
+ # Second hit
+ cache.get("code", "tests")
+ assert cache.stats["hits"] == 2
+ assert cache.stats["misses"] == 0
+
+ def test_miss_tracking(self):
+ cache = LRUCache(max_size=10)
+
+ # First miss
+ cache.get("nonexistent", "tests")
+ assert cache.stats["hits"] == 0
+ assert cache.stats["misses"] == 1
+
+ # Second miss
+ cache.get("also_nonexistent", "tests")
+ assert cache.stats["hits"] == 0
+ assert cache.stats["misses"] == 2
+
+ def test_mixed_hits_and_misses(self):
+ cache = LRUCache(max_size=10)
+ cache.put("code1", "tests", _make_batch_result())
+
+ cache.get("code1", "tests") # hit
+ cache.get("code2", "tests") # miss
+ cache.get("code1", "tests") # hit
+ cache.get("code3", "tests") # miss
+ cache.get("code1", "tests") # hit
+
+ stats = cache.stats
+ assert stats["hits"] == 3
+ assert stats["misses"] == 2
+
+ def test_size_tracking(self):
+ cache = LRUCache(max_size=10)
+
+ assert cache.stats["size"] == 0
+
+ cache.put("code1", "tests", _make_batch_result())
+ assert cache.stats["size"] == 1
+
+ cache.put("code2", "tests", _make_batch_result())
+ assert cache.stats["size"] == 2
+
+ # Overwrite existing doesn't increase size
+ cache.put("code1", "tests", _make_batch_result())
+ assert cache.stats["size"] == 2
+
+
+# ---------------------------------------------------------------------
+# Thread Safety
+# ---------------------------------------------------------------------
+
+
+class TestLRUCacheThreadSafety:
+ def test_concurrent_puts(self):
+ cache = LRUCache(max_size=1000)
+ n_threads = 10
+ puts_per_thread = 100
+
+ def put_items(thread_id: int):
+ for i in range(puts_per_thread):
+ cache.put(f"code_{thread_id}_{i}", "tests", _make_batch_result())
+
+ with ThreadPoolExecutor(max_workers=n_threads) as executor:
+ futures = [executor.submit(put_items, i) for i in range(n_threads)]
+ for f in futures:
+ f.result()
+
+ # All items should be accessible
+ expected_size = n_threads * puts_per_thread
+ assert cache.stats["size"] == expected_size
+
+ def test_concurrent_gets(self):
+ cache = LRUCache(max_size=100)
+
+ # Pre-populate
+ for i in range(100):
+ cache.put(f"code_{i}", "tests", _make_batch_result())
+
+ n_threads = 10
+ gets_per_thread = 100
+
+ def get_items(thread_id: int):
+ hits = 0
+ for i in range(gets_per_thread):
+ key = f"code_{i % 100}" # Round-robin through existing keys
+ if cache.get(key, "tests") is not None:
+ hits += 1
+ return hits
+
+ with ThreadPoolExecutor(max_workers=n_threads) as executor:
+ futures = [executor.submit(get_items, i) for i in range(n_threads)]
+ results = [f.result() for f in futures]
+
+ # All gets should have found their items
+ assert all(r == gets_per_thread for r in results)
+
+ # Stats should reflect all hits
+ assert cache.stats["hits"] == n_threads * gets_per_thread
+
+ def test_concurrent_mixed_operations(self):
+ cache = LRUCache(max_size=50)
+ n_threads = 8
+ ops_per_thread = 100
+
+ errors = []
+
+ def mixed_operations(thread_id: int):
+ try:
+ for i in range(ops_per_thread):
+ if i % 3 == 0:
+ cache.put(f"code_{i}", "tests", _make_batch_result())
+ else:
+ cache.get(f"code_{i % 30}", "tests")
+ # Access stats during operations
+ _ = cache.stats
+ except Exception as e:
+ errors.append(str(e))
+
+ with ThreadPoolExecutor(max_workers=n_threads) as executor:
+ futures = [executor.submit(mixed_operations, i) for i in range(n_threads)]
+ for f in futures:
+ f.result()
+
+ # No errors should have occurred
+ assert len(errors) == 0, f"Errors during concurrent operations: {errors}"
+
+ # Cache should be in a consistent state
+ stats = cache.stats
+ assert stats["size"] <= stats["max_size"]
+ assert stats["hits"] >= 0
+ assert stats["misses"] >= 0
+
+ def test_concurrent_eviction_stress(self):
+ """Test that concurrent puts with eviction don't cause issues."""
+ cache = LRUCache(max_size=10)
+ n_threads = 20
+ puts_per_thread = 100
+
+ errors = []
+
+ def stress_puts(thread_id: int):
+ try:
+ for i in range(puts_per_thread):
+ cache.put(f"code_{thread_id}_{i}", "tests", _make_batch_result())
+ except Exception as e:
+ errors.append(str(e))
+
+ with ThreadPoolExecutor(max_workers=n_threads) as executor:
+ futures = [executor.submit(stress_puts, i) for i in range(n_threads)]
+ for f in futures:
+ f.result()
+
+ assert len(errors) == 0
+ assert cache.stats["size"] == 10 # Should stay at max
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestLRUCacheEdgeCases:
+ def test_empty_hash_strings(self):
+ cache = LRUCache(max_size=10)
+ batch = _make_batch_result()
+
+ cache.put("", "", batch)
+ assert cache.get("", "") is batch
+
+ def test_very_long_hash_strings(self):
+ cache = LRUCache(max_size=10)
+ batch = _make_batch_result()
+
+ long_code_hash = "a" * 10000
+ long_tests_hash = "b" * 10000
+
+ cache.put(long_code_hash, long_tests_hash, batch)
+ assert cache.get(long_code_hash, long_tests_hash) is batch
+
+ def test_special_characters_in_hashes(self):
+ cache = LRUCache(max_size=10)
+ batch = _make_batch_result()
+
+ special_hash = "!@#$%^&*()_+-=[]{}|;':\",./<>?"
+ cache.put(special_hash, special_hash, batch)
+ assert cache.get(special_hash, special_hash) is batch
+
+ def test_unicode_in_hashes(self):
+ cache = LRUCache(max_size=10)
+ batch = _make_batch_result()
+
+        unicode_hash = "hash_with_unicode_日本語_ümläut_🎯"
+ cache.put(unicode_hash, unicode_hash, batch)
+ assert cache.get(unicode_hash, unicode_hash) is batch
diff --git a/tests/test_code_exec_env.py b/tests/test_code_exec_env.py
new file mode 100644
index 0000000..70d3cb1
--- /dev/null
+++ b/tests/test_code_exec_env.py
@@ -0,0 +1,880 @@
+"""
+Unit tests for ludic.envs.code_exec.env.CodeExecEnv
+
+Tests the environment with mock sandbox pools to avoid Docker dependency.
+"""
+
+import pytest
+
+from ludic.envs.code_exec.env import CodeExecConfig, CodeExecEnv
+from ludic.envs.code_exec.types import (
+ BatchTestResult,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+ TestCase,
+ TestResult,
+)
+from ludic.envs.code_exec.adapters.base import ExactMatchVerifier, TestAdapter
+from ludic.envs.code_exec.sandbox import Sandbox, SandboxPool
+
+
+# ---------------------------------------------------------------------
+# Mock Implementations
+# ---------------------------------------------------------------------
+
+
+class MockSandbox:
+ """Mock sandbox for testing without Docker."""
+
+ def __init__(
+ self,
+ compile_result: CompileResult | None = None,
+ execute_results: dict[str, ExecutionResult] | None = None,
+ default_stdout: str = "",
+ ):
+ self._compile_result = compile_result or CompileResult(
+ status=CompileStatus.SUCCESS,
+ duration_ms=10.0,
+ )
+ self._execute_results = execute_results or {}
+ self._default_stdout = default_stdout
+ self._python_version = "3.11"
+
+ # Track calls
+ self.reset_calls = 0
+ self.compile_calls: list[str] = []
+ self.execute_calls: list[tuple[str, str]] = []
+
+ @property
+ def python_version(self) -> str:
+ return self._python_version
+
+ async def reset(self) -> None:
+ self.reset_calls += 1
+
+ async def compile(self, code: str, *, timeout_s: float = 5.0) -> CompileResult:
+ self.compile_calls.append(code)
+ return self._compile_result
+
+ async def execute(
+ self,
+ code: str,
+ *,
+ stdin: str = "",
+ skip_compile: bool = False,
+ timeout_s: float = 10.0,
+ memory_limit_mb: int | None = None,
+ env_vars: dict[str, str] | None = None,
+ ) -> ExecutionResult:
+ self.execute_calls.append((code, stdin))
+
+ if stdin in self._execute_results:
+ return self._execute_results[stdin]
+
+ return ExecutionResult(
+ compile_result=self._compile_result,
+ run_status=RunStatus.SUCCESS,
+ stdout=self._default_stdout,
+ stderr="",
+ exit_code=0,
+ compile_duration_ms=10.0,
+ run_duration_ms=50.0,
+ total_duration_ms=60.0,
+ )
+
+
+class MockSandboxPool:
+ """Mock sandbox pool for testing without Docker."""
+
+ def __init__(
+ self,
+ sandbox: MockSandbox | None = None,
+ python_version: str = "3.11",
+ ):
+ self._sandbox = sandbox or MockSandbox()
+ self._python_version = python_version
+ self._cache: dict[tuple[str, str], BatchTestResult] = {}
+
+ # Track calls
+ self.start_calls = 0
+ self.checkout_calls = 0
+ self.release_calls = 0
+ self.shutdown_calls = 0
+
+ @property
+ def python_version(self) -> str:
+ return self._python_version
+
+ async def start(self) -> None:
+ self.start_calls += 1
+
+ async def checkout(self, timeout_s: float = 30.0) -> Sandbox:
+ self.checkout_calls += 1
+ return self._sandbox
+
+ async def release(self, sandbox: Sandbox) -> None:
+ self.release_calls += 1
+
+ async def shutdown(self) -> None:
+ self.shutdown_calls += 1
+
+ def get_cached(self, code_hash: str, tests_hash: str) -> BatchTestResult | None:
+ return self._cache.get((code_hash, tests_hash))
+
+ def put_cached(
+ self, code_hash: str, tests_hash: str, result: BatchTestResult
+ ) -> None:
+ self._cache[(code_hash, tests_hash)] = result
+
+ @property
+ def cache_stats(self) -> dict[str, int]:
+ """Return mock cache statistics."""
+ return {
+ "hits": 0,
+ "misses": 0,
+ "size": len(self._cache),
+ "max_size": 10000,
+ }
+
+
+class MockTestAdapter:
+ """Mock test adapter for testing."""
+
+ def __init__(
+ self,
+ prompt: str = "Write a program.",
+ problem_id: str = "test_problem",
+ tests: list[TestCase] | None = None,
+ ):
+ self._prompt = prompt
+ self._problem_id = problem_id
+ self._tests = tests or [
+ TestCase(input="1", expected="1", id="test_0"),
+ ]
+
+ def get_prompt(self, sample: dict) -> str:
+ return self._prompt
+
+ def get_problem_id(self, sample: dict) -> str:
+ return self._problem_id
+
+ def get_tests(self, sample: dict) -> list[TestCase]:
+ return self._tests
+
+ def hash_tests(self, tests: list[TestCase]) -> str:
+ return "mock_tests_hash_1234"
+
+
+# ---------------------------------------------------------------------
+# Environment Reset Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvReset:
+ @pytest.mark.asyncio
+ async def test_reset_returns_prompt_and_info(self):
+ sandbox = MockSandbox(default_stdout="1")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(prompt="Add two numbers.", problem_id="prob_1")
+
+ env = CodeExecEnv(
+ sample={"question": "Add two numbers."},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ obs, info = await env.env_reset()
+
+ assert obs == "Add two numbers."
+ assert info["problem_id"] == "prob_1"
+ assert "num_tests" in info
+ assert "tests_hash" in info
+ assert "python_version" in info
+
+ @pytest.mark.asyncio
+ async def test_reset_extracts_correct_number_of_tests(self):
+ sandbox = MockSandbox(default_stdout="out")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="1", expected="a", id="t0"),
+ TestCase(input="2", expected="b", id="t1"),
+ TestCase(input="3", expected="c", id="t2"),
+ ]
+ )
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ obs, info = await env.env_reset()
+
+ assert info["num_tests"] == 3
+
+ @pytest.mark.asyncio
+ async def test_reset_respects_max_tests_config(self):
+ sandbox = MockSandbox(default_stdout="out")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="1", expected="a", id="t0"),
+ TestCase(input="2", expected="b", id="t1"),
+ TestCase(input="3", expected="c", id="t2"),
+ TestCase(input="4", expected="d", id="t3"),
+ TestCase(input="5", expected="e", id="t4"),
+ ]
+ )
+
+ config = CodeExecConfig(max_tests=2)
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ obs, info = await env.env_reset()
+
+ assert info["num_tests"] == 2
+
+ @pytest.mark.asyncio
+ async def test_reset_handles_empty_tests(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+
+ # Create adapter that returns empty tests
+ class EmptyTestsAdapter:
+ def get_prompt(self, sample: dict) -> str:
+ return "Write a program."
+
+ def get_problem_id(self, sample: dict) -> str:
+ return "test_problem"
+
+ def get_tests(self, sample: dict) -> list[TestCase]:
+ return [] # No tests!
+
+ def hash_tests(self, tests: list[TestCase]) -> str:
+ return "empty_hash"
+
+ adapter = EmptyTestsAdapter()
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ obs, info = await env.env_reset()
+
+ assert "error" in info
+ assert info["error"] == "no_tests_extracted"
+
+ @pytest.mark.asyncio
+ async def test_reset_sets_system_prompt(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ system_prompt="You are a Python expert.",
+ )
+
+ assert env.suggested_sysprompt == "You are a Python expert."
+
+
+# ---------------------------------------------------------------------
+# Environment Step Tests - Success Cases
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvStepSuccess:
+ @pytest.mark.asyncio
+ async def test_step_all_tests_pass(self):
+ sandbox = MockSandbox(default_stdout="expected_output")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="in1", expected="expected_output", id="t0"),
+ TestCase(input="in2", expected="expected_output", id="t1"),
+ ]
+ )
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("print('expected_output')")
+
+ assert outcome.terminated is True
+ assert outcome.truncated is False
+ assert outcome.reward == 1.0
+ assert outcome.info["all_passed"] is True
+ assert outcome.info["passed"] == 2
+ assert outcome.info["total"] == 2
+ assert "All" in outcome.obs and "passed" in outcome.obs
+
+ @pytest.mark.asyncio
+ async def test_step_releases_sandbox(self):
+ sandbox = MockSandbox(default_stdout="output")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="x", expected="output", id="t0")]
+ )
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ await env.env_reset()
+ await env.env_step("code")
+
+ assert pool.checkout_calls == 1
+ assert pool.release_calls == 1
+
+
+# ---------------------------------------------------------------------
+# Environment Step Tests - Failure Cases
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvStepFailure:
+ @pytest.mark.asyncio
+ async def test_step_without_reset_returns_error(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ # Skip reset
+ outcome = await env.env_step("some code")
+
+ assert outcome.terminated is True
+ assert outcome.reward == -1.0
+ assert outcome.info["error"] == "reset_not_called"
+
+ @pytest.mark.asyncio
+ async def test_step_with_empty_code(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+ config = CodeExecConfig(compile_failure_reward=-0.5)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("")
+
+ assert outcome.terminated is True
+ assert outcome.reward == -0.5
+ assert outcome.info["error"] == "empty_code"
+
+ @pytest.mark.asyncio
+ async def test_step_with_whitespace_only_code(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step(" \n\t ")
+
+ assert outcome.info["error"] == "empty_code"
+
+ @pytest.mark.asyncio
+ async def test_step_compile_failure(self):
+ compile_result = CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message="SyntaxError: invalid syntax",
+ error_line=5,
+ duration_ms=10.0,
+ )
+ sandbox = MockSandbox(compile_result=compile_result)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+ config = CodeExecConfig(compile_failure_reward=-0.2)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("def foo(")
+
+ assert outcome.reward == -0.2
+ assert outcome.info["compile_failed"] is True
+ assert "Compilation failed" in outcome.obs
+ assert "SyntaxError" in outcome.obs
+
+ @pytest.mark.asyncio
+ async def test_step_some_tests_fail(self):
+ execute_results = {
+ "input1": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="correct",
+ ),
+ "input2": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="wrong", # Will fail
+ ),
+ }
+ sandbox = MockSandbox(execute_results=execute_results)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="input1", expected="correct", id="t0"),
+ TestCase(input="input2", expected="correct", id="t1"),
+ ]
+ )
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=CodeExecConfig(stop_on_first_failure=False),
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ assert outcome.reward == 0.0 # Binary reward, not all passed
+ assert outcome.info["all_passed"] is False
+ assert outcome.info["passed"] == 1
+ assert outcome.info["total"] == 2
+
+
+# ---------------------------------------------------------------------
+# Reward Shaping Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvRewardShaping:
+ @pytest.mark.asyncio
+ async def test_binary_reward_all_pass(self):
+ sandbox = MockSandbox(default_stdout="out")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="x", expected="out", id="t0")]
+ )
+ config = CodeExecConfig(partial_credit=False)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ assert outcome.reward == 1.0
+
+ @pytest.mark.asyncio
+ async def test_binary_reward_some_fail(self):
+ execute_results = {
+ "in1": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="correct",
+ ),
+ "in2": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="wrong",
+ ),
+ }
+ sandbox = MockSandbox(execute_results=execute_results)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="in1", expected="correct", id="t0"),
+ TestCase(input="in2", expected="correct", id="t1"),
+ ]
+ )
+ config = CodeExecConfig(partial_credit=False, stop_on_first_failure=False)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ assert outcome.reward == 0.0 # Binary: all or nothing
+
+ @pytest.mark.asyncio
+ async def test_partial_credit_half_pass(self):
+ execute_results = {
+ "in1": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="correct",
+ ),
+ "in2": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="correct",
+ ),
+ "in3": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="wrong",
+ ),
+ "in4": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="wrong",
+ ),
+ }
+ sandbox = MockSandbox(execute_results=execute_results)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="in1", expected="correct", id="t0"),
+ TestCase(input="in2", expected="correct", id="t1"),
+ TestCase(input="in3", expected="correct", id="t2"),
+ TestCase(input="in4", expected="correct", id="t3"),
+ ]
+ )
+ config = CodeExecConfig(partial_credit=True, stop_on_first_failure=False)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ assert outcome.reward == pytest.approx(0.5) # 2/4 passed
+
+
+# ---------------------------------------------------------------------
+# Caching Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvCaching:
+ @pytest.mark.asyncio
+ async def test_cache_hit_skips_execution(self):
+ sandbox = MockSandbox(default_stdout="output")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="x", expected="output", id="t0")]
+ )
+ config = CodeExecConfig(use_cache=True)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+
+ # First call - should execute
+ outcome1 = await env.env_step("print('output')")
+ assert pool.checkout_calls == 1
+ assert outcome1.info["cache_hit"] is False
+
+ # Second call with same code - should hit cache
+ await env.env_reset() # Reset to allow another step
+ outcome2 = await env.env_step("print('output')")
+ assert pool.checkout_calls == 1 # No new checkout
+ assert outcome2.info["cache_hit"] is True
+
+ @pytest.mark.asyncio
+ async def test_cache_disabled(self):
+ sandbox = MockSandbox(default_stdout="output")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="x", expected="output", id="t0")]
+ )
+ config = CodeExecConfig(use_cache=False)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome1 = await env.env_step("print('output')")
+ assert pool.checkout_calls == 1
+
+ await env.env_reset()
+ outcome2 = await env.env_step("print('output')")
+ assert pool.checkout_calls == 2 # New execution each time
+ assert outcome2.info["cache_hit"] is False
+
+
+# ---------------------------------------------------------------------
+# Info Dict Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvInfo:
+ @pytest.mark.asyncio
+ async def test_info_contains_required_fields(self):
+ sandbox = MockSandbox(default_stdout="out")
+ pool = MockSandboxPool(sandbox=sandbox, python_version="3.10")
+ adapter = MockTestAdapter(problem_id="prob_42")
+ adapter._tests = [TestCase(input="x", expected="out", id="t0")]
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+ info = outcome.info
+
+ # Problem metadata
+ assert info["problem_id"] == "prob_42"
+ assert "code_hash" in info
+ assert "tests_hash" in info
+
+ # Test results summary
+ assert "passed" in info
+ assert "total" in info
+ assert "all_passed" in info
+ assert "pass_rate" in info
+ assert "compile_failed" in info
+
+ # Detailed results
+ assert "test_results" in info
+ assert isinstance(info["test_results"], list)
+
+ # Timing
+ assert "timing" in info
+ assert "total_compile_ms" in info["timing"]
+ assert "total_run_ms" in info["timing"]
+ assert "total_execution_ms" in info["timing"]
+
+ # Cache and env info
+ assert "cache_hit" in info
+ assert info["python_version"] == "3.10"
+
+ @pytest.mark.asyncio
+ async def test_info_test_results_detail(self):
+ execute_results = {
+ "in1": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="correct",
+ run_duration_ms=100.0,
+ ),
+ }
+ sandbox = MockSandbox(execute_results=execute_results)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="in1", expected="correct", id="test_001")]
+ )
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ test_result = outcome.info["test_results"][0]
+ assert test_result["test_id"] == "test_001"
+ assert test_result["passed"] is True
+ assert test_result["compiled"] is True
+ assert test_result["ran"] is True
+ assert test_result["run_status"] == "success"
+ assert test_result["compile_status"] == "success"
+
+
+# ---------------------------------------------------------------------
+# Observation Building Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvObservation:
+ @pytest.mark.asyncio
+ async def test_observation_on_success(self):
+ sandbox = MockSandbox(default_stdout="out")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[
+ TestCase(input="x", expected="out", id="t0"),
+ TestCase(input="y", expected="out", id="t1"),
+ ]
+ )
+
+ env = CodeExecEnv(sample={}, sandbox_pool=pool, test_adapter=adapter)
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ assert "All 2 tests passed" in outcome.obs
+
+ @pytest.mark.asyncio
+ async def test_observation_on_compile_error_includes_line(self):
+ compile_result = CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message="invalid syntax",
+ error_line=42,
+ duration_ms=5.0,
+ )
+ sandbox = MockSandbox(compile_result=compile_result)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+
+ env = CodeExecEnv(sample={}, sandbox_pool=pool, test_adapter=adapter)
+
+ await env.env_reset()
+ outcome = await env.env_step("bad code")
+
+ assert "Compilation failed" in outcome.obs
+ assert "line 42" in outcome.obs
+
+ @pytest.mark.asyncio
+ async def test_observation_truncates_long_errors(self):
+ long_error = "E" * 1000
+ compile_result = CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message=long_error,
+ duration_ms=5.0,
+ )
+ sandbox = MockSandbox(compile_result=compile_result)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+ config = CodeExecConfig(max_error_length=100)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("code")
+
+ # Error should be truncated with "..."
+ assert len(outcome.obs) < len(long_error)
+ assert "..." in outcome.obs
+
+ @pytest.mark.asyncio
+ async def test_observation_includes_stderr_when_configured(self):
+ execute_results = {
+ "input": ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.RUNTIME_ERROR,
+ stdout="",
+ stderr="NameError: x is not defined",
+ ),
+ }
+ sandbox = MockSandbox(execute_results=execute_results)
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="input", expected="output", id="t0")]
+ )
+ config = CodeExecConfig(include_stderr_in_obs=True)
+
+ env = CodeExecEnv(
+ sample={},
+ sandbox_pool=pool,
+ test_adapter=adapter,
+ config=config,
+ )
+
+ await env.env_reset()
+ outcome = await env.env_step("print(x)")
+
+ assert "Stderr" in outcome.obs
+ assert "NameError" in outcome.obs
+
+
+# ---------------------------------------------------------------------
+# Current Observation Tests
+# ---------------------------------------------------------------------
+
+
+class TestCodeExecEnvCurrentObs:
+ @pytest.mark.asyncio
+ async def test_env_current_obs_before_reset(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter()
+
+ env = CodeExecEnv(sample={}, sandbox_pool=pool, test_adapter=adapter)
+
+ obs = env.env_current_obs()
+ assert "Error" in obs
+ assert "reset" in obs.lower()
+
+ @pytest.mark.asyncio
+ async def test_env_current_obs_after_reset(self):
+ sandbox = MockSandbox()
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(prompt="Solve this problem.")
+
+ env = CodeExecEnv(sample={}, sandbox_pool=pool, test_adapter=adapter)
+
+ await env.env_reset()
+ obs = env.env_current_obs()
+
+ assert obs == "Solve this problem."
+
+ @pytest.mark.asyncio
+ async def test_env_current_obs_after_step(self):
+ sandbox = MockSandbox(default_stdout="result")
+ pool = MockSandboxPool(sandbox=sandbox)
+ adapter = MockTestAdapter(
+ tests=[TestCase(input="x", expected="result", id="t0")]
+ )
+
+ env = CodeExecEnv(sample={}, sandbox_pool=pool, test_adapter=adapter)
+
+ await env.env_reset()
+ await env.env_step("code")
+ obs = env.env_current_obs()
+
+ assert "passed" in obs
diff --git a/tests/test_code_exec_podman.py b/tests/test_code_exec_podman.py
new file mode 100644
index 0000000..301184d
--- /dev/null
+++ b/tests/test_code_exec_podman.py
@@ -0,0 +1,546 @@
+"""
+Unit tests for Podman-HPC sandbox implementation.
+
+These tests mock subprocess calls to test the logic without requiring
+actual podman-hpc CLI or containers.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from dataclasses import dataclass
+from typing import Optional
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from ludic.envs.code_exec.podman_sandbox import (
+ LRUCache,
+ PodmanConfig,
+ PodmanError,
+ PodmanHPCSandbox,
+ PodmanHPCSandboxPool,
+ PodmanResult,
+ _get_container_name_prefix,
+)
+from ludic.envs.code_exec.backend import (
+ SandboxBackend,
+ detect_available_backend,
+ is_docker_available,
+ is_podman_hpc_available,
+ is_singularity_available,
+ get_backend_info,
+)
+from ludic.envs.code_exec.types import (
+ BatchTestResult,
+ CompileStatus,
+ RunStatus,
+ TestCase,
+ TestResult,
+ CompileResult,
+ ExecutionResult,
+)
+
+
+# ============================================================================
+# Container naming tests
+# ============================================================================
+
+
+class TestContainerNaming:
+ """Tests for container name prefix generation."""
+
+ def test_local_prefix_without_slurm(self):
+ """Without SLURM_JOB_ID, should use 'local' prefix."""
+ with patch.dict(os.environ, {}, clear=True):
+ # Ensure SLURM_JOB_ID is not set
+ os.environ.pop("SLURM_JOB_ID", None)
+ prefix = _get_container_name_prefix()
+ assert prefix == "ludic-sandbox-local"
+
+ def test_slurm_prefix_with_job_id(self):
+ """With SLURM_JOB_ID, should include job ID in prefix."""
+ with patch.dict(os.environ, {"SLURM_JOB_ID": "12345"}):
+ prefix = _get_container_name_prefix()
+ assert prefix == "ludic-sandbox-12345"
+
+
+# ============================================================================
+# PodmanConfig tests
+# ============================================================================
+
+
+class TestPodmanConfig:
+ """Tests for PodmanConfig dataclass."""
+
+ def test_default_config(self):
+ """Test default configuration values."""
+ config = PodmanConfig()
+ assert config.memory_limit == "256m"
+ assert config.cpu_quota is None
+ assert config.network_disabled is True
+ assert config.working_dir == "/workspace"
+ assert config.gpu is False
+ assert config.extra_args is None
+
+ def test_custom_config(self):
+ """Test custom configuration values."""
+ config = PodmanConfig(
+ memory_limit="512m",
+ cpu_quota=0.5,
+ network_disabled=False,
+ gpu=True,
+ extra_args=["--security-opt", "label=disable"],
+ )
+ assert config.memory_limit == "512m"
+ assert config.cpu_quota == 0.5
+ assert config.network_disabled is False
+ assert config.gpu is True
+ assert config.extra_args == ["--security-opt", "label=disable"]
+
+
+# ============================================================================
+# LRUCache tests (same as Docker implementation)
+# ============================================================================
+
+
+class TestLRUCache:
+    """Tests for LRUCache implementation."""
+
+    # Entries are keyed by the (code_hash, tests_hash) pair; the cache exposes
+    # hit/miss/size counters through its `stats` mapping.
+
+    def _make_batch_result(self, code_hash: str, tests_hash: str) -> BatchTestResult:
+        """Helper to create a BatchTestResult."""
+        return BatchTestResult(
+            results=[],
+            code_hash=code_hash,
+            tests_hash=tests_hash,
+        )
+
+    def test_get_miss(self):
+        """Cache miss should return None and increment miss counter."""
+        cache = LRUCache(max_size=10)
+        result = cache.get("code1", "tests1")
+        assert result is None
+        assert cache.stats["misses"] == 1
+        assert cache.stats["hits"] == 0
+
+    def test_put_and_get(self):
+        """Should store and retrieve values."""
+        cache = LRUCache(max_size=10)
+        batch_result = self._make_batch_result("code1", "tests1")
+        cache.put("code1", "tests1", batch_result)
+
+        result = cache.get("code1", "tests1")
+        # Identity check: the cache must return the stored object itself,
+        # not a copy.
+        assert result is batch_result
+        assert cache.stats["hits"] == 1
+        assert cache.stats["size"] == 1
+
+    def test_lru_eviction(self):
+        """Should evict least recently used when full."""
+        cache = LRUCache(max_size=2)
+
+        result1 = self._make_batch_result("code1", "tests1")
+        result2 = self._make_batch_result("code2", "tests2")
+        result3 = self._make_batch_result("code3", "tests3")
+
+        cache.put("code1", "tests1", result1)
+        cache.put("code2", "tests2", result2)
+        # Access code1 to make it recently used
+        cache.get("code1", "tests1")
+        # Add code3, should evict code2 (least recently used)
+        cache.put("code3", "tests3", result3)
+
+        assert cache.get("code1", "tests1") is result1  # Still there
+        assert cache.get("code2", "tests2") is None  # Evicted
+        assert cache.get("code3", "tests3") is result3  # Still there
+
+    def test_put_overwrites_existing(self):
+        """Should overwrite existing values with same key."""
+        cache = LRUCache(max_size=10)
+        result1 = self._make_batch_result("code1", "tests1")
+        result2 = self._make_batch_result("code1", "tests1")
+
+        cache.put("code1", "tests1", result1)
+        cache.put("code1", "tests1", result2)
+
+        result = cache.get("code1", "tests1")
+        # Second put wins; size stays 1 (no duplicate entry).
+        assert result is result2
+        assert cache.stats["size"] == 1
+
+
+# ============================================================================
+# PodmanHPCSandbox tests (mocked subprocess)
+# ============================================================================
+
+
+class TestPodmanHPCSandbox:
+    """Tests for PodmanHPCSandbox with mocked subprocess."""
+
+    # Every test patches asyncio.create_subprocess_exec, so no real podman-hpc
+    # binary or container is touched. Several tests dispatch different mock
+    # processes based on call order; those assumptions about how many
+    # subprocess calls the sandbox makes are noted inline.
+
+    @pytest.fixture
+    def sandbox(self):
+        """Create a sandbox instance for testing."""
+        config = PodmanConfig(memory_limit="256m", network_disabled=True)
+        return PodmanHPCSandbox(
+            container_name="test-container",
+            image="python:3.11-slim",
+            config=config,
+            python_version="3.11",
+        )
+
+    @pytest.mark.asyncio
+    async def test_start_creates_container(self, sandbox):
+        """Start should create and run a persistent container."""
+        mock_process = AsyncMock()
+        mock_process.returncode = 0
+        mock_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec:
+            await sandbox.start()
+
+            # Should have called rm -f, run -d, and mkdir
+            # NOTE(review): assumes start() issues exactly three subprocess
+            # calls in this order — confirm against the implementation.
+            assert mock_exec.call_count == 3
+            calls = mock_exec.call_args_list
+
+            # First call: rm -f
+            assert calls[0][0][0] == "podman-hpc"
+            assert "rm" in calls[0][0]
+            assert "-f" in calls[0][0]
+
+            # Second call: run -d
+            assert calls[1][0][0] == "podman-hpc"
+            assert "run" in calls[1][0]
+            assert "-d" in calls[1][0]
+            assert "--name" in calls[1][0]
+            assert "test-container" in calls[1][0]
+            assert "sleep" in calls[1][0]
+            assert "infinity" in calls[1][0]
+
+            # Third call: mkdir
+            assert calls[2][0][0] == "podman-hpc"
+            assert "exec" in calls[2][0]
+            assert "mkdir" in calls[2][0]
+
+    @pytest.mark.asyncio
+    async def test_reset_clears_workspace(self, sandbox):
+        """Reset should clear the workspace directory."""
+        # Pretend start() already ran so reset() does not try to (re)create
+        # the container.
+        sandbox._started = True
+
+        mock_process = AsyncMock()
+        mock_process.returncode = 0
+        mock_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        with patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec:
+            await sandbox.reset()
+
+            mock_exec.assert_called_once()
+            args = mock_exec.call_args[0]
+            assert "podman-hpc" in args
+            assert "exec" in args
+            assert "rm" in " ".join(args)
+            assert "/workspace/*" in " ".join(args)
+
+    @pytest.mark.asyncio
+    async def test_compile_success(self, sandbox):
+        """Compile should return SUCCESS for valid code."""
+        sandbox._started = True
+
+        # A single always-succeeding process covers both the file write and
+        # the py_compile invocation.
+        mock_process = AsyncMock()
+        mock_process.returncode = 0
+        mock_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        with patch("asyncio.create_subprocess_exec", return_value=mock_process):
+            result = await sandbox.compile("print('hello')")
+
+        assert result.status == CompileStatus.SUCCESS
+        assert result.error_message is None
+
+    @pytest.mark.asyncio
+    async def test_compile_syntax_error(self, sandbox):
+        """Compile should return SYNTAX_ERROR for invalid code."""
+        sandbox._started = True
+
+        error_output = b"  File \"_check.py\", line 1\n    def foo(\n           ^\nSyntaxError: invalid syntax"
+
+        # Create two different mock processes:
+        # 1. For _write_file (tar command) - should succeed
+        # 2. For py_compile - should fail with syntax error
+        write_process = AsyncMock()
+        write_process.returncode = 0
+        write_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        compile_process = AsyncMock()
+        compile_process.returncode = 1
+        compile_process.communicate = AsyncMock(return_value=(b"", error_output))
+
+        call_count = [0]
+        def create_mock_process(*args, **kwargs):
+            call_count[0] += 1
+            # First call is tar (write_file), second is py_compile
+            if call_count[0] == 1:
+                return write_process
+            return compile_process
+
+        with patch("asyncio.create_subprocess_exec", side_effect=create_mock_process):
+            result = await sandbox.compile("def foo(")
+
+        assert result.status == CompileStatus.SYNTAX_ERROR
+        assert "SyntaxError" in result.error_message
+        assert result.error_line == 1
+
+    @pytest.mark.asyncio
+    async def test_execute_success(self, sandbox):
+        """Execute should return SUCCESS and stdout for valid code."""
+        sandbox._started = True
+
+        # Mock two processes: one for compile (py_compile), one for execute
+        compile_process = AsyncMock()
+        compile_process.returncode = 0
+        compile_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        exec_process = AsyncMock()
+        exec_process.returncode = 0
+        exec_process.communicate = AsyncMock(return_value=(b"hello world\n", b""))
+
+        call_count = [0]
+        def mock_create_subprocess(*args, **kwargs):
+            call_count[0] += 1
+            # First few calls are for compile (write file, py_compile)
+            # Later calls are for execute (write file, run)
+            # NOTE(review): the "<= 2" cutoff encodes the expected number of
+            # pre-execution subprocess calls — confirm against implementation.
+            if "py_compile" in args or call_count[0] <= 2:
+                return compile_process
+            return exec_process
+
+        with patch("asyncio.create_subprocess_exec", side_effect=mock_create_subprocess):
+            result = await sandbox.execute("print('hello world')")
+
+        assert result.compiled
+        assert result.run_status == RunStatus.SUCCESS
+        assert "hello world" in result.stdout
+
+    @pytest.mark.asyncio
+    async def test_execute_runtime_error(self, sandbox):
+        """Execute should return RUNTIME_ERROR for code that raises exception."""
+        sandbox._started = True
+
+        # Mock processes for various stages:
+        # 1. tar write (compile _write_file)
+        # 2. py_compile
+        # 3. tar write (execute _write_file)
+        # 4. python execution (runtime error)
+        success_process = AsyncMock()
+        success_process.returncode = 0
+        success_process.communicate = AsyncMock(return_value=(b"", b""))
+
+        exec_process = AsyncMock()
+        exec_process.returncode = 1
+        exec_process.communicate = AsyncMock(return_value=(b"", b"ZeroDivisionError: division by zero"))
+
+        call_count = [0]
+        def mock_create_subprocess(*args, **kwargs):
+            call_count[0] += 1
+            # Calls 1-3 are compile phase (tar, py_compile) and execute tar
+            # Call 4 is the actual execution
+            if call_count[0] <= 3:
+                return success_process
+            return exec_process
+
+        with patch("asyncio.create_subprocess_exec", side_effect=mock_create_subprocess):
+            result = await sandbox.execute("1/0")
+
+        assert result.compiled
+        assert result.run_status == RunStatus.RUNTIME_ERROR
+        assert "ZeroDivisionError" in result.stderr
+
+    def test_parse_syntax_error(self):
+        """Test syntax error parsing."""
+        # Exercises the parser directly (no instance state is read).
+        error_msg = """  File "_check.py", line 5
+    def foo(
+            ^
+SyntaxError: invalid syntax"""
+
+        line, column, clean_msg = PodmanHPCSandbox._parse_syntax_error(error_msg)
+
+        assert line == 5
+        assert "SyntaxError" in clean_msg
+        assert "invalid syntax" in clean_msg
+
+
+# ============================================================================
+# PodmanHPCSandboxPool tests
+# ============================================================================
+
+
+class TestPodmanHPCSandboxPool:
+    """Tests for PodmanHPCSandboxPool."""
+
+    def test_parse_python_version_from_image(self):
+        """Should extract Python version from image name."""
+        assert PodmanHPCSandboxPool._parse_python_version("python:3.11-slim") == "3.11"
+        assert PodmanHPCSandboxPool._parse_python_version("python:3.10") == "3.10"
+        assert PodmanHPCSandboxPool._parse_python_version("ghcr.io/foo/python:3.12-bullseye") == "3.12"
+        assert PodmanHPCSandboxPool._parse_python_version("custom-image:latest") == "3.11"  # fallback
+
+    def test_pool_initialization(self):
+        """Test pool initialization without starting."""
+        # Constructing a pool must not launch containers; availability is 0
+        # until start() is called.
+        pool = PodmanHPCSandboxPool(
+            n_workers=4,
+            image="python:3.11-slim",
+            cache_size=1000,
+        )
+
+        assert pool.python_version == "3.11"
+        assert pool.available == 0  # Not started yet
+        assert pool.cache_stats["size"] == 0
+
+    @pytest.mark.asyncio
+    async def test_checkout_before_start_raises(self):
+        """Checkout before start should raise RuntimeError."""
+        pool = PodmanHPCSandboxPool(n_workers=2)
+
+        with pytest.raises(RuntimeError, match="not started"):
+            await pool.checkout()
+
+    @pytest.mark.asyncio
+    async def test_cache_operations(self):
+        """Test cache get/put operations."""
+        pool = PodmanHPCSandboxPool(n_workers=2, cache_size=100)
+
+        batch_result = BatchTestResult(
+            results=[],
+            code_hash="abc123",
+            tests_hash="def456",
+        )
+
+        # Cache miss
+        assert pool.get_cached("abc123", "def456") is None
+
+        # Cache put
+        pool.put_cached("abc123", "def456", batch_result)
+
+        # Cache hit
+        result = pool.get_cached("abc123", "def456")
+        assert result is batch_result
+
+
+# ============================================================================
+# Backend detection tests
+# ============================================================================
+
+
+class TestBackendDetection:
+    """Tests for backend detection functions."""
+
+    # is_docker_available is patched at its *use* site
+    # (ludic.envs.code_exec.backend) so the module under test sees the stub;
+    # podman-hpc / singularity availability is driven via shutil.which.
+
+    def test_sandbox_backend_enum(self):
+        """Test SandboxBackend enum values."""
+        assert SandboxBackend.DOCKER.value == "docker"
+        assert SandboxBackend.PODMAN_HPC.value == "podman-hpc"
+        assert SandboxBackend.SINGULARITY.value == "singularity"
+        assert SandboxBackend.AUTO.value == "auto"
+
+    def test_is_podman_hpc_available_not_installed(self):
+        """Should return False when podman-hpc is not in PATH."""
+        with patch("shutil.which", return_value=None):
+            assert is_podman_hpc_available() is False
+
+    def test_is_podman_hpc_available_installed(self):
+        """Should return True when podman-hpc is in PATH."""
+        with patch("shutil.which", return_value="/usr/bin/podman-hpc"):
+            assert is_podman_hpc_available() is True
+
+    def test_is_singularity_available_not_installed(self):
+        """Should return False when singularity is not in PATH."""
+        with patch("shutil.which", return_value=None):
+            assert is_singularity_available() is False
+
+    def test_is_singularity_available_installed(self):
+        """Should return True when singularity is in PATH."""
+        def mock_which(cmd):
+            if cmd == "singularity":
+                return "/usr/bin/singularity"
+            return None
+
+        with patch("shutil.which", side_effect=mock_which):
+            assert is_singularity_available() is True
+
+    def test_is_singularity_available_apptainer(self):
+        """Should return True when apptainer (renamed singularity) is in PATH."""
+        def mock_which(cmd):
+            if cmd == "apptainer":
+                return "/usr/bin/apptainer"
+            return None
+
+        with patch("shutil.which", side_effect=mock_which):
+            assert is_singularity_available() is True
+
+    def test_detect_backend_in_slurm_with_podman(self):
+        """In Slurm with podman-hpc available, should prefer podman-hpc."""
+        with patch.dict(os.environ, {"SLURM_JOB_ID": "12345"}):
+            with patch("shutil.which", return_value="/usr/bin/podman-hpc"):
+                with patch("ludic.envs.code_exec.backend.is_docker_available", return_value=True):
+                    backend = detect_available_backend()
+                    assert backend == "podman-hpc"
+
+    def test_detect_backend_outside_slurm_with_docker(self):
+        """Outside Slurm with Docker available, should prefer Docker."""
+        with patch.dict(os.environ, {}, clear=True):
+            # pop is a no-op after clear=True; kept for emphasis.
+            os.environ.pop("SLURM_JOB_ID", None)
+            with patch("ludic.envs.code_exec.backend.is_docker_available", return_value=True):
+                backend = detect_available_backend()
+                assert backend == "docker"
+
+    def test_detect_backend_outside_slurm_no_docker_with_podman(self):
+        """Outside Slurm without Docker but with podman-hpc, should use podman-hpc."""
+        with patch.dict(os.environ, {}, clear=True):
+            os.environ.pop("SLURM_JOB_ID", None)
+            with patch("ludic.envs.code_exec.backend.is_docker_available", return_value=False):
+                with patch("shutil.which", return_value="/usr/bin/podman-hpc"):
+                    backend = detect_available_backend()
+                    assert backend == "podman-hpc"
+
+    def test_detect_backend_none_available_raises(self):
+        """Should raise RuntimeError when no backend is available."""
+        with patch.dict(os.environ, {}, clear=True):
+            os.environ.pop("SLURM_JOB_ID", None)
+            with patch("ludic.envs.code_exec.backend.is_docker_available", return_value=False):
+                with patch("shutil.which", return_value=None):
+                    with pytest.raises(RuntimeError, match="No sandbox backend available"):
+                        detect_available_backend()
+
+    def test_get_backend_info(self):
+        """Test get_backend_info returns structured data."""
+        with patch.dict(os.environ, {"SLURM_JOB_ID": "99999"}):
+            with patch("ludic.envs.code_exec.backend.is_docker_available", return_value=False):
+                with patch("shutil.which", return_value="/usr/bin/podman-hpc"):
+                    info = get_backend_info()
+
+                    assert info["environment"]["in_slurm"] is True
+                    assert info["environment"]["slurm_job_id"] == "99999"
+                    assert "docker" in info["backends"]
+                    assert "podman-hpc" in info["backends"]
+                    assert info["backends"]["podman-hpc"]["available"] is True
+                    assert info["backends"]["docker"]["available"] is False
+
+
+# ============================================================================
+# Factory tests
+# ============================================================================
+
+
+class TestFactory:
+    """Tests for create_sandbox_pool factory."""
+
+    @pytest.mark.asyncio
+    async def test_factory_unknown_backend_raises(self):
+        """Factory should raise ValueError for unknown backend."""
+        # Imported locally — presumably to avoid importing the factory (and
+        # its backend dependencies) at module collection time; confirm.
+        from ludic.envs.code_exec.factory import create_sandbox_pool
+
+        with pytest.raises(ValueError, match="Unknown backend"):
+            await create_sandbox_pool(backend="unknown")
+
+    @pytest.mark.asyncio
+    async def test_factory_singularity_not_implemented(self):
+        """Factory should raise NotImplementedError for singularity."""
+        from ludic.envs.code_exec.factory import create_sandbox_pool
+
+        with pytest.raises(NotImplementedError, match="Singularity backend is not yet implemented"):
+            await create_sandbox_pool(backend="singularity")
diff --git a/tests/test_code_exec_runners.py b/tests/test_code_exec_runners.py
new file mode 100644
index 0000000..7853c4c
--- /dev/null
+++ b/tests/test_code_exec_runners.py
@@ -0,0 +1,478 @@
+"""
+Unit tests for ludic.envs.code_exec.runners
+
+Tests hash utilities and StdinStdoutRunner with mock sandbox.
+"""
+
+import pytest
+
+from ludic.envs.code_exec.runners import (
+ compute_hash,
+ hash_tests,
+ StdinStdoutRunner,
+)
+from ludic.envs.code_exec.types import (
+ TestCase,
+ CompileResult,
+ CompileStatus,
+ ExecutionResult,
+ RunStatus,
+)
+from ludic.envs.code_exec.adapters.base import ExactMatchVerifier
+
+
+# ---------------------------------------------------------------------
+# Hash Utility Tests
+# ---------------------------------------------------------------------
+
+
+class TestComputeHash:
+ def test_returns_16_chars(self):
+ result = compute_hash("hello world")
+ assert len(result) == 16
+
+ def test_deterministic(self):
+ result1 = compute_hash("test content")
+ result2 = compute_hash("test content")
+ assert result1 == result2
+
+ def test_different_content_different_hash(self):
+ result1 = compute_hash("content a")
+ result2 = compute_hash("content b")
+ assert result1 != result2
+
+ def test_hex_characters_only(self):
+ result = compute_hash("any content")
+ assert all(c in "0123456789abcdef" for c in result)
+
+ def test_empty_string(self):
+ result = compute_hash("")
+ assert len(result) == 16
+
+
+class TestHashTests:
+ def test_returns_16_chars(self):
+ tests = [TestCase(input="1", expected="2", id="t1")]
+ result = hash_tests(tests)
+ assert len(result) == 16
+
+ def test_deterministic(self):
+ tests = [
+ TestCase(input="1", expected="a", id="t1"),
+ TestCase(input="2", expected="b", id="t2"),
+ ]
+ result1 = hash_tests(tests)
+ result2 = hash_tests(tests)
+ assert result1 == result2
+
+ def test_different_tests_different_hash(self):
+ tests1 = [TestCase(input="1", expected="a", id="t1")]
+ tests2 = [TestCase(input="2", expected="b", id="t2")]
+ result1 = hash_tests(tests1)
+ result2 = hash_tests(tests2)
+ assert result1 != result2
+
+ def test_order_matters(self):
+ tests1 = [
+ TestCase(input="1", expected="a", id="t1"),
+ TestCase(input="2", expected="b", id="t2"),
+ ]
+ tests2 = [
+ TestCase(input="2", expected="b", id="t2"),
+ TestCase(input="1", expected="a", id="t1"),
+ ]
+ result1 = hash_tests(tests1)
+ result2 = hash_tests(tests2)
+ assert result1 != result2
+
+ def test_empty_list(self):
+ result = hash_tests([])
+ assert len(result) == 16
+
+
+# ---------------------------------------------------------------------
+# Mock Sandbox for Runner Tests
+# ---------------------------------------------------------------------
+
+
+class MockSandbox:
+    """
+    A mock sandbox for testing runners.
+
+    Can be configured with:
+    - compile_result: What to return from compile()
+    - execute_results: Dict mapping stdin -> ExecutionResult
+    - default_stdout: stdout of the fallback successful result used for
+      any stdin not present in execute_results
+
+    Calls to compile() and execute() are recorded in `compile_calls` and
+    `execute_calls` so tests can assert on invocation counts and arguments.
+    """
+
+    def __init__(
+        self,
+        compile_result: CompileResult | None = None,
+        execute_results: dict[str, ExecutionResult] | None = None,
+        default_stdout: str = "",
+    ):
+        # Default to a successful compile unless the test overrides it.
+        self._compile_result = compile_result or CompileResult(
+            status=CompileStatus.SUCCESS,
+            duration_ms=10.0,
+        )
+        self._execute_results = execute_results or {}
+        self._default_stdout = default_stdout
+        self._python_version = "3.11"
+
+        # Track calls for assertions
+        self.compile_calls: list[str] = []
+        self.execute_calls: list[tuple[str, str]] = []
+
+    @property
+    def python_version(self) -> str:
+        return self._python_version
+
+    async def reset(self) -> None:
+        # No per-episode state to clear in the mock.
+        pass
+
+    async def compile(self, code: str, *, timeout_s: float = 5.0) -> CompileResult:
+        self.compile_calls.append(code)
+        return self._compile_result
+
+    async def execute(
+        self,
+        code: str,
+        *,
+        stdin: str = "",
+        skip_compile: bool = False,
+        timeout_s: float = 10.0,
+        memory_limit_mb: int | None = None,
+        env_vars: dict[str, str] | None = None,
+    ) -> ExecutionResult:
+        self.execute_calls.append((code, stdin))
+
+        # Canned per-stdin result takes priority over the generic default.
+        if stdin in self._execute_results:
+            return self._execute_results[stdin]
+
+        # Default: successful execution returning default_stdout
+        return ExecutionResult(
+            compile_result=self._compile_result,
+            run_status=RunStatus.SUCCESS,
+            stdout=self._default_stdout,
+            stderr="",
+            exit_code=0,
+            compile_duration_ms=10.0,
+            run_duration_ms=50.0,
+            total_duration_ms=60.0,
+        )
+
+
+# ---------------------------------------------------------------------
+# StdinStdoutRunner Tests
+# ---------------------------------------------------------------------
+
+
+class TestStdinStdoutRunner:
+    """Tests for StdinStdoutRunner.run_tests driven through MockSandbox.
+
+    MockSandbox returns a canned ExecutionResult per stdin value, so each
+    test controls pass/fail outcomes purely through its execute_results map.
+    """
+
+    @pytest.mark.asyncio
+    async def test_all_tests_pass(self):
+        """When every output matches its expected value, all tests pass."""
+        sandbox = MockSandbox(default_stdout="expected_output")
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="expected_output", id="t1"),
+            TestCase(input="input2", expected="expected_output", id="t2"),
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="print('expected_output')",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.all_passed is True
+        assert result.passed_count == 2
+        assert result.total_count == 2
+
+    @pytest.mark.asyncio
+    async def test_some_tests_fail(self):
+        """A wrong output fails only its own test; counts reflect the split."""
+        # First test passes, second fails
+        execute_results = {
+            "input1": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.SUCCESS,
+                stdout="correct",
+            ),
+            "input2": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.SUCCESS,
+                stdout="wrong",
+            ),
+        }
+        sandbox = MockSandbox(execute_results=execute_results)
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="correct", id="t1"),
+            TestCase(input="input2", expected="correct", id="t2"),  # Will fail
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.all_passed is False
+        assert result.passed_count == 1
+        assert result.total_count == 2
+        assert result.results[0].passed is True
+        assert result.results[1].passed is False
+
+    @pytest.mark.asyncio
+    async def test_compile_failure_fails_all_tests(self):
+        """A compile failure fails every test without executing any of them."""
+        compile_result = CompileResult(
+            status=CompileStatus.SYNTAX_ERROR,
+            error_message="SyntaxError: invalid syntax",
+            error_line=5,
+            duration_ms=5.0,
+        )
+        sandbox = MockSandbox(compile_result=compile_result)
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="x", id="t1"),
+            TestCase(input="input2", expected="y", id="t2"),
+            TestCase(input="input3", expected="z", id="t3"),
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="invalid syntax here",
+            tests=tests,
+            verifier=verifier,
+            compile_first=True,
+        )
+
+        assert result.compile_failed is True
+        assert result.all_passed is False
+        assert result.passed_count == 0
+        assert len(result.results) == 3
+
+        # All should have compile failure details
+        for r in result.results:
+            assert r.compiled is False
+            assert "Compilation failed" in (r.comparison_details or "")
+
+    @pytest.mark.asyncio
+    async def test_stop_on_first_failure(self):
+        """With stop_on_first_failure, later tests are marked NOT_RUN."""
+        execute_results = {
+            "input1": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.SUCCESS,
+                stdout="wrong",  # First test fails
+            ),
+        }
+        sandbox = MockSandbox(execute_results=execute_results, default_stdout="correct")
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(input="input1", expected="correct", id="t1"),  # Fails
+            TestCase(input="input2", expected="correct", id="t2"),  # Should be skipped
+            TestCase(input="input3", expected="correct", id="t3"),  # Should be skipped
+        ]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+            stop_on_first_failure=True,
+        )
+
+        assert result.passed_count == 0
+        assert len(result.results) == 3
+
+        # First test ran and failed
+        assert result.results[0].passed is False
+        assert result.results[0].ran is True
+
+        # Second and third were skipped
+        assert result.results[1].passed is False
+        assert result.results[1].execution.run_status == RunStatus.NOT_RUN
+        assert result.results[2].passed is False
+        assert result.results[2].execution.run_status == RunStatus.NOT_RUN
+
+    @pytest.mark.asyncio
+    async def test_runtime_error_fails_test(self):
+        """A RUNTIME_ERROR result fails the test and surfaces a detail string."""
+        execute_results = {
+            "input1": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.RUNTIME_ERROR,
+                stdout="",
+                stderr="NameError: name 'x' is not defined",
+                exit_code=1,
+            ),
+        }
+        sandbox = MockSandbox(execute_results=execute_results)
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="print(x)",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.passed_count == 0
+        assert result.results[0].passed is False
+        assert "Runtime error" in (result.results[0].comparison_details or "")
+
+    @pytest.mark.asyncio
+    async def test_timeout_fails_test(self):
+        """A TIMEOUT result fails the test with a 'timed out' detail."""
+        execute_results = {
+            "input1": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.TIMEOUT,
+                stdout="",
+                stderr="",
+                run_duration_ms=5000.0,
+            ),
+        }
+        sandbox = MockSandbox(execute_results=execute_results)
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="while True: pass",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.passed_count == 0
+        assert result.results[0].passed is False
+        assert "timed out" in (result.results[0].comparison_details or "").lower()
+
+    @pytest.mark.asyncio
+    async def test_memory_exceeded_fails_test(self):
+        """A MEMORY_EXCEEDED result fails the test with a 'Memory' detail."""
+        execute_results = {
+            "input1": ExecutionResult(
+                compile_result=CompileResult(status=CompileStatus.SUCCESS),
+                run_status=RunStatus.MEMORY_EXCEEDED,
+                stdout="",
+                stderr="",
+            ),
+        }
+        sandbox = MockSandbox(execute_results=execute_results)
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="x = [0] * 10**9",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.passed_count == 0
+        assert result.results[0].passed is False
+        assert "Memory" in (result.results[0].comparison_details or "")
+
+    @pytest.mark.asyncio
+    async def test_per_test_timeout_override(self):
+        """A per-test metadata timeout should not prevent execution."""
+        sandbox = MockSandbox(default_stdout="output")
+        runner = StdinStdoutRunner(default_timeout_s=5.0)
+        verifier = ExactMatchVerifier()
+
+        tests = [
+            TestCase(
+                input="input1",
+                expected="output",
+                id="t1",
+                metadata={"timeout_s": 30.0},  # Override
+            ),
+        ]
+
+        await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        # Check that execute was called with the overridden timeout
+        # The mock doesn't actually use timeout, but we can verify the call was made
+        # NOTE(review): MockSandbox.execute does not record timeout_s, so the
+        # override value itself is not asserted here.
+        assert len(sandbox.execute_calls) == 1
+
+    @pytest.mark.asyncio
+    async def test_compile_first_false_skips_compile(self):
+        """With compile_first=False the sandbox's compile() is never invoked."""
+        sandbox = MockSandbox(default_stdout="output")
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+            compile_first=False,
+        )
+
+        # compile() should not be called when compile_first=False
+        assert len(sandbox.compile_calls) == 0
+        assert len(sandbox.execute_calls) == 1
+
+    @pytest.mark.asyncio
+    async def test_hashes_computed_correctly(self):
+        """Result carries 16-char hex code/tests hashes consistent with compute_hash."""
+        sandbox = MockSandbox(default_stdout="output")
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()
+
+        code = "print('hello')"
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code=code,
+            tests=tests,
+            verifier=verifier,
+        )
+
+        # Verify hashes are present and have correct format
+        assert len(result.code_hash) == 16
+        assert len(result.tests_hash) == 16
+        assert all(c in "0123456789abcdef" for c in result.code_hash)
+        assert all(c in "0123456789abcdef" for c in result.tests_hash)
+
+        # Verify code_hash matches compute_hash
+        # (redundant local import — compute_hash is already imported at module top)
+        from ludic.envs.code_exec.runners import compute_hash
+
+        assert result.code_hash == compute_hash(code)
+
+    @pytest.mark.asyncio
+    async def test_whitespace_stripping_in_comparison(self):
+        """Verifier should strip whitespace from output."""
+        sandbox = MockSandbox(default_stdout="  output\n")
+        runner = StdinStdoutRunner()
+        verifier = ExactMatchVerifier()  # strips by default
+
+        tests = [TestCase(input="input1", expected="output", id="t1")]
+
+        result = await runner.run_tests(
+            sandbox=sandbox,
+            code="code",
+            tests=tests,
+            verifier=verifier,
+        )
+
+        assert result.all_passed is True
diff --git a/tests/test_code_exec_types.py b/tests/test_code_exec_types.py
new file mode 100644
index 0000000..edffc08
--- /dev/null
+++ b/tests/test_code_exec_types.py
@@ -0,0 +1,400 @@
+"""
+Unit tests for ludic.envs.code_exec.types
+
+Tests all dataclasses and their properties/methods.
+"""
+
+import pytest
+
+from ludic.envs.code_exec.types import (
+ CompileStatus,
+ RunStatus,
+ CompileResult,
+ ExecutionResult,
+ TestCase,
+ TestResult,
+ BatchTestResult,
+)
+
+
+# ---------------------------------------------------------------------
+# CompileResult Tests
+# ---------------------------------------------------------------------
+
+
+class TestCompileResult:
+ def test_success_property_true_when_status_success(self):
+ result = CompileResult(status=CompileStatus.SUCCESS)
+ assert result.success is True
+
+ def test_success_property_false_when_syntax_error(self):
+ result = CompileResult(
+ status=CompileStatus.SYNTAX_ERROR,
+ error_message="SyntaxError: invalid syntax",
+ error_line=5,
+ error_column=10,
+ )
+ assert result.success is False
+
+ def test_success_property_false_for_all_error_statuses(self):
+ error_statuses = [
+ CompileStatus.SYNTAX_ERROR,
+ CompileStatus.IMPORT_ERROR,
+ CompileStatus.TIMEOUT,
+ CompileStatus.UNKNOWN_ERROR,
+ ]
+ for status in error_statuses:
+ result = CompileResult(status=status)
+ assert result.success is False, f"Expected success=False for {status}"
+
+ def test_duration_ms_default_zero(self):
+ result = CompileResult(status=CompileStatus.SUCCESS)
+ assert result.duration_ms == 0.0
+
+
+# ---------------------------------------------------------------------
+# ExecutionResult Tests
+# ---------------------------------------------------------------------
+
+
+class TestExecutionResult:
+ def test_compiled_true_when_compile_succeeded(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ )
+ assert result.compiled is True
+
+ def test_compiled_false_when_compile_failed(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SYNTAX_ERROR),
+ )
+ assert result.compiled is False
+
+ def test_succeeded_true_when_compiled_and_run_success(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ stdout="output",
+ )
+ assert result.succeeded is True
+
+ def test_succeeded_false_when_compile_failed(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SYNTAX_ERROR),
+ )
+ assert result.succeeded is False
+
+ def test_succeeded_false_when_runtime_error(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.RUNTIME_ERROR,
+ stderr="NameError: name 'x' is not defined",
+ )
+ assert result.succeeded is False
+
+ def test_succeeded_false_when_timeout(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.TIMEOUT,
+ )
+ assert result.succeeded is False
+
+ def test_timed_out_true_when_compile_timeout(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.TIMEOUT),
+ )
+ assert result.timed_out is True
+
+ def test_timed_out_true_when_run_timeout(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.TIMEOUT,
+ )
+ assert result.timed_out is True
+
+ def test_timed_out_false_when_success(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ )
+ assert result.timed_out is False
+
+ def test_default_values(self):
+ result = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ )
+ assert result.run_status is None
+ assert result.stdout == ""
+ assert result.stderr == ""
+ assert result.exit_code is None
+ assert result.cache_hit is False
+
+
+# ---------------------------------------------------------------------
+# TestCase Tests
+# ---------------------------------------------------------------------
+
+
+class TestTestCase:
+ def test_basic_creation(self):
+ tc = TestCase(input="1 2", expected="3", id="test_add")
+ assert tc.input == "1 2"
+ assert tc.expected == "3"
+ assert tc.id == "test_add"
+
+ def test_default_weight(self):
+ tc = TestCase(input="x", expected="y")
+ assert tc.weight == 1.0
+
+ def test_default_metadata_is_empty_dict(self):
+ tc = TestCase(input="x", expected="y")
+ assert tc.metadata == {}
+
+ def test_metadata_with_custom_values(self):
+ tc = TestCase(
+ input="x",
+ expected="y",
+ metadata={"timeout_s": 10.0, "category": "math"},
+ )
+ assert tc.metadata["timeout_s"] == 10.0
+ assert tc.metadata["category"] == "math"
+
+
+# ---------------------------------------------------------------------
+# TestResult Tests
+# ---------------------------------------------------------------------
+
+
+class TestTestResult:
+ def test_compiled_delegates_to_execution(self):
+ execution = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ )
+ tr = TestResult(
+ test_case=TestCase(input="1", expected="1"),
+ passed=True,
+ actual="1",
+ execution=execution,
+ )
+ assert tr.compiled is True
+
+ def test_compiled_false_when_execution_compile_failed(self):
+ execution = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SYNTAX_ERROR),
+ )
+ tr = TestResult(
+ test_case=TestCase(input="1", expected="1"),
+ passed=False,
+ actual="",
+ execution=execution,
+ )
+ assert tr.compiled is False
+
+ def test_ran_true_when_execution_has_run_status(self):
+ execution = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.SUCCESS,
+ )
+ tr = TestResult(
+ test_case=TestCase(input="1", expected="1"),
+ passed=True,
+ actual="1",
+ execution=execution,
+ )
+ assert tr.ran is True
+
+ def test_ran_false_when_run_status_none(self):
+ execution = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SYNTAX_ERROR),
+ run_status=None,
+ )
+ tr = TestResult(
+ test_case=TestCase(input="1", expected="1"),
+ passed=False,
+ actual="",
+ execution=execution,
+ )
+ assert tr.ran is False
+
+ def test_ran_false_when_run_status_not_run(self):
+ execution = ExecutionResult(
+ compile_result=CompileResult(status=CompileStatus.SUCCESS),
+ run_status=RunStatus.NOT_RUN,
+ )
+ tr = TestResult(
+ test_case=TestCase(input="1", expected="1"),
+ passed=False,
+ actual="",
+ execution=execution,
+ )
+ assert tr.ran is False
+
+
+# ---------------------------------------------------------------------
+# BatchTestResult Tests
+# ---------------------------------------------------------------------
+
+
+def _make_test_result(passed: bool, compiled: bool = True) -> TestResult:
+ """Helper to create TestResult with minimal boilerplate."""
+ if compiled:
+ compile_result = CompileResult(status=CompileStatus.SUCCESS)
+ run_status = RunStatus.SUCCESS if passed else RunStatus.RUNTIME_ERROR
+ else:
+ compile_result = CompileResult(status=CompileStatus.SYNTAX_ERROR)
+ run_status = None
+
+ return TestResult(
+ test_case=TestCase(input="x", expected="y"),
+ passed=passed,
+ actual="y" if passed else "z",
+ execution=ExecutionResult(
+ compile_result=compile_result,
+ run_status=run_status,
+ ),
+ )
+
+
+class TestBatchTestResult:
+ def test_passed_count(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.passed_count == 2
+
+ def test_total_count(self):
+ results = [_make_test_result(passed=True) for _ in range(5)]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.total_count == 5
+
+ def test_all_passed_true_when_all_pass(self):
+ results = [_make_test_result(passed=True) for _ in range(3)]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.all_passed is True
+
+ def test_all_passed_false_when_one_fails(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.all_passed is False
+
+ def test_all_passed_false_when_empty(self):
+ batch = BatchTestResult(results=[], code_hash="abc", tests_hash="xyz")
+ assert batch.all_passed is False
+
+ def test_pass_rate_full(self):
+ results = [_make_test_result(passed=True) for _ in range(4)]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.pass_rate == pytest.approx(1.0)
+
+ def test_pass_rate_half(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ _make_test_result(passed=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.pass_rate == pytest.approx(0.5)
+
+ def test_pass_rate_zero_when_empty(self):
+ batch = BatchTestResult(results=[], code_hash="abc", tests_hash="xyz")
+ assert batch.pass_rate == pytest.approx(0.0)
+
+ def test_first_failure_returns_first_failed_test(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=False), # first failure
+ _make_test_result(passed=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.first_failure is results[1]
+
+ def test_first_failure_none_when_all_pass(self):
+ results = [_make_test_result(passed=True) for _ in range(3)]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.first_failure is None
+
+ def test_compile_failed_true_when_first_result_not_compiled(self):
+ results = [
+ _make_test_result(passed=False, compiled=False),
+ _make_test_result(passed=False, compiled=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.compile_failed is True
+
+ def test_compile_failed_false_when_compiled(self):
+ results = [_make_test_result(passed=True)]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ assert batch.compile_failed is False
+
+ def test_compile_failed_false_when_empty(self):
+ batch = BatchTestResult(results=[], code_hash="abc", tests_hash="xyz")
+ assert batch.compile_failed is False
+
+ def test_get_failures_returns_only_failed_tests(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ failures = batch.get_failures()
+ assert len(failures) == 2
+ assert failures[0] is results[1]
+ assert failures[1] is results[3]
+
+ def test_get_successes_returns_only_passed_tests(self):
+ results = [
+ _make_test_result(passed=True),
+ _make_test_result(passed=False),
+ _make_test_result(passed=True),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+ successes = batch.get_successes()
+ assert len(successes) == 2
+ assert successes[0] is results[0]
+ assert successes[1] is results[2]
+
+ def test_timing_aggregation(self):
+ # Create results with specific timing
+ def make_result_with_timing(compile_ms: float, run_ms: float) -> TestResult:
+ return TestResult(
+ test_case=TestCase(input="x", expected="y"),
+ passed=True,
+ actual="y",
+ execution=ExecutionResult(
+ compile_result=CompileResult(
+ status=CompileStatus.SUCCESS,
+ duration_ms=compile_ms,
+ ),
+ run_status=RunStatus.SUCCESS,
+ compile_duration_ms=compile_ms,
+ run_duration_ms=run_ms,
+ total_duration_ms=compile_ms + run_ms,
+ ),
+ )
+
+ results = [
+ make_result_with_timing(10.0, 100.0),
+ make_result_with_timing(10.0, 200.0),
+ make_result_with_timing(10.0, 150.0),
+ ]
+ batch = BatchTestResult(results=results, code_hash="abc", tests_hash="xyz")
+
+ # Compile time: max across all (since compilation usually happens once)
+ assert batch.total_compile_ms == pytest.approx(10.0)
+
+ # Run time: sum across all tests
+ assert batch.total_run_ms == pytest.approx(450.0) # 100 + 200 + 150
+
+ # Total: sum of all total_duration_ms
+ assert batch.total_execution_ms == pytest.approx(480.0) # 110 + 210 + 160
diff --git a/tests/test_flash_attention.py b/tests/test_flash_attention.py
new file mode 100644
index 0000000..fa33e07
--- /dev/null
+++ b/tests/test_flash_attention.py
@@ -0,0 +1,151 @@
+"""
+GPU tests for Flash Attention and hardware detection.
+
+These tests are designed to run on interactive GPU nodes (not login nodes).
+Mark with @pytest.mark.gpu and run with: pytest -v -m gpu
+
+Usage on Isambard:
+ srun --nodes=1 --gpus=1 --time=10:00 --pty bash
+ uv run pytest tests/test_flash_attention.py -v -m gpu -s
+"""
+
+from __future__ import annotations
+
+import logging
+import pytest
+import torch
+
+# Configure logging for visibility during tests
+logging.basicConfig(level=logging.DEBUG, format="%(name)s: %(message)s")
+
+
+
+@pytest.mark.gpu
+def test_cuda_available():
+ """Verify CUDA is available (basic sanity check)."""
+ assert torch.cuda.is_available(), "CUDA not available - run on a GPU node"
+
+
+@pytest.mark.gpu
+def test_flash_sdp_enabled():
+ """Verify Flash SDP backend can be enabled."""
+ torch.backends.cuda.enable_flash_sdp(True)
+    # Note: flash_sdp_enabled() only reports the backend flag set via enable_flash_sdp();
+    # whether flash kernels actually run is decided per-call from input shapes and dtypes.
+ assert hasattr(torch.backends.cuda, "flash_sdp_enabled")
+
+
+@pytest.mark.gpu
+def test_detect_gpu_architecture():
+ """Detect real GPU architecture."""
+ from ludic.training.hardware import detect_gpu_architecture
+
+ arch = detect_gpu_architecture()
+ assert arch is not None, "Could not detect GPU architecture"
+
+ # Log the detected architecture
+ device_name = torch.cuda.get_device_name()
+ capability = torch.cuda.get_device_capability()
+ print(f"GPU: {device_name}")
+ print(f"Compute capability: sm_{capability[0]}{capability[1]}")
+ print(f"Detected architecture: {arch}")
+
+ # Validate known architectures
+ assert arch in ("hopper", "ampere", "ada", "turing", "volta", "older")
+
+
+@pytest.mark.gpu
+def test_get_cuda_version():
+ """Verify CUDA version detection."""
+ from ludic.training.hardware import get_cuda_version
+
+ version = get_cuda_version()
+ assert version is not None, "Could not get CUDA version"
+
+ major, minor = version
+ print(f"CUDA version: {major}.{minor}")
+
+ # Reasonable version bounds
+ assert major >= 11, f"CUDA version {major}.{minor} is too old for Flash Attention"
+
+
+@pytest.mark.gpu
+def test_flash_attn_import():
+ """Verify flash-attn package loads and reports version."""
+ try:
+ import flash_attn
+ version = flash_attn.__version__
+ print(f"flash-attn version: {version}")
+
+ # Check version is >= 2.7.0 for FA3 support
+ parts = version.split(".")
+ major, minor = int(parts[0]), int(parts[1])
+ assert (major, minor) >= (2, 7), f"flash-attn {version} < 2.7.0, FA3 not supported"
+
+ except ImportError as e:
+ pytest.skip(f"flash-attn not installed: {e}")
+
+
+@pytest.mark.gpu
+def test_get_optimal_attention_impl():
+ """Test optimal attention implementation selection."""
+ from ludic.training.hardware import get_optimal_attention_impl
+
+ # With flash attention enabled (default)
+ impl = get_optimal_attention_impl(disable_flash_attn=False)
+ print(f"Optimal attention (enabled): {impl}")
+ assert impl in ("flash_attention_3", "flash_attention_2", "sdpa", "eager")
+
+ # With flash attention disabled
+ impl_disabled = get_optimal_attention_impl(disable_flash_attn=True)
+ print(f"Optimal attention (disabled): {impl_disabled}")
+ assert impl_disabled == "sdpa"
+
+
+@pytest.mark.gpu
+def test_configure_flash_attention():
+ """Test full Flash Attention configuration."""
+ from ludic.training.hardware import configure_flash_attention
+
+ # Configure for CUDA device
+ attn_impl = configure_flash_attention("cuda", disable_flash_attn=False)
+ print(f"Configured attention: {attn_impl}")
+ assert attn_impl in ("flash_attention_3", "flash_attention_2", "sdpa")
+
+ # Configure for CPU (should return eager)
+ attn_impl_cpu = configure_flash_attention("cpu", disable_flash_attn=False)
+ assert attn_impl_cpu == "eager"
+
+
+@pytest.mark.gpu
+def test_model_with_flash_attention():
+ """Load a small model with flash attention and run forward pass."""
+ from ludic.training.hardware import configure_flash_attention
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+
+ # Configure flash attention
+ attn_impl = configure_flash_attention("cuda", disable_flash_attn=False)
+ print(f"Using attention: {attn_impl}")
+
+ # Load model with flash attention
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ torch_dtype=torch.bfloat16,
+ trust_remote_code=True,
+ attn_implementation=attn_impl,
+ ).cuda()
+
+ # Verify model loaded with correct attention
+ print(f"Model attention impl: {model.config._attn_implementation}")
+
+ # Run a forward pass
+ inputs = tokenizer("Hello, world!", return_tensors="pt").to("cuda")
+ with torch.no_grad():
+ outputs = model(**inputs)
+
+ assert outputs.logits is not None
+ assert outputs.logits.shape[0] == 1 # batch size
+ print(f"Forward pass successful, logits shape: {outputs.logits.shape}")
diff --git a/tests/test_incomplete_completion.py b/tests/test_incomplete_completion.py
index c72989d..d40c044 100644
--- a/tests/test_incomplete_completion.py
+++ b/tests/test_incomplete_completion.py
@@ -4,7 +4,7 @@
from ludic.agents.base_agent import Agent
from ludic.context.full_dialog import FullDialog
-from ludic.interaction.single_agent import SingleAgentSyncProtocol
+from ludic.interaction.single_agent import SingleAgentProtocol
from ludic.parsers import ParseResult
from tests._mocks import MockClient, MockEnv
@@ -66,7 +66,7 @@ async def test_single_agent_protocol_marks_incomplete_completion_as_parse_error(
ctx=FullDialog(),
parser=pass_through_parser,
)
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
env = MockEnv(max_steps=10, target="1")
rollouts = await protocol.run(env=env, max_steps=1)
diff --git a/tests/test_interaction.py b/tests/test_interaction.py
index 9159c2d..2351710 100644
--- a/tests/test_interaction.py
+++ b/tests/test_interaction.py
@@ -2,7 +2,7 @@
import pytest
from ludic.context.full_dialog import FullDialog
-from ludic.interaction.single_agent import SingleAgentSyncProtocol
+from ludic.interaction.single_agent import SingleAgentProtocol
from ludic.interaction.multi_agent import MultiAgentProtocol
from ludic.agents.base_agent import Agent
from ludic.inference.client import ChatResponse
@@ -28,7 +28,7 @@ async def test_happy_path_terminates_immediately():
env = MockEnv(max_steps=3, target="1")
# MockAgent provides a default ctx and a pass-through parser
agent = MockAgent(client=MockClient(text="1"))
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
# run() now returns List[Rollout]
rollouts = await protocol.run(
@@ -51,7 +51,7 @@ async def complete(self, request: ChatCompletionRequest, **kwargs):
env = MockEnv(max_steps=2, target="1")
agent = MockAgent(client=WrongClient())
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
rollouts = await protocol.run(
env=env,
@@ -97,7 +97,7 @@ async def test_run_episode_uses_action_parser_and_logs_parsed_action():
parser=action_parser
)
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
rollouts = await protocol.run(
env=env,
@@ -312,7 +312,7 @@ async def test_multi_agent_handles_unmanaged_bot_turns():
@pytest.mark.asyncio
async def test_single_agent_protocol_logs_parser_failure_without_env_step():
"""
- If the agent parser fails, SingleAgentSyncProtocol should:
+ If the agent parser fails, SingleAgentProtocol should:
- NOT call env.step()
- log a synthetic step with parse_error info
- feed the synthetic observation back to the agent context
@@ -337,7 +337,7 @@ def always_fail_parser(_: str) -> ParseResult:
ctx=FullDialog(),
parser=always_fail_parser,
)
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
rollouts = await protocol.run(env=env, max_steps=1)
@@ -497,7 +497,7 @@ async def test_single_agent_max_steps_truncation():
# Agent always says "wrong", env wants "correct"
env = MockEnv(max_steps=10, target="correct") # env allows many steps
agent = MockAgent(client=MockClient(text="wrong"))
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
# Protocol max_steps=3, so we'll hit that before env's max_steps
rollouts = await protocol.run(env=env, max_steps=3)
@@ -533,7 +533,7 @@ async def test_single_agent_env_truncation_preserved():
# Env will truncate after 2 wrong answers
env = MockEnv(max_steps=2, target="correct")
agent = MockAgent(client=MockClient(text="wrong"))
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
# Protocol allows many steps, but env will truncate at 2
rollouts = await protocol.run(env=env, max_steps=100)
@@ -559,7 +559,7 @@ async def test_single_agent_normal_termination_not_truncated():
"""
env = MockEnv(max_steps=10, target="win")
agent = MockAgent(client=MockClient(text="win"))
- protocol = SingleAgentSyncProtocol(agent=agent)
+ protocol = SingleAgentProtocol(agent=agent)
rollouts = await protocol.run(env=env, max_steps=100)
diff --git a/tests/test_public_api_imports.py b/tests/test_public_api_imports.py
index fc5609c..35bbd51 100644
--- a/tests/test_public_api_imports.py
+++ b/tests/test_public_api_imports.py
@@ -8,7 +8,7 @@ def test_top_level_exports_import() -> None:
from ludic.context import ContextStrategy, FullDialog, TruncatedThinkingContext # noqa: F401
from ludic.envs import LudicEnv, SingleAgentEnv, DatasetQAEnv # noqa: F401
from ludic.inference import VLLMChatClient, start_vllm_server, wait_for_vllm_health # noqa: F401
- from ludic.interaction import InteractionProtocol, SingleAgentSyncProtocol, MultiAgentProtocol, TraceCollector # noqa: F401
+ from ludic.interaction import InteractionProtocol, SingleAgentProtocol, MultiAgentProtocol, TraceCollector # noqa: F401
from ludic.parsers import boxed_parser, xml_tag_parser, compose_parsers, think_prefix_parser # noqa: F401
from ludic.distributed import create_vllm_publisher # noqa: F401
from ludic.types import Rollout, Step # noqa: F401
diff --git a/tests/test_rollout_engine.py b/tests/test_rollout_engine.py
index 20c17ed..85f6010 100644
--- a/tests/test_rollout_engine.py
+++ b/tests/test_rollout_engine.py
@@ -8,7 +8,7 @@
from ludic.agents.base_agent import Agent
from ludic.inference.client import ChatResponse
from ludic.interaction.base import InteractionProtocol
-from ludic.interaction.single_agent import SingleAgentSyncProtocol
+from ludic.interaction.single_agent import SingleAgentProtocol
from ludic.context.full_dialog import FullDialog
from ludic.envs.env import LudicEnv
from ludic.inference.request import ChatCompletionRequest, InferenceSpec, ReturnSpec
@@ -111,7 +111,7 @@ async def test_generate_rollouts_basic_metadata_and_termination(
mock_agent,
) -> None:
protocol_registry: ProtocolRegistry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=mock_agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=mock_agent)
}
engine = RolloutEngine(
@@ -203,7 +203,7 @@ async def test_generate_rollouts_unknown_env_raises(
mock_agent,
) -> None:
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=mock_agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=mock_agent)
}
engine = RolloutEngine(
env_registry=env_registry,
@@ -259,10 +259,10 @@ async def test_generate_rollouts_heterogeneous_protocols(
"""
# Define two different agent/protocol setups
agent_A = MockAgent(client=MockClient(text="Agent A says hi"))
- protocol_A = SingleAgentSyncProtocol(agent=agent_A)
+ protocol_A = SingleAgentProtocol(agent=agent_A)
agent_B = MockAgent(client=MockClient(text="Agent B says hi"))
- protocol_B = SingleAgentSyncProtocol(agent=agent_B)
+ protocol_B = SingleAgentProtocol(agent=agent_B)
protocol_registry = {
"protocol_A": lambda: protocol_A,
@@ -317,7 +317,7 @@ async def test_generate_rollouts_writes_jsonl(
jsonl_path = tmp_path / "rollouts.jsonl"
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=mock_agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=mock_agent)
}
engine = RolloutEngine(
@@ -373,7 +373,7 @@ async def test_generate_batch_uses_model_token_ids_when_available(
)
protocol_registry = {
- "token_protocol": lambda: SingleAgentSyncProtocol(agent=agent)
+ "token_protocol": lambda: SingleAgentProtocol(agent=agent)
}
engine = RolloutEngine(
@@ -429,7 +429,7 @@ async def test_generate_batch_raises_if_no_token_ids_and_no_retokenize(
mock_agent,
) -> None:
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=mock_agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=mock_agent)
}
engine = RolloutEngine(
@@ -471,7 +471,7 @@ async def test_rollout_batch_source_next_batch_integration(
parser=_mock_parser,
)
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=agent)
}
engine = RolloutEngine(
@@ -527,7 +527,7 @@ async def test_rollout_batch_source_passes_sample_filter(
parser=_mock_parser,
)
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=agent)
+ "mock_protocol": lambda: SingleAgentProtocol(agent=agent)
}
engine = RolloutEngine(
@@ -579,7 +579,7 @@ async def test_saw_item_contains_truncation_flags(
parser=_mock_parser,
) # Never terminates the env since it never outputs target="win"
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=agent),
+ "mock_protocol": lambda: SingleAgentProtocol(agent=agent),
}
engine = RolloutEngine(
@@ -634,7 +634,7 @@ async def test_generate_batch_applies_sample_filter_and_updates_counts(
parser=_mock_parser,
) # Never terminates the env since it never outputs target="win"
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=agent),
+ "mock_protocol": lambda: SingleAgentProtocol(agent=agent),
}
engine = RolloutEngine(
@@ -701,7 +701,7 @@ async def complete( # type: ignore[override]
parser=_mock_parser,
)
protocol_registry = {
- "mock_protocol": lambda: SingleAgentSyncProtocol(agent=agent),
+ "mock_protocol": lambda: SingleAgentProtocol(agent=agent),
}
engine = RolloutEngine(
diff --git a/uv.lock b/uv.lock
index a49fea3..a17cec9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,6 +1,11 @@
version = 1
revision = 3
requires-python = "==3.12.*"
+resolution-markers = [
+ "sys_platform != 'darwin' and sys_platform != 'linux'",
+ "sys_platform == 'darwin'",
+ "sys_platform == 'linux'",
+]
[[package]]
name = "accelerate"
@@ -8,12 +13,15 @@ version = "1.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" },
{ name = "packaging" },
{ name = "psutil" },
{ name = "pyyaml" },
{ name = "safetensors" },
- { name = "torch" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/4a/8e/ac2a9566747a93f8be36ee08532eb0160558b07630a081a6056a9f89bf1d/accelerate-1.12.0.tar.gz", hash = "sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6", size = 398399, upload-time = "2025-11-21T11:27:46.973Z" }
wheels = [
@@ -78,11 +86,11 @@ wheels = [
[[package]]
name = "annotated-doc"
-version = "0.0.3"
+version = "0.0.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d7/a6/dc46877b911e40c00d395771ea710d5e77b6de7bacd5fdcd78d70cc5a48f/annotated_doc-0.0.3.tar.gz", hash = "sha256:e18370014c70187422c33e945053ff4c286f453a984eba84d0dbfa0c935adeda", size = 5535, upload-time = "2025-10-24T14:57:10.718Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/02/b7/cf592cb5de5cb3bade3357f8d2cf42bf103bbe39f459824b4939fd212911/annotated_doc-0.0.3-py3-none-any.whl", hash = "sha256:348ec6664a76f1fd3be81f43dffbee4c7e8ce931ba71ec67cc7f4ade7fbbb580", size = 5488, upload-time = "2025-10-24T14:57:09.462Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
]
[[package]]
@@ -99,41 +107,31 @@ name = "anthropic"
version = "0.71.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "anyio" },
- { name = "distro" },
- { name = "docstring-parser" },
- { name = "httpx" },
- { name = "jiter" },
- { name = "pydantic" },
- { name = "sniffio" },
- { name = "typing-extensions" },
+ { name = "anyio", marker = "sys_platform == 'linux'" },
+ { name = "distro", marker = "sys_platform == 'linux'" },
+ { name = "docstring-parser", marker = "sys_platform == 'linux'" },
+ { name = "httpx", marker = "sys_platform == 'linux'" },
+ { name = "jiter", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "sniffio", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/82/4f/70682b068d897841f43223df82d96ec1d617435a8b759c4a2d901a50158b/anthropic-0.71.0.tar.gz", hash = "sha256:eb8e6fa86d049061b3ef26eb4cbae0174ebbff21affa6de7b3098da857d8de6a", size = 489102, upload-time = "2025-10-16T15:54:40.08Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5d/77/073e8ac488f335aec7001952825275582fb8f433737e90f24eeef9d878f6/anthropic-0.71.0-py3-none-any.whl", hash = "sha256:85c5015fcdbdc728390f11b17642a65a4365d03b12b799b18b6cc57e71fdb327", size = 355035, upload-time = "2025-10-16T15:54:38.238Z" },
]
-[[package]]
-name = "antlr4-python3-runtime"
-version = "4.13.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/33/5f/2cdf6f7aca3b20d3f316e9f505292e1f256a32089bd702034c29ebde6242/antlr4_python3_runtime-4.13.2.tar.gz", hash = "sha256:909b647e1d2fc2b70180ac586df3933e38919c85f98ccc656a96cd3f25ef3916", size = 117467, upload-time = "2024-08-03T19:00:12.757Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/89/03/a851e84fcbb85214dc637b6378121ef9a0dd61b4c65264675d8a5c9b1ae7/antlr4_python3_runtime-4.13.2-py3-none-any.whl", hash = "sha256:fe3835eb8d33daece0e799090eda89719dbccee7aa39ef94eed3818cafa5a7e8", size = 144462, upload-time = "2024-08-03T19:00:11.134Z" },
-]
-
[[package]]
name = "anyio"
-version = "4.11.0"
+version = "4.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
- { name = "sniffio" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" },
]
[[package]]
@@ -141,16 +139,14 @@ name = "apache-tvm-ffi"
version = "0.1.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "typing-extensions" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/45/20/8da071821b2142bdeed757d2859dede4817e0b82a96e9a4d8cfbffd49006/apache_tvm_ffi-0.1.6.tar.gz", hash = "sha256:53088126f7fce11823ddf0fb101e968a90298d79fd68829c0a981f25467a574c", size = 2387987, upload-time = "2025-12-16T19:00:33.523Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/1f/de/4ae5dd4d493b1cea755a25d59088895486432c053cff5a3287b75e36ce54/apache_tvm_ffi-0.1.6-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:5f4c0678854dbf3bfaa37795465f570d79c68759896b04b3d31774af0a03bcb8", size = 1779381, upload-time = "2025-12-16T18:59:59.593Z" },
{ url = "https://files.pythonhosted.org/packages/2d/40/2e943cbda764c3266a6966a34e582d3f0ac6046ab6aaa756631df9afd7bf/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:653f1d4c8ffd6bca5300fd1825a81373a5be82f31dc79353d1c476fa31cf377a", size = 1936756, upload-time = "2025-12-16T19:00:00.844Z" },
{ url = "https://files.pythonhosted.org/packages/a3/91/fc43f155b4d4363e61707655c1f4bee75af1d6dd4a76680f4956dd9846fe/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6a2cdfa90860a80e3cfb2364ce3b66a559fa5748de8d593a203b2e5992d92bc1", size = 2013641, upload-time = "2025-12-16T19:00:02.479Z" },
{ url = "https://files.pythonhosted.org/packages/14/9b/45208f2a9c70a88fd8e65668c0628f3917625d64668800ff55a2390d7fe0/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223ac7ac08b34a6dbabe7085f23939b4aaa70666e72ddad41015659034e095af", size = 1881149, upload-time = "2025-12-16T19:00:03.776Z" },
{ url = "https://files.pythonhosted.org/packages/7d/c5/e3ba08379127578bb3417605b61e9cd5e513184a6947ec7f3fac93d16355/apache_tvm_ffi-0.1.6-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05cedb3ba7600dc9ae35c17b7325d44ecf02c56c3ba1b62668dca8390da7ec28", size = 1992886, upload-time = "2025-12-16T19:00:05.047Z" },
- { url = "https://files.pythonhosted.org/packages/d6/7b/4df1e523ae4bcbfbe65a3e7ef3c8810cb76e9ae44fa9b44c9fac152ecc2b/apache_tvm_ffi-0.1.6-cp312-abi3-win_amd64.whl", hash = "sha256:a6c29ba9dbc6273f4534bfc0e8a52a784f264724eb62df62daedc2b349dabe85", size = 1758454, upload-time = "2025-12-16T19:00:06.498Z" },
]
[[package]]
@@ -186,8 +182,6 @@ version = "1.0.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/75/aa/abcd75e9600987a0bc6cfe9b6b2ff3f0e2cb08c170addc6e76035b5c4cb3/blake3-1.0.8.tar.gz", hash = "sha256:513cc7f0f5a7c035812604c2c852a0c1468311345573de647e310aca4ab165ba", size = 117308, upload-time = "2025-10-14T06:47:48.83Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ed/a0/b7b6dff04012cfd6e665c09ee446f749bd8ea161b00f730fe1bdecd0f033/blake3-1.0.8-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d8da4233984d51471bd4e4366feda1d90d781e712e0a504ea54b1f2b3577557b", size = 347983, upload-time = "2025-10-14T06:45:47.214Z" },
- { url = "https://files.pythonhosted.org/packages/5b/a2/264091cac31d7ae913f1f296abc20b8da578b958ffb86100a7ce80e8bf5c/blake3-1.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1257be19f2d381c868a34cc822fc7f12f817ddc49681b6d1a2790bfbda1a9865", size = 325415, upload-time = "2025-10-14T06:45:48.482Z" },
{ url = "https://files.pythonhosted.org/packages/ee/7d/85a4c0782f613de23d114a7a78fcce270f75b193b3ff3493a0de24ba104a/blake3-1.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:269f255b110840e52b6ce9db02217e39660ebad3e34ddd5bca8b8d378a77e4e1", size = 371296, upload-time = "2025-10-14T06:45:49.674Z" },
{ url = "https://files.pythonhosted.org/packages/e3/20/488475254976ed93fab57c67aa80d3b40df77f7d9db6528c9274bff53e08/blake3-1.0.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:66ca28a673025c40db3eba21a9cac52f559f83637efa675b3f6bd8683f0415f3", size = 374516, upload-time = "2025-10-14T06:45:51.23Z" },
{ url = "https://files.pythonhosted.org/packages/7b/21/2a1c47fedb77fb396512677ec6d46caf42ac6e9a897db77edd0a2a46f7bb/blake3-1.0.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb04966537777af56c1f399b35525aa70a1225816e121ff95071c33c0f7abca", size = 447911, upload-time = "2025-10-14T06:45:52.637Z" },
@@ -196,17 +190,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/94/eafaa5cdddadc0c9c603a6a6d8339433475e1a9f60c8bb9c2eed2d8736b6/blake3-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504d1399b7fb91dfe5c25722d2807990493185faa1917456455480c36867adb5", size = 388001, upload-time = "2025-10-14T06:45:57.067Z" },
{ url = "https://files.pythonhosted.org/packages/17/81/735fa00d13de7f68b25e1b9cb36ff08c6f165e688d85d8ec2cbfcdedccc5/blake3-1.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c84af132aa09abeadf9a0118c8fb26f4528f3f42c10ef8be0fcf31c478774ec4", size = 550302, upload-time = "2025-10-14T06:45:58.657Z" },
{ url = "https://files.pythonhosted.org/packages/0e/c6/d1fe8bdea4a6088bd54b5a58bc40aed89a4e784cd796af7722a06f74bae7/blake3-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a25db3d36b55f5ed6a86470155cc749fc9c5b91c949b8d14f48658f9d960d9ec", size = 554211, upload-time = "2025-10-14T06:46:00.269Z" },
- { url = "https://files.pythonhosted.org/packages/55/d1/ca74aa450cbe10e396e061f26f7a043891ffa1485537d6b30d3757e20995/blake3-1.0.8-cp312-cp312-win32.whl", hash = "sha256:e0fee93d5adcd44378b008c147e84f181f23715307a64f7b3db432394bbfce8b", size = 228343, upload-time = "2025-10-14T06:46:01.533Z" },
- { url = "https://files.pythonhosted.org/packages/4d/42/bbd02647169e3fbed27558555653ac2578c6f17ccacf7d1956c58ef1d214/blake3-1.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:6a6eafc29e4f478d365a87d2f25782a521870c8514bb43734ac85ae9be71caf7", size = 215704, upload-time = "2025-10-14T06:46:02.79Z" },
]
[[package]]
name = "cachetools"
-version = "6.2.1"
+version = "6.2.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz", hash = "sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201", size = 31325, upload-time = "2025-10-12T14:55:30.139Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/bc/1d/ede8680603f6016887c062a2cf4fc8fdba905866a3ab8831aa8aa651320c/cachetools-6.2.4.tar.gz", hash = "sha256:82c5c05585e70b6ba2d3ae09ea60b79548872185d2f24ae1f2709d37299fd607", size = 31731, upload-time = "2025-12-15T18:24:53.744Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/fc/1d7b80d0eb7b714984ce40efc78859c022cd930e402f599d8ca9e39c78a4/cachetools-6.2.4-py3-none-any.whl", hash = "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", size = 11551, upload-time = "2025-12-15T18:24:52.332Z" },
]
[[package]]
@@ -215,24 +207,20 @@ version = "5.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/b8/c0f6a7d46f816cb18b1fda61a2fe648abe16039f1ff93ea720a6e9fb3cee/cbor2-5.7.1.tar.gz", hash = "sha256:7a405a1d7c8230ee9acf240aad48ae947ef584e8af05f169f3c1bde8f01f8b71", size = 102467, upload-time = "2025-10-24T09:23:06.569Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/56/54/48426472f0c051982c647331441aed09b271a0500356ae0b7054c813d174/cbor2-5.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bd5ca44891c06f6b85d440836c967187dc1d30b15f86f315d55c675d3a841078", size = 69031, upload-time = "2025-10-24T09:22:25.438Z" },
- { url = "https://files.pythonhosted.org/packages/d3/68/1dd58c7706e9752188358223db58c83f3c48e07f728aa84221ffd244652f/cbor2-5.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:537d73ef930ccc1a7b6a2e8d2cbf81407d270deb18e40cda5eb511bd70f71078", size = 68825, upload-time = "2025-10-24T09:22:26.497Z" },
{ url = "https://files.pythonhosted.org/packages/09/4e/380562fe9f9995a1875fb5ec26fd041e19d61f4630cb690a98c5195945fc/cbor2-5.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:edbf814dd7763b6eda27a5770199f6ccd55bd78be8f4367092460261bfbf19d0", size = 286222, upload-time = "2025-10-24T09:22:27.546Z" },
{ url = "https://files.pythonhosted.org/packages/7c/bb/9eccdc1ea3c4d5c7cdb2e49b9de49534039616be5455ce69bd64c0b2efe2/cbor2-5.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9fc81da8c0e09beb42923e455e477b36ff14a03b9ca18a8a2e9b462de9a953e8", size = 285688, upload-time = "2025-10-24T09:22:28.651Z" },
{ url = "https://files.pythonhosted.org/packages/59/8c/4696d82f5bd04b3d45d9a64ec037fa242630c134e3218d6c252b4f59b909/cbor2-5.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e4a7d660d428911a3aadb7105e94438d7671ab977356fdf647a91aab751033bd", size = 277063, upload-time = "2025-10-24T09:22:29.775Z" },
{ url = "https://files.pythonhosted.org/packages/95/50/6538e44ca970caaad2fa376b81701d073d84bf597aac07a59d0a253b1a7f/cbor2-5.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:228e0af9c0a9ddf6375b6ae010eaa1942a1901d403f134ac9ee6a76a322483f9", size = 278334, upload-time = "2025-10-24T09:22:30.904Z" },
- { url = "https://files.pythonhosted.org/packages/64/a9/156ccd2207fb26b5b61d23728b4dbdc595d1600125aa79683a4a8ddc9313/cbor2-5.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:2d08a6c0d9ed778448e185508d870f4160ba74f59bb17a966abd0d14d0ff4dd3", size = 68404, upload-time = "2025-10-24T09:22:32.108Z" },
- { url = "https://files.pythonhosted.org/packages/4f/49/adc53615e9dd32c4421f6935dfa2235013532c6e6b28ee515bbdd92618be/cbor2-5.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:752506cfe72da0f4014b468b30191470ee8919a64a0772bd3b36a4fccf5fcefc", size = 64047, upload-time = "2025-10-24T09:22:33.147Z" },
{ url = "https://files.pythonhosted.org/packages/d5/7d/383bafeabb54c17fe5b6d5aca4e863e6b7df10bcc833b34aa169e9dfce1a/cbor2-5.7.1-py3-none-any.whl", hash = "sha256:68834e4eff2f56629ce6422b0634bc3f74c5a4269de5363f5265fe452c706ba7", size = 23829, upload-time = "2025-10-24T09:23:05.54Z" },
]
[[package]]
name = "certifi"
-version = "2025.10.5"
+version = "2025.11.12"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" },
+ { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" },
]
[[package]]
@@ -240,12 +228,10 @@ name = "cffi"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+ { name = "pycparser", marker = "implementation_name != 'PyPy' and sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
- { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
{ url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
{ url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
{ url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
@@ -253,9 +239,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
{ url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
{ url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
- { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
- { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
- { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
]
[[package]]
@@ -285,14 +268,14 @@ wheels = [
[[package]]
name = "click"
-version = "8.2.1"
+version = "8.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" },
+ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
]
[[package]]
@@ -318,10 +301,10 @@ name = "compressed-tensors"
version = "0.12.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "loguru" },
- { name = "pydantic" },
- { name = "torch" },
- { name = "transformers" },
+ { name = "loguru", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "transformers", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a2/79/4c5c1cd14266f8cf2650bdb940f986ce7fcaeb56aad8cfa9e9afedf14e2f/compressed_tensors-0.12.2.tar.gz", hash = "sha256:5bb40856dd17f128ab73557ecc73799f80db4dd82fab6de875f1e6899b9ea0c4", size = 190409, upload-time = "2025-10-07T14:30:59.302Z" }
wheels = [
@@ -333,11 +316,10 @@ name = "cryptography"
version = "46.0.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+ { name = "cffi", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, upload-time = "2025-10-15T23:16:52.239Z" },
{ url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" },
{ url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" },
{ url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" },
@@ -349,10 +331,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" },
{ url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" },
{ url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" },
- { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988, upload-time = "2025-10-15T23:17:14.65Z" },
- { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451, upload-time = "2025-10-15T23:17:16.142Z" },
- { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007, upload-time = "2025-10-15T23:17:18.04Z" },
- { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" },
{ url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" },
{ url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" },
{ url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" },
@@ -364,9 +342,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" },
{ url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" },
{ url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" },
- { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695, upload-time = "2025-10-15T23:18:08.672Z" },
- { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720, upload-time = "2025-10-15T23:18:10.632Z" },
- { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740, upload-time = "2025-10-15T23:18:12.277Z" },
]
[[package]]
@@ -374,12 +349,11 @@ name = "cuda-bindings"
version = "13.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cuda-pathfinder" },
+ { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/53/3d/c8ed9d169843091f3f0d6b8218e826fd59520a37e0434c204feada597988/cuda_bindings-13.1.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e75ad0cb863330df784236d289612d71ca855c013d19ae00e5693574abd6915", size = 15530160, upload-time = "2025-12-09T22:05:55.386Z" },
{ url = "https://files.pythonhosted.org/packages/4a/8e/368295623ee43fba622909d780fbb6863efc1638dff55f67a0f04eac6470/cuda_bindings-13.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25785d1a3cdcd98f151240fd5efd025609319a6720a217dee2a929241749d488", size = 16110386, upload-time = "2025-12-09T22:05:57.71Z" },
- { url = "https://files.pythonhosted.org/packages/60/1f/ecc4701ade3e85f091c625a920574527b9daf7fb354189fbfbc5516af6cd/cuda_bindings-13.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:ccde9c95c0e953b31fe7731bb08da9d0a34b1770498df9a3c156fdfdbe3951ad", size = 15250028, upload-time = "2025-12-09T22:06:00.346Z" },
]
[[package]]
@@ -395,8 +369,8 @@ name = "cuda-python"
version = "13.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cuda-bindings" },
- { name = "cuda-pathfinder" },
+ { name = "cuda-bindings", marker = "sys_platform == 'linux'" },
+ { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/cd/08/b5e3b9822662d72d540d830531e3ab6a7cabbda3dd56175696aabccfeb76/cuda_python-13.1.1-py3-none-any.whl", hash = "sha256:944cc4fe6482673d28dd545797a28840945a1668739328fa2ad1e9be4f7050d9", size = 8038, upload-time = "2025-12-09T22:13:10.719Z" },
@@ -407,18 +381,17 @@ name = "cupy-cuda12x"
version = "13.6.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "fastrlock" },
- { name = "numpy" },
+ { name = "fastrlock", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/c5/7e7fc4816d0de0154e5d9053242c3a08a0ca8b43ee656a6f7b3b95055a7b/cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a6970ceefe40f9acbede41d7fe17416bd277b1bd2093adcde457b23b578c5a59", size = 127334633, upload-time = "2025-08-18T08:24:43.065Z" },
{ url = "https://files.pythonhosted.org/packages/e0/95/d7e1295141e7d530674a3cc567e13ed0eb6b81524cb122d797ed996b5bea/cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:79b0cacb5e8b190ef409f9e03f06ac8de1b021b0c0dda47674d446f5557e0eb1", size = 112886268, upload-time = "2025-08-18T08:24:49.294Z" },
- { url = "https://files.pythonhosted.org/packages/ae/8c/14555b63fd78cfac7b88af0094cea0a3cb845d243661ec7da69f7b3ea0de/cupy_cuda12x-13.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca06fede7b8b83ca9ad80062544ef2e5bb8d4762d1c4fc3ac8349376de9c8a5e", size = 89785108, upload-time = "2025-08-18T08:24:54.527Z" },
]
[[package]]
name = "datasets"
-version = "4.4.1"
+version = "4.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dill" },
@@ -427,7 +400,8 @@ dependencies = [
{ name = "httpx" },
{ name = "huggingface-hub" },
{ name = "multiprocess" },
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" },
{ name = "packaging" },
{ name = "pandas" },
{ name = "pyarrow" },
@@ -436,9 +410,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "xxhash" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/93/bf/0dae295d6d1ba0b1a200a9dd216838464b5bbd05da01407cb1330b377445/datasets-4.4.1.tar.gz", hash = "sha256:80322699aa8c0bbbdb7caa87906da689c3c2e29523cff698775c67f28fdab1fc", size = 585341, upload-time = "2025-11-05T16:00:38.162Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/54/9359803da96bc65439a28fbb014dc2c90b7d4d8034a93b72362b0d40191f/datasets-4.4.2.tar.gz", hash = "sha256:9de16e415c4ba4713eac0493f7c7dc74f3aa21599297f00cc6ddab409cb7b24b", size = 586474, upload-time = "2025-12-19T15:03:09.129Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/b5/fefa518c809de7bced5cddb7c21c010da66fa2ae494bda96844a280cc6ce/datasets-4.4.2-py3-none-any.whl", hash = "sha256:6f5ef3417504d9cd663c71c1b90b9a494ff4c2076a2cd6a6e40ceee6ad95befc", size = 512268, upload-time = "2025-12-19T15:03:07.087Z" },
]
[[package]]
@@ -446,8 +420,8 @@ name = "depyf"
version = "0.20.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "astor" },
- { name = "dill" },
+ { name = "astor", marker = "sys_platform == 'linux'" },
+ { name = "dill", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/88/35/83fb0178212279aa0af031031905804c6de5618435d229f41ed21bb9ad2c/depyf-0.20.0.tar.gz", hash = "sha256:fb7683bd72c44f67b56029df2c47721e9a02ffa4d7b19095f1c54c4ebf797a98", size = 6168761, upload-time = "2025-10-13T12:33:38.589Z" }
wheels = [
@@ -490,6 +464,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
]
+[[package]]
+name = "docker"
+version = "7.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pywin32", marker = "sys_platform == 'win32'" },
+ { name = "requests" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" },
+]
+
[[package]]
name = "docstring-parser"
version = "0.17.0"
@@ -513,8 +501,8 @@ name = "email-validator"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "dnspython" },
- { name = "idna" },
+ { name = "dnspython", marker = "sys_platform == 'linux'" },
+ { name = "idna", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" }
wheels = [
@@ -523,65 +511,86 @@ wheels = [
[[package]]
name = "fastapi"
-version = "0.121.1"
+version = "0.128.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "annotated-doc" },
- { name = "pydantic" },
- { name = "starlette" },
- { name = "typing-extensions" },
+ { name = "annotated-doc", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "starlette", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/6b/a4/29e1b861fc9017488ed02ff1052feffa40940cb355ed632a8845df84ce84/fastapi-0.121.1.tar.gz", hash = "sha256:b6dba0538fd15dab6fe4d3e5493c3957d8a9e1e9257f56446b5859af66f32441", size = 342523, upload-time = "2025-11-08T21:48:14.068Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/94/fd/2e6f7d706899cc08690c5f6641e2ffbfffe019e8f16ce77104caa5730910/fastapi-0.121.1-py3-none-any.whl", hash = "sha256:2c5c7028bc3a58d8f5f09aecd3fd88a000ccc0c5ad627693264181a3c33aa1fc", size = 109192, upload-time = "2025-11-08T21:48:12.458Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" },
]
[package.optional-dependencies]
standard = [
- { name = "email-validator" },
- { name = "fastapi-cli", extra = ["standard"] },
- { name = "httpx" },
- { name = "jinja2" },
- { name = "python-multipart" },
- { name = "uvicorn", extra = ["standard"] },
+ { name = "email-validator", marker = "sys_platform == 'linux'" },
+ { name = "fastapi-cli", extra = ["standard"], marker = "sys_platform == 'linux'" },
+ { name = "httpx", marker = "sys_platform == 'linux'" },
+ { name = "jinja2", marker = "sys_platform == 'linux'" },
+ { name = "pydantic-extra-types", marker = "sys_platform == 'linux'" },
+ { name = "pydantic-settings", marker = "sys_platform == 'linux'" },
+ { name = "python-multipart", marker = "sys_platform == 'linux'" },
+ { name = "uvicorn", extra = ["standard"], marker = "sys_platform == 'linux'" },
]
[[package]]
name = "fastapi-cli"
-version = "0.0.14"
+version = "0.0.20"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "rich-toolkit" },
- { name = "typer" },
- { name = "uvicorn", extra = ["standard"] },
+ { name = "rich-toolkit", marker = "sys_platform == 'linux'" },
+ { name = "typer", marker = "sys_platform == 'linux'" },
+ { name = "uvicorn", extra = ["standard"], marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/cc/13/11e43d630be84e51ba5510a6da6a11eb93b44b72caa796137c5dddda937b/fastapi_cli-0.0.14.tar.gz", hash = "sha256:ddfb5de0a67f77a8b3271af1460489bd4d7f4add73d11fbfac613827b0275274", size = 17994, upload-time = "2025-10-20T16:33:21.054Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/ca/d90fb3bfbcbd6e56c77afd9d114dd6ce8955d8bb90094399d1c70e659e40/fastapi_cli-0.0.20.tar.gz", hash = "sha256:d17c2634f7b96b6b560bc16b0035ed047d523c912011395f49f00a421692bc3a", size = 19786, upload-time = "2025-12-22T17:13:33.794Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/40/e8/bc8bbfd93dcc8e347ce98a3e654fb0d2e5f2739afb46b98f41a30c339269/fastapi_cli-0.0.14-py3-none-any.whl", hash = "sha256:e66b9ad499ee77a4e6007545cde6de1459b7f21df199d7f29aad2adaab168eca", size = 11151, upload-time = "2025-10-20T16:33:19.318Z" },
+ { url = "https://files.pythonhosted.org/packages/08/89/5c4eef60524d0fd704eb0706885b82cd5623a43396b94e4a5b17d3a3f516/fastapi_cli-0.0.20-py3-none-any.whl", hash = "sha256:e58b6a0038c0b1532b7a0af690656093dee666201b6b19d3c87175b358e9f783", size = 12390, upload-time = "2025-12-22T17:13:31.708Z" },
]
[package.optional-dependencies]
standard = [
- { name = "fastapi-cloud-cli" },
- { name = "uvicorn", extra = ["standard"] },
+ { name = "fastapi-cloud-cli", marker = "sys_platform == 'linux'" },
+ { name = "uvicorn", extra = ["standard"], marker = "sys_platform == 'linux'" },
]
[[package]]
name = "fastapi-cloud-cli"
-version = "0.3.1"
+version = "0.8.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "httpx" },
- { name = "pydantic", extra = ["email"] },
- { name = "rich-toolkit" },
- { name = "rignore" },
- { name = "sentry-sdk" },
- { name = "typer" },
- { name = "uvicorn", extra = ["standard"] },
+ { name = "fastar", marker = "sys_platform == 'linux'" },
+ { name = "httpx", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", extra = ["email"], marker = "sys_platform == 'linux'" },
+ { name = "rich-toolkit", marker = "sys_platform == 'linux'" },
+ { name = "rignore", marker = "sys_platform == 'linux'" },
+ { name = "sentry-sdk", marker = "sys_platform == 'linux'" },
+ { name = "typer", marker = "sys_platform == 'linux'" },
+ { name = "uvicorn", extra = ["standard"], marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/f9/48/0f14d8555b750dc8c04382804e4214f1d7f55298127f3a0237ba566e69dd/fastapi_cloud_cli-0.3.1.tar.gz", hash = "sha256:8c7226c36e92e92d0c89827e8f56dbf164ab2de4444bd33aa26b6c3f7675db69", size = 24080, upload-time = "2025-10-09T11:32:58.174Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/5d/3b33438de35521fab4968b232caa9a4bd568a5078f2b2dfb7bb8a4528603/fastapi_cloud_cli-0.8.0.tar.gz", hash = "sha256:cf07c502528bfd9e6b184776659f05d9212811d76bbec9fbb6bf34bed4c7456f", size = 30257, upload-time = "2025-12-23T12:08:33.904Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/dd/8e/abb95ef59e91bb5adaa2d18fbf9ea70fd524010bb03f406a2dd2a4775ef9/fastapi_cloud_cli-0.8.0-py3-none-any.whl", hash = "sha256:e9f40bee671d985fd25d7a5409b56d4f103777bf8a0c6d746ea5fbf97a8186d9", size = 22306, upload-time = "2025-12-23T12:08:32.68Z" },
+]
+
+[[package]]
+name = "fastar"
+version = "0.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/e7/f89d54fb04104114dd0552836dc2b47914f416cc0e200b409dd04a33de5e/fastar-0.8.0.tar.gz", hash = "sha256:f4d4d68dbf1c4c2808f0e730fac5843493fc849f70fe3ad3af60dfbaf68b9a12", size = 68524, upload-time = "2025-11-26T02:36:00.72Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/68/79/7f5a5e5513e6a737e5fb089d9c59c74d4d24dc24d581d3aa519b326bedda/fastapi_cloud_cli-0.3.1-py3-none-any.whl", hash = "sha256:7d1a98a77791a9d0757886b2ffbf11bcc6b3be93210dd15064be10b216bf7e00", size = 19711, upload-time = "2025-10-09T11:32:57.118Z" },
+ { url = "https://files.pythonhosted.org/packages/22/7e/1ae005addc789924a9268da2394d3bb5c6f96836f7e37b7e3d23c2362675/fastar-0.8.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9d210da2de733ca801de83e931012349d209f38b92d9630ccaa94bd445bdc9b8", size = 868938, upload-time = "2025-11-26T02:33:51.119Z" },
+ { url = "https://files.pythonhosted.org/packages/a6/77/290a892b073b84bf82e6b2259708dfe79c54f356e252c2dd40180b16fe07/fastar-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa02270721517078a5bd61a38719070ac2537a4aa6b6c48cf369cf2abc59174a", size = 765204, upload-time = "2025-11-26T02:32:47.02Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/00/c3155171b976003af3281f5258189f1935b15d1221bfc7467b478c631216/fastar-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83c391e5b789a720e4d0029b9559f5d6dee3226693c5b39c0eab8eaece997e0f", size = 764717, upload-time = "2025-11-26T02:33:02.453Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/43/405b7ad76207b2c11b7b59335b70eac19e4a2653977f5588a1ac8fed54f4/fastar-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3258d7a78a72793cdd081545da61cabe85b1f37634a1d0b97ffee0ff11d105ef", size = 931502, upload-time = "2025-11-26T02:33:18.619Z" },
+ { url = "https://files.pythonhosted.org/packages/da/8a/a3dde6d37cc3da4453f2845cdf16675b5686b73b164f37e2cc579b057c2c/fastar-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6eab95dd985cdb6a50666cbeb9e4814676e59cfe52039c880b69d67cfd44767", size = 821454, upload-time = "2025-11-26T02:33:33.427Z" },
+ { url = "https://files.pythonhosted.org/packages/da/c1/904fe2468609c8990dce9fe654df3fbc7324a8d8e80d8240ae2c89757064/fastar-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:829b1854166141860887273c116c94e31357213fa8e9fe8baeb18bd6c38aa8d9", size = 821647, upload-time = "2025-11-26T02:34:07Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/73/a0642ab7a400bc07528091785e868ace598fde06fcd139b8f865ec1b6f3c/fastar-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b1667eae13f9457a3c737f4376d68e8c3e548353538b28f7e4273a30cb3965cd", size = 986342, upload-time = "2025-11-26T02:34:53.371Z" },
+ { url = "https://files.pythonhosted.org/packages/af/af/60c1bfa6edab72366461a95f053d0f5f7ab1825fe65ca2ca367432cd8629/fastar-0.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b864a95229a7db0814cd9ef7987cb713fd43dce1b0d809dd17d9cd6f02fdde3e", size = 1040207, upload-time = "2025-11-26T02:35:10.65Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/a0/0d624290dec622e7fa084b6881f456809f68777d54a314f5dde932714506/fastar-0.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c05fbc5618ce17675a42576fa49858d79734627f0a0c74c0875ab45ee8de340c", size = 1045031, upload-time = "2025-11-26T02:35:28.108Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/74/cf663af53c4706ba88e6b4af44a6b0c3bd7d7ca09f079dc40647a8f06585/fastar-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7f41c51ee96f338662ee3c3df4840511ba3f9969606840f1b10b7cb633a3c716", size = 994877, upload-time = "2025-11-26T02:35:45.797Z" },
]
[[package]]
@@ -594,36 +603,45 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/80/07/cdecb7aa976f34328372f1c4efd6c9dc1b039b3cc8d3f38787d640009a25/fastrlock-0.8.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f13ec08f1adb1aa916c384b05ecb7dbebb8df9ea81abd045f60941c6283a670", size = 53924, upload-time = "2024-12-17T11:02:20.85Z" },
{ url = "https://files.pythonhosted.org/packages/88/6d/59c497f8db9a125066dd3a7442fab6aecbe90d6fec344c54645eaf311666/fastrlock-0.8.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0ea4e53a04980d646def0f5e4b5e8bd8c7884288464acab0b37ca0c65c482bfe", size = 52140, upload-time = "2024-12-17T11:02:22.263Z" },
{ url = "https://files.pythonhosted.org/packages/62/04/9138943c2ee803d62a48a3c17b69de2f6fa27677a6896c300369e839a550/fastrlock-0.8.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:38340f6635bd4ee2a4fb02a3a725759fe921f2ca846cb9ca44531ba739cc17b4", size = 53261, upload-time = "2024-12-17T11:02:24.418Z" },
- { url = "https://files.pythonhosted.org/packages/e2/4b/db35a52589764c7745a613b6943bbd018f128d42177ab92ee7dde88444f6/fastrlock-0.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:da06d43e1625e2ffddd303edcd6d2cd068e1c486f5fd0102b3f079c44eb13e2c", size = 31235, upload-time = "2024-12-17T11:02:25.708Z" },
]
[[package]]
name = "filelock"
-version = "3.20.0"
+version = "3.20.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/23/ce7a1126827cedeb958fc043d61745754464eb56c5937c35bbf2b8e26f34/filelock-3.20.1.tar.gz", hash = "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c", size = 19476, upload-time = "2025-12-15T23:54:28.027Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/7f/a1a97644e39e7316d850784c642093c99df1290a460df4ede27659056834/filelock-3.20.1-py3-none-any.whl", hash = "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", size = 16666, upload-time = "2025-12-15T23:54:26.874Z" },
+]
+
+[[package]]
+name = "flash-attn"
+version = "2.8.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "einops", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
]
+sdist = { url = "https://files.pythonhosted.org/packages/3b/b2/8d76c41ad7974ee264754709c22963447f7f8134613fd9ce80984ed0dab7/flash_attn-2.8.3.tar.gz", hash = "sha256:1e71dd64a9e0280e0447b8a0c2541bad4bf6ac65bdeaa2f90e51a9e57de0370d", size = 8447812, upload-time = "2025-08-15T08:28:12.911Z" }
[[package]]
name = "flashinfer-python"
version = "0.5.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "apache-tvm-ffi" },
- { name = "click" },
- { name = "einops" },
- { name = "ninja" },
- { name = "numpy" },
- { name = "nvidia-cudnn-frontend" },
- { name = "nvidia-cutlass-dsl" },
- { name = "nvidia-ml-py" },
- { name = "packaging" },
- { name = "requests" },
- { name = "tabulate" },
- { name = "torch" },
- { name = "tqdm" },
+ { name = "apache-tvm-ffi", marker = "sys_platform == 'linux'" },
+ { name = "click", marker = "sys_platform == 'linux'" },
+ { name = "einops", marker = "sys_platform == 'linux'" },
+ { name = "ninja", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cudnn-frontend", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cutlass-dsl", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-ml-py", marker = "sys_platform == 'linux'" },
+ { name = "packaging", marker = "sys_platform == 'linux'" },
+ { name = "requests", marker = "sys_platform == 'linux'" },
+ { name = "tabulate", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "tqdm", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b4/91/cca69baeff24bb3efd12c7479a026432c8717ee47193694010494c528b22/flashinfer_python-0.5.3.tar.gz", hash = "sha256:100d59b0ede47878d2808cd3a1b9039d7a952d66338bc9f68dac192ae1b2e3f1", size = 4682367, upload-time = "2025-11-20T21:22:46.976Z" }
wheels = [
@@ -674,15 +692,39 @@ name = "gguf"
version = "0.17.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "numpy" },
- { name = "pyyaml" },
- { name = "tqdm" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'linux'" },
+ { name = "tqdm", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/08/08/7de1ca4b71e7bf33b547f82bb22505e221b5fa42f67d635e200e0ad22ad6/gguf-0.17.1.tar.gz", hash = "sha256:36ad71aad900a3e75fc94ebe96ea6029f03a4e44be7627ef7ad3d03e8c7bcb53", size = 89338, upload-time = "2025-06-19T14:00:33.705Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/31/6a93a887617ee7deeaa602ca3d02d1c12a6cb8a742a695de5d128f5fa46a/gguf-0.17.1-py3-none-any.whl", hash = "sha256:7bc5aa7eeb1931f7d39b48fdc5b38fda6b294b9dca75cf607ac69557840a3943", size = 96224, upload-time = "2025-06-19T14:00:32.88Z" },
]
+[[package]]
+name = "gitdb"
+version = "4.0.12"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "smmap" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" },
+]
+
+[[package]]
+name = "gitpython"
+version = "3.1.45"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "gitdb" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9a/c8/dd58967d119baab745caec2f9d853297cec1989ec1d63f677d3880632b88/gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c", size = 215076, upload-time = "2025-07-24T03:45:54.871Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" },
+]
+
[[package]]
name = "h11"
version = "0.16.0"
@@ -726,13 +768,10 @@ version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" },
- { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" },
{ url = "https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" },
{ url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, upload-time = "2025-10-10T03:54:42.452Z" },
{ url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" },
{ url = "https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" },
- { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" },
]
[[package]]
@@ -793,17 +832,12 @@ version = "3.4.0.post0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2d/30/7ab4b9e88e7946f6beef419f74edcc541df3ea562c7882257b4eaa82417d/ijson-3.4.0.post0.tar.gz", hash = "sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e", size = 67216, upload-time = "2025-10-10T05:29:25.62Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7d/fe/3b6af0025288e769dbfa30485dae1b3bd3f33f00390f3ee532cbb1c33e9b/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9", size = 87847, upload-time = "2025-10-10T05:28:07.229Z" },
- { url = "https://files.pythonhosted.org/packages/6e/a5/95ee2ca82f3b1a57892452f6e5087607d56c620beb8ce625475194568698/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0", size = 59815, upload-time = "2025-10-10T05:28:08.448Z" },
- { url = "https://files.pythonhosted.org/packages/51/8d/5a704ab3c17c55c21c86423458db8610626ca99cc9086a74dfeb7ee9054c/ijson-3.4.0.post0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7", size = 59648, upload-time = "2025-10-10T05:28:09.307Z" },
{ url = "https://files.pythonhosted.org/packages/25/56/ca5d6ca145d007f30b44e747f3c163bc08710ce004af0deaad4a2301339b/ijson-3.4.0.post0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c", size = 138279, upload-time = "2025-10-10T05:28:10.489Z" },
{ url = "https://files.pythonhosted.org/packages/c3/d3/22e3cc806fcdda7ad4c8482ed74db7a017d4a1d49b4300c7bc07052fb561/ijson-3.4.0.post0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e", size = 149110, upload-time = "2025-10-10T05:28:12.263Z" },
{ url = "https://files.pythonhosted.org/packages/3e/04/efb30f413648b9267f5a33920ac124d7ebef3bc4063af8f6ffc8ca11ddcb/ijson-3.4.0.post0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b", size = 149026, upload-time = "2025-10-10T05:28:13.557Z" },
{ url = "https://files.pythonhosted.org/packages/2d/cf/481165f7046ade32488719300a3994a437020bc41cfbb54334356348f513/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f", size = 150012, upload-time = "2025-10-10T05:28:14.859Z" },
{ url = "https://files.pythonhosted.org/packages/0f/24/642e3289917ecf860386e26dfde775f9962d26ab7f6c2e364ed3ca3c25d8/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec", size = 142193, upload-time = "2025-10-10T05:28:16.131Z" },
{ url = "https://files.pythonhosted.org/packages/0f/f5/fd2f038abe95e553e1c3ee207cda19db9196eb416e63c7c89699a8cf0db7/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d", size = 150904, upload-time = "2025-10-10T05:28:17.401Z" },
- { url = "https://files.pythonhosted.org/packages/49/35/24259d22519987928164e6cb8fe3486e1df0899b2999ada4b0498639b463/ijson-3.4.0.post0-cp312-cp312-win32.whl", hash = "sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab", size = 52358, upload-time = "2025-10-10T05:28:18.315Z" },
- { url = "https://files.pythonhosted.org/packages/a1/2b/6f7ade27a8ff5758fc41006dadd2de01730def84fe3e60553b329c59e0d4/ijson-3.4.0.post0-cp312-cp312-win_amd64.whl", hash = "sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac", size = 54789, upload-time = "2025-10-10T05:28:19.552Z" },
]
[[package]]
@@ -887,10 +921,10 @@ name = "jsonschema"
version = "4.25.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "attrs" },
- { name = "jsonschema-specifications" },
- { name = "referencing" },
- { name = "rpds-py" },
+ { name = "attrs", marker = "sys_platform == 'linux'" },
+ { name = "jsonschema-specifications", marker = "sys_platform == 'linux'" },
+ { name = "referencing", marker = "sys_platform == 'linux'" },
+ { name = "rpds-py", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" }
wheels = [
@@ -902,7 +936,7 @@ name = "jsonschema-specifications"
version = "2025.9.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "referencing" },
+ { name = "referencing", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
wheels = [
@@ -918,30 +952,14 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/00/d90b10b962b4277f5e64a78b6609968859ff86889f5b898c1a778c06ec00/lark-1.2.2-py3-none-any.whl", hash = "sha256:c2276486b02f0f1b90be155f2c8ba4a8e194d42775786db622faccd652d8e80c", size = 111036, upload-time = "2024-08-13T19:48:58.603Z" },
]
-[[package]]
-name = "latex2sympy2-extended"
-version = "1.10.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "antlr4-python3-runtime" },
- { name = "sympy" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f4/de/472f9115c14c6f6d8a5889cabe3418283d708bde62ce00402c29441deed4/latex2sympy2_extended-1.10.2.tar.gz", hash = "sha256:41a517ffcc5a140e910a7d1646ce6ff440817e5f9d48fc8279d88bd0925bc389", size = 206188, upload-time = "2025-07-02T15:26:06.225Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/ab/60/dfbbf40e3a371388c0e03ff65b01319b7d4023e883df6d7261125772ffdc/latex2sympy2_extended-1.10.2-py3-none-any.whl", hash = "sha256:f910442c5b02a466c1046f47d05cc5285181068b882399281f30102715337fb7", size = 207855, upload-time = "2025-07-02T15:26:04.88Z" },
-]
-
[[package]]
name = "llguidance"
version = "1.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/48/3f7a9d3ff1b36bba92b5107a3a21286821227afe9ea464736133994d61fb/llguidance-1.3.0.tar.gz", hash = "sha256:861249afd51dc325646834462ea827e57a5c2b2042e108e6aae7059fdad9104d", size = 1070460, upload-time = "2025-10-20T19:58:44.164Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/3b/33/be5acb85cd8cdc4afde33d9c234eece9f318e087920255af3c05864cd3e7/llguidance-1.3.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f7685222660a762e481ac633d49cc559c64980fe2ee59c8f932a5bb5cbc0c2c2", size = 3220647, upload-time = "2025-10-20T19:58:42.542Z" },
- { url = "https://files.pythonhosted.org/packages/82/e6/b48bda5b15efeaeb62bd0dba8fc6a01d4ae5457a85dbb5d18632385fe15c/llguidance-1.3.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:098030ff0687261a3f1bd54cf21fe951fc861d56d37a0671250dd36677eaf224", size = 3099830, upload-time = "2025-10-20T19:58:40.826Z" },
{ url = "https://files.pythonhosted.org/packages/aa/11/44389d3d1526d7a5c38ffd587a5ebc61d7bee443ac1dea95f2089ad58f5f/llguidance-1.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f6caca5d78db7f76e1fbb0fff8607b861c32d47fa3d5dee2fc49de27ee269df", size = 2835242, upload-time = "2025-10-20T19:58:34.518Z" },
{ url = "https://files.pythonhosted.org/packages/83/a8/1ff2bedb8f9acb46a2d2d603415d272bb622c142ea86f5b95445cc6e366c/llguidance-1.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc17e9dd602c3879bf91664a64bf72f54c74dbfbeb24ccfab6a5fe435b12f7aa", size = 3033133, upload-time = "2025-10-20T19:58:38.721Z" },
- { url = "https://files.pythonhosted.org/packages/5a/7e/809349638231f469b9056c0e1bfd924d5ef5558b3b3ec72d093b6fad33b1/llguidance-1.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:1d1cd1c8618d1a13605d3e057c978651e551c8c469b481ee4041f1d6c436002d", size = 2789946, upload-time = "2025-10-20T19:58:45.958Z" },
]
[[package]]
@@ -950,11 +968,8 @@ version = "0.44.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297, upload-time = "2025-01-20T11:13:32.57Z" },
- { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105, upload-time = "2025-01-20T11:13:38.744Z" },
{ url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" },
{ url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" },
- { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" },
]
[[package]]
@@ -962,10 +977,10 @@ name = "lm-format-enforcer"
version = "0.11.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "interegular" },
- { name = "packaging" },
- { name = "pydantic" },
- { name = "pyyaml" },
+ { name = "interegular", marker = "sys_platform == 'linux'" },
+ { name = "packaging", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/84/d5/41cd417ba7dfdbbcfe46cebf81fb3dfd7c591b89897560ad05bb410a465d/lm_format_enforcer-0.11.3.tar.gz", hash = "sha256:e68081c108719cce284a9bcc889709b26ffb085a1945b5eba3a12cfa96d528da", size = 40258, upload-time = "2025-08-24T19:37:47.527Z" }
wheels = [
@@ -976,10 +991,6 @@ wheels = [
name = "loguru"
version = "0.7.3"
source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "colorama", marker = "sys_platform == 'win32'" },
- { name = "win32-setctime", marker = "sys_platform == 'win32'" },
-]
sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
@@ -992,21 +1003,24 @@ source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
{ name = "beartype" },
+ { name = "datasets" },
+ { name = "flash-attn", marker = "sys_platform == 'linux'" },
{ name = "jaxtyping" },
{ name = "openai" },
{ name = "peft" },
{ name = "rich" },
- { name = "torch" },
- { name = "vllm" },
+ { name = "setuptools" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "torch-c-dlpack-ext" },
+ { name = "vllm", marker = "sys_platform == 'linux'" },
+ { name = "wandb" },
]
[package.optional-dependencies]
-examples = [
- { name = "datasets" },
- { name = "math-verify" },
-]
-pipelinerl = [
- { name = "redis" },
+code-exec = [
+ { name = "docker" },
]
[package.dev-dependencies]
@@ -1031,17 +1045,21 @@ typing = [
requires-dist = [
{ name = "aiohttp", specifier = ">=3.13.2" },
{ name = "beartype", specifier = ">=0.22.9" },
- { name = "datasets", marker = "extra == 'examples'", specifier = "==4.4.1" },
+ { name = "datasets", specifier = ">=4.4.2" },
+ { name = "docker", marker = "extra == 'code-exec'", specifier = ">=7.1.0" },
+ { name = "flash-attn", marker = "sys_platform == 'linux'", specifier = ">=2.7.0" },
{ name = "jaxtyping", specifier = ">=0.3.4" },
- { name = "math-verify", marker = "extra == 'examples'", specifier = "==0.8.0" },
{ name = "openai", specifier = ">=2.7.1" },
{ name = "peft", specifier = ">=0.18.0" },
- { name = "redis", marker = "extra == 'pipelinerl'", specifier = ">=7.1.0" },
{ name = "rich", specifier = ">=14.2.0" },
- { name = "torch", specifier = ">=2.8.0" },
- { name = "vllm", specifier = ">=0.13.0" },
+ { name = "setuptools", specifier = ">=79.0.1" },
+ { name = "torch", marker = "sys_platform != 'linux'", specifier = ">=2.9.0", index = "https://download.pytorch.org/whl/cpu" },
+ { name = "torch", marker = "sys_platform == 'linux'", specifier = ">=2.9.0", index = "https://download.pytorch.org/whl/cu128" },
+ { name = "torch-c-dlpack-ext", specifier = ">=0.1.4" },
+ { name = "vllm", marker = "sys_platform == 'linux'", specifier = ">=0.12.0" },
+ { name = "wandb", specifier = ">=0.23.1" },
]
-provides-extras = ["pipelinerl", "examples"]
+provides-extras = ["code-exec"]
[package.metadata.requires-dev]
dev = [
@@ -1088,37 +1106,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" },
]
-[[package]]
-name = "math-verify"
-version = "0.8.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "latex2sympy2-extended" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/35/b5/b1db6fa6b6c28ebbe1889ee11a4703a72a2ca7750ec415f4559c758cf01a/math_verify-0.8.0.tar.gz", hash = "sha256:3295e0adb94bfe553ff6e3189c44f1916a85aa24ab5d1900f2086a706e28f7c4", size = 60191, upload-time = "2025-07-02T15:52:07.209Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/fe/9f/59979f699b5c97334298f1295bc9fcdc9904d98d2276479bffff863d23b1/math_verify-0.8.0-py3-none-any.whl", hash = "sha256:31ca651296d817a9bb3fd58ca1fd0d192dcea709b1e5ecf2d0a4514c16f89087", size = 29994, upload-time = "2025-07-02T15:52:05.023Z" },
-]
-
[[package]]
name = "mcp"
version = "1.25.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "anyio" },
- { name = "httpx" },
- { name = "httpx-sse" },
- { name = "jsonschema" },
- { name = "pydantic" },
- { name = "pydantic-settings" },
- { name = "pyjwt", extra = ["crypto"] },
- { name = "python-multipart" },
- { name = "pywin32", marker = "sys_platform == 'win32'" },
- { name = "sse-starlette" },
- { name = "starlette" },
- { name = "typing-extensions" },
- { name = "typing-inspection" },
- { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
+ { name = "anyio", marker = "sys_platform == 'linux'" },
+ { name = "httpx", marker = "sys_platform == 'linux'" },
+ { name = "httpx-sse", marker = "sys_platform == 'linux'" },
+ { name = "jsonschema", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "pydantic-settings", marker = "sys_platform == 'linux'" },
+ { name = "pyjwt", extra = ["crypto"], marker = "sys_platform == 'linux'" },
+ { name = "python-multipart", marker = "sys_platform == 'linux'" },
+ { name = "sse-starlette", marker = "sys_platform == 'linux'" },
+ { name = "starlette", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
+ { name = "typing-inspection", marker = "sys_platform == 'linux'" },
+ { name = "uvicorn", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" }
wheels = [
@@ -1136,66 +1141,26 @@ wheels = [
[[package]]
name = "mistral-common"
-version = "1.8.5"
+version = "1.8.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "jsonschema" },
- { name = "numpy" },
- { name = "pillow" },
- { name = "pydantic" },
- { name = "pydantic-extra-types", extra = ["pycountry"] },
- { name = "requests" },
- { name = "tiktoken" },
- { name = "typing-extensions" },
+ { name = "jsonschema", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "pillow", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "pydantic-extra-types", extra = ["pycountry"], marker = "sys_platform == 'linux'" },
+ { name = "requests", marker = "sys_platform == 'linux'" },
+ { name = "tiktoken", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/ce/ff/1992a00ccc936f2c6e69ecb1f2cac678e0fd46c53c71bdab99eda4f89dfd/mistral_common-1.8.5.tar.gz", hash = "sha256:9f6204ede9c807f09040a208a9381ae78ef93e2e5a9cd5202dc12e712a025de8", size = 6331923, upload-time = "2025-09-12T06:43:01.937Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/0a/bb/6fc2e46d9920c80f0d053d58be5b0546c18010ff3a5f9b9d91299226e989/mistral_common-1.8.8.tar.gz", hash = "sha256:8ae28b3f88bce1b9396f5d1107e5ea87e4130486b9f6d811df6d5ac07bff2186", size = 6337014, upload-time = "2025-12-22T10:51:47.245Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/91/4a/54e19c5e75939fd9418c7b806c21d12cf252ea2ba38f122b597272b459dd/mistral_common-1.8.5-py3-none-any.whl", hash = "sha256:f3cf87b61958a00485e603f3fe0530eb509d7e9b2f7178329dcd260e307eced1", size = 6515140, upload-time = "2025-09-12T06:42:59.622Z" },
+ { url = "https://files.pythonhosted.org/packages/73/02/c1866598c8e94a4d0593b73e6dec0afea722227b9b3223bf6bb8ab269fa7/mistral_common-1.8.8-py3-none-any.whl", hash = "sha256:f63ce79b1867b3fc7c8b66fcaedab3b07966185567558038dc02321c17e4f39f", size = 6518005, upload-time = "2025-12-22T10:51:44.88Z" },
]
[package.optional-dependencies]
image = [
- { name = "opencv-python-headless" },
-]
-
-[[package]]
-name = "mlx"
-version = "0.29.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
-]
-wheels = [
- { url = "https://files.pythonhosted.org/packages/07/f5/14e12e219a2715296150d35f930dc3a6ff319cd60126408e563f03100113/mlx-0.29.3-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:86c62791ce930028d75c41b88b4e3ceb58f5f2e263ff9bfacda998b0c03d9544", size = 549516, upload-time = "2025-10-17T19:18:13.831Z" },
- { url = "https://files.pythonhosted.org/packages/c6/e2/5177c80e8c33a8be89fa45fa0a839d5b6a5578687d0ec973bf03638a4e73/mlx-0.29.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cddf6bcdc561094af6b3f0706f8768ecc5216a97eb6973e838c3ac2e2fca2cc8", size = 549509, upload-time = "2025-10-17T19:17:21.517Z" },
- { url = "https://files.pythonhosted.org/packages/11/89/aa424217a7a0291b84f8969d504ac63f5af0ef60f248fe5562c3d6e44048/mlx-0.29.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:b2e1a249437d017a7425358420d28e641b7bc9c2650f3e013c1b1f4f239d8533", size = 549511, upload-time = "2025-10-17T19:16:54.227Z" },
-]
-
-[[package]]
-name = "mlx-lm"
-version = "0.28.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "jinja2" },
- { name = "mlx", marker = "sys_platform == 'darwin'" },
- { name = "numpy" },
- { name = "protobuf" },
- { name = "pyyaml" },
- { name = "transformers" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/51/f6/15e002d52c28d8c544ec3aaf9053677468333e6ef0e76ea68579fd77b76d/mlx_lm-0.28.3.tar.gz", hash = "sha256:75df2b925d343ebaf50b63008dede4fe98cd3b02b1b24b7da71ebeb198d674f0", size = 214455, upload-time = "2025-10-17T21:44:33.921Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/c2/a6/db3b44a5ac1a1174605628b0a477fbe4632d4fad1f94cf08647e27cc79ad/mlx_lm-0.28.3-py3-none-any.whl", hash = "sha256:ec103e2c9a06bd2cbafd41aafc975e40262176f7360d4f53ec342cebb9e0e6ea", size = 294506, upload-time = "2025-10-17T21:44:32.447Z" },
-]
-
-[[package]]
-name = "mlx-metal"
-version = "0.29.3"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/41/95/a00054a006df82bb1b5b8f666ae44a676b259146fadbff90fe654309fefc/mlx_metal-0.29.3-py3-none-macosx_13_0_arm64.whl", hash = "sha256:27b5a4d905202a71e84d9fd559ea0236813f6f960ef494e5cafe9c45df4c9d7c", size = 36817352, upload-time = "2025-10-17T19:19:25.801Z" },
- { url = "https://files.pythonhosted.org/packages/c0/d8/5ee91eac16dfcf0334103120b47d4abd8c890ccc0d73d3eee4770ce8810f/mlx_metal-0.29.3-py3-none-macosx_14_0_arm64.whl", hash = "sha256:f426d4b67f96b4d6f0ed50d5992933595aadb370dc3e9ed2410bafbc16229882", size = 36555573, upload-time = "2025-10-17T19:18:42.098Z" },
- { url = "https://files.pythonhosted.org/packages/cd/9a/39b7ecdf21cf2a39ced8d7933eed65c6cb38295cadfd0907dd1abd4d1ded/mlx_metal-0.29.3-py3-none-macosx_15_0_arm64.whl", hash = "sha256:106616f7f825851043c53d3dc186965c003985da9cbb6e5c034f35108fc1fc27", size = 36549163, upload-time = "2025-10-17T19:18:37.701Z" },
+ { name = "opencv-python-headless", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -1203,13 +1168,13 @@ name = "model-hosting-container-standards"
version = "0.1.12"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "fastapi" },
- { name = "httpx" },
- { name = "jmespath" },
- { name = "pydantic" },
- { name = "setuptools" },
- { name = "starlette" },
- { name = "supervisor" },
+ { name = "fastapi", marker = "sys_platform == 'linux'" },
+ { name = "httpx", marker = "sys_platform == 'linux'" },
+ { name = "jmespath", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "setuptools", marker = "sys_platform == 'linux'" },
+ { name = "starlette", marker = "sys_platform == 'linux'" },
+ { name = "supervisor", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/23/cc/014bdcc700f1d4393578b55df09c1ed76b57feb9a542208d8c25e7c0bb1b/model_hosting_container_standards-0.1.12.tar.gz", hash = "sha256:5a38814201d319eaf258d816697caa16d39b5222319c2d5116d779b30babe602", size = 79119, upload-time = "2025-12-15T23:02:58.848Z" }
wheels = [
@@ -1231,30 +1196,22 @@ version = "1.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ad/bd/8b0d01c756203fbab65d265859749860682ccd2a59594609aeec3a144efa/msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa", size = 81939, upload-time = "2025-10-08T09:15:01.472Z" },
- { url = "https://files.pythonhosted.org/packages/34/68/ba4f155f793a74c1483d4bdef136e1023f7bcba557f0db4ef3db3c665cf1/msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb", size = 85064, upload-time = "2025-10-08T09:15:03.764Z" },
{ url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" },
{ url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" },
{ url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" },
{ url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" },
- { url = "https://files.pythonhosted.org/packages/41/0d/2ddfaa8b7e1cee6c490d46cb0a39742b19e2481600a7a0e96537e9c22f43/msgpack-1.1.2-cp312-cp312-win32.whl", hash = "sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029", size = 65096, upload-time = "2025-10-08T09:15:11.11Z" },
- { url = "https://files.pythonhosted.org/packages/8c/ec/d431eb7941fb55a31dd6ca3404d41fbb52d99172df2e7707754488390910/msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b", size = 72708, upload-time = "2025-10-08T09:15:12.554Z" },
- { url = "https://files.pythonhosted.org/packages/c5/31/5b1a1f70eb0e87d1678e9624908f86317787b536060641d6798e3cf70ace/msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69", size = 64119, upload-time = "2025-10-08T09:15:13.589Z" },
]
[[package]]
name = "msgspec"
-version = "0.19.0"
+version = "0.20.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cf/9b/95d8ce458462b8b71b8a70fa94563b2498b89933689f3a7b8911edfae3d7/msgspec-0.19.0.tar.gz", hash = "sha256:604037e7cd475345848116e89c553aa9a233259733ab51986ac924ab1b976f8e", size = 216934, upload-time = "2024-12-27T17:40:28.597Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b2/5f/a70c24f075e3e7af2fae5414c7048b0e11389685b7f717bb55ba282a34a7/msgspec-0.19.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f98bd8962ad549c27d63845b50af3f53ec468b6318400c9f1adfe8b092d7b62f", size = 190485, upload-time = "2024-12-27T17:39:44.974Z" },
- { url = "https://files.pythonhosted.org/packages/89/b0/1b9763938cfae12acf14b682fcf05c92855974d921a5a985ecc197d1c672/msgspec-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43bbb237feab761b815ed9df43b266114203f53596f9b6e6f00ebd79d178cdf2", size = 183910, upload-time = "2024-12-27T17:39:46.401Z" },
- { url = "https://files.pythonhosted.org/packages/87/81/0c8c93f0b92c97e326b279795f9c5b956c5a97af28ca0fbb9fd86c83737a/msgspec-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cfc033c02c3e0aec52b71710d7f84cb3ca5eb407ab2ad23d75631153fdb1f12", size = 210633, upload-time = "2024-12-27T17:39:49.099Z" },
- { url = "https://files.pythonhosted.org/packages/d0/ef/c5422ce8af73928d194a6606f8ae36e93a52fd5e8df5abd366903a5ca8da/msgspec-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d911c442571605e17658ca2b416fd8579c5050ac9adc5e00c2cb3126c97f73bc", size = 213594, upload-time = "2024-12-27T17:39:51.204Z" },
- { url = "https://files.pythonhosted.org/packages/19/2b/4137bc2ed45660444842d042be2cf5b18aa06efd2cda107cff18253b9653/msgspec-0.19.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:757b501fa57e24896cf40a831442b19a864f56d253679f34f260dcb002524a6c", size = 214053, upload-time = "2024-12-27T17:39:52.866Z" },
- { url = "https://files.pythonhosted.org/packages/9d/e6/8ad51bdc806aac1dc501e8fe43f759f9ed7284043d722b53323ea421c360/msgspec-0.19.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5f0f65f29b45e2816d8bded36e6b837a4bf5fb60ec4bc3c625fa2c6da4124537", size = 219081, upload-time = "2024-12-27T17:39:55.142Z" },
- { url = "https://files.pythonhosted.org/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0", size = 187467, upload-time = "2024-12-27T17:39:56.531Z" },
+ { url = "https://files.pythonhosted.org/packages/49/d6/9709ee093b7742362c2934bfb1bbe791a1e09bed3ea5d8a18ce552fbfd73/msgspec-0.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:558ed73315efa51b1538fa8f1d3b22c8c5ff6d9a2a62eff87d25829b94fc5054", size = 218852, upload-time = "2025-11-24T03:55:35.575Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/a2/488517a43ccf5a4b6b6eca6dd4ede0bd82b043d1539dd6bb908a19f8efd3/msgspec-0.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:509ac1362a1d53aa66798c9b9fd76872d7faa30fcf89b2fba3bcbfd559d56eb0", size = 224937, upload-time = "2025-11-24T03:55:36.859Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/e8/49b832808aa23b85d4f090d1d2e48a4e3834871415031ed7c5fe48723156/msgspec-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1353c2c93423602e7dea1aa4c92f3391fdfc25ff40e0bacf81d34dbc68adb870", size = 222858, upload-time = "2025-11-24T03:55:38.187Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/56/1dc2fa53685dca9c3f243a6cbecd34e856858354e455b77f47ebd76cf5bf/msgspec-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb33b5eb5adb3c33d749684471c6a165468395d7aa02d8867c15103b81e1da3e", size = 227248, upload-time = "2025-11-24T03:55:39.496Z" },
]
[[package]]
@@ -1302,11 +1259,11 @@ wheels = [
[[package]]
name = "networkx"
-version = "3.5"
+version = "3.6.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" },
]
[[package]]
@@ -1315,7 +1272,6 @@ version = "1.13.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/43/73/79a0b22fc731989c708068427579e840a6cf4e937fe7ae5c5d0b7356ac22/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978", size = 242558, upload-time = "2025-08-11T15:10:19.421Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/3c/74/d02409ed2aa865e051b7edda22ad416a39d81a84980f544f8de717cab133/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1", size = 310125, upload-time = "2025-08-11T15:09:50.971Z" },
{ url = "https://files.pythonhosted.org/packages/8e/de/6e1cd6b84b412ac1ef327b76f0641aeb5dcc01e9d3f9eee0286d0c34fd93/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630", size = 177467, upload-time = "2025-08-11T15:09:52.767Z" },
{ url = "https://files.pythonhosted.org/packages/c8/83/49320fb6e58ae3c079381e333575fdbcf1cca3506ee160a2dcce775046fa/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c", size = 187834, upload-time = "2025-08-11T15:09:54.115Z" },
{ url = "https://files.pythonhosted.org/packages/56/c7/ba22748fb59f7f896b609cd3e568d28a0a367a6d953c24c461fe04fc4433/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e", size = 202736, upload-time = "2025-08-11T15:09:55.745Z" },
@@ -1330,9 +1286,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3c/fb/95752eb635bb8ad27d101d71bef15bc63049de23f299e312878fc21cb2da/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5", size = 585106, upload-time = "2025-08-11T15:10:09.818Z" },
{ url = "https://files.pythonhosted.org/packages/c1/31/aa56a1a286703800c0cbe39fb4e82811c277772dc8cd084f442dd8e2938a/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96", size = 707138, upload-time = "2025-08-11T15:10:11.366Z" },
{ url = "https://files.pythonhosted.org/packages/34/6f/5f5a54a1041af945130abdb2b8529cbef0cdcbbf9bcf3f4195378319d29a/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200", size = 581758, upload-time = "2025-08-11T15:10:13.295Z" },
- { url = "https://files.pythonhosted.org/packages/95/97/51359c77527d45943fe7a94d00a3843b81162e6c4244b3579fe8fc54cb9c/ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9", size = 267201, upload-time = "2025-08-11T15:10:15.158Z" },
- { url = "https://files.pythonhosted.org/packages/29/45/c0adfbfb0b5895aa18cec400c535b4f7ff3e52536e0403602fc1a23f7de9/ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e", size = 309975, upload-time = "2025-08-11T15:10:16.697Z" },
- { url = "https://files.pythonhosted.org/packages/df/93/a7b983643d1253bb223234b5b226e69de6cda02b76cdca7770f684b795f5/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9", size = 290806, upload-time = "2025-08-11T15:10:18.018Z" },
]
[[package]]
@@ -1340,34 +1293,47 @@ name = "numba"
version = "0.61.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "llvmlite" },
- { name = "numpy" },
+ { name = "llvmlite", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626, upload-time = "2025-04-09T02:57:51.857Z" },
- { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287, upload-time = "2025-04-09T02:57:53.658Z" },
{ url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" },
{ url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" },
- { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929, upload-time = "2025-04-09T02:57:58.45Z" },
]
[[package]]
name = "numpy"
version = "2.2.6"
source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform == 'linux'",
+]
sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" },
- { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" },
- { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" },
- { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" },
{ url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" },
{ url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" },
{ url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" },
{ url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" },
- { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" },
- { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" },
+]
+
+[[package]]
+name = "numpy"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "sys_platform != 'darwin' and sys_platform != 'linux'",
+ "sys_platform == 'darwin'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8b/ff/f6400ffec95de41c74b8e73df32e3fff1830633193a7b1e409be7fb1bb8c/numpy-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a8b6bb8369abefb8bd1801b054ad50e02b3275c8614dc6e5b0373c305291037", size = 16653117, upload-time = "2025-12-20T16:16:06.709Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/28/6c23e97450035072e8d830a3c411bf1abd1f42c611ff9d29e3d8f55c6252/numpy-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e284ca13d5a8367e43734148622caf0b261b275673823593e3e3634a6490f83", size = 12369711, upload-time = "2025-12-20T16:16:08.758Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/af/acbef97b630ab1bb45e6a7d01d1452e4251aa88ce680ac36e56c272120ec/numpy-2.4.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:49ff32b09f5aa0cd30a20c2b39db3e669c845589f2b7fc910365210887e39344", size = 5198355, upload-time = "2025-12-20T16:16:10.902Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/c8/4e0d436b66b826f2e53330adaa6311f5cac9871a5b5c31ad773b27f25a74/numpy-2.4.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:36cbfb13c152b1c7c184ddac43765db8ad672567e7bafff2cc755a09917ed2e6", size = 6545298, upload-time = "2025-12-20T16:16:12.607Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/6f/f868765d44e6fc466467ed810ba9d8d6db1add7d4a748abfa2a4c99a3194/numpy-2.4.0-cp312-cp312-win32.whl", hash = "sha256:92190db305a6f48734d3982f2c60fa30d6b5ee9bff10f2887b930d7b40119f4c", size = 5955432, upload-time = "2025-12-20T16:16:25.06Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/b5/94c1e79fcbab38d1ca15e13777477b2914dd2d559b410f96949d6637b085/numpy-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:680060061adb2d74ce352628cb798cfdec399068aa7f07ba9fb818b2b3305f98", size = 12306201, upload-time = "2025-12-20T16:16:26.979Z" },
+ { url = "https://files.pythonhosted.org/packages/70/09/c39dadf0b13bb0768cd29d6a3aaff1fb7c6905ac40e9aaeca26b1c086e06/numpy-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:39699233bc72dd482da1415dcb06076e32f60eddc796a796c5fb6c5efce94667", size = 10308234, upload-time = "2025-12-20T16:16:29.417Z" },
]
[[package]]
@@ -1375,6 +1341,7 @@ name = "nvidia-cublas-cu12"
version = "12.8.4.1"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" },
{ url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" },
]
@@ -1383,6 +1350,7 @@ name = "nvidia-cuda-cupti-cu12"
version = "12.8.90"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" },
{ url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" },
]
@@ -1392,6 +1360,7 @@ version = "12.8.93"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" },
]
[[package]]
@@ -1399,6 +1368,7 @@ name = "nvidia-cuda-runtime-cu12"
version = "12.8.90"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" },
{ url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" },
]
@@ -1407,20 +1377,20 @@ name = "nvidia-cudnn-cu12"
version = "9.10.2.21"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-cublas-cu12" },
+ { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
]
wheels = [
+ { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" },
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
]
[[package]]
name = "nvidia-cudnn-frontend"
-version = "1.16.0"
+version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/00/39/79b606e805abd67ab4fa72f752a5413a496159f10d94fbdb1d67bb5ae86c/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd6fdd71c0896ff2ca1809d914cbd17f2904d55863f8881f47946e1d634c7a88", size = 1839271, upload-time = "2025-11-07T01:29:53.06Z" },
- { url = "https://files.pythonhosted.org/packages/09/21/a0e0d50ba8d7b639fe635500fee0d9c0319561b1ae72176d7024ec04b439/nvidia_cudnn_frontend-1.16.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:16efb069d4bda4d3b99134f59f376cfd4d09558298bd96af778fdc7f2851e696", size = 1954062, upload-time = "2025-11-07T01:32:18.556Z" },
- { url = "https://files.pythonhosted.org/packages/ce/d6/30ae67bb9c010e9459d1211c56d73373eb4e3dd9f57f4c3c1fe0966efcb1/nvidia_cudnn_frontend-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:7b7860db03767c158accbe0b4e9c9553506513cc970ff08ed28c7761681ac466", size = 1368435, upload-time = "2025-11-07T01:26:28.022Z" },
+ { url = "https://files.pythonhosted.org/packages/42/d9/f58ed6292c9396f7422812a0a2d9f80cc5a623ea6c758bcb3d34d4795bb8/nvidia_cudnn_frontend-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de0c473f32d705abcf14f351615f7ffbeed7320e3499cf2195ae5689652a2592", size = 1917620, upload-time = "2025-12-20T00:27:46.179Z" },
+ { url = "https://files.pythonhosted.org/packages/db/eb/c641135632bd2afc21339aadee96af4c5db1460dfa07ca74836de75a590f/nvidia_cudnn_frontend-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c913c87fca691a91385287f2587575531933acfebc85c33dbcecb191886c7a53", size = 2038994, upload-time = "2025-12-20T00:25:18.9Z" },
]
[[package]]
@@ -1428,9 +1398,10 @@ name = "nvidia-cufft-cu12"
version = "11.3.3.83"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-nvjitlink-cu12" },
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
]
wheels = [
+ { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" },
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
]
@@ -1440,6 +1411,7 @@ version = "1.13.1.3"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" },
]
[[package]]
@@ -1447,6 +1419,7 @@ name = "nvidia-curand-cu12"
version = "10.3.9.90"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" },
{ url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" },
]
@@ -1455,11 +1428,12 @@ name = "nvidia-cusolver-cu12"
version = "11.7.3.90"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-cublas-cu12" },
- { name = "nvidia-cusparse-cu12" },
- { name = "nvidia-nvjitlink-cu12" },
+ { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
]
wheels = [
+ { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" },
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
]
@@ -1468,9 +1442,10 @@ name = "nvidia-cusparse-cu12"
version = "12.5.8.93"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-nvjitlink-cu12" },
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
]
wheels = [
+ { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" },
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
]
@@ -1479,21 +1454,22 @@ name = "nvidia-cusparselt-cu12"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" },
{ url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
]
[[package]]
name = "nvidia-cutlass-dsl"
-version = "4.3.3"
+version = "4.3.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cuda-python" },
- { name = "numpy" },
- { name = "typing-extensions" },
+ { name = "cuda-python", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/c6/d4/7c5ef53ccf75d7f99a9ea29cae9f9c0233229b75b3b22f85a4ef4f52e6ab/nvidia_cutlass_dsl-4.3.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3278526f54bddd920d8e539771e5820c6166c549a1e67813375025f39417dec6", size = 58734009, upload-time = "2025-12-10T09:23:29.305Z" },
- { url = "https://files.pythonhosted.org/packages/88/a8/a27562194cc4182c67793cd21c5dbf9468cd5a49c775a487153c6f28364c/nvidia_cutlass_dsl-4.3.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f2b25816b8bb8bc332bcbf6fc341347b5d728344cf185c65af0dd73e8503d5c7", size = 58596724, upload-time = "2025-12-10T11:01:07.228Z" },
+ { url = "https://files.pythonhosted.org/packages/86/ee/53d22e2e14cb763927d85f7ec9748f6af6d27a2b7f43d52de014728da10e/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:57693d87677919572ab9eefa386b3f39e8e888bc4a9db7ab8730a97e8dbe06b4", size = 58736300, upload-time = "2025-12-21T07:41:25.723Z" },
+ { url = "https://files.pythonhosted.org/packages/66/f6/47489e07081cd4060f08bfa4166f8ff32beaecf71c06060d03bde88f3b6c/nvidia_cutlass_dsl-4.3.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a48fbff859e44dd548f8f26819d97d0595acea70e3b057c91dfdb47929015c72", size = 58599014, upload-time = "2025-12-21T07:38:51.632Z" },
]
[[package]]
@@ -1510,6 +1486,7 @@ name = "nvidia-nccl-cu12"
version = "2.27.5"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" },
{ url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" },
]
@@ -1519,6 +1496,7 @@ version = "12.8.93"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" },
]
[[package]]
@@ -1526,6 +1504,7 @@ name = "nvidia-nvshmem-cu12"
version = "3.3.20"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 124484616, upload-time = "2025-08-04T20:24:59.172Z" },
{ url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" },
]
@@ -1534,12 +1513,13 @@ name = "nvidia-nvtx-cu12"
version = "12.8.90"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" },
{ url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
]
[[package]]
name = "openai"
-version = "2.7.1"
+version = "2.14.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1551,9 +1531,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/51/a2/f4023c1e0c868a6a5854955b3374f17153388aed95e835af114a17eac95b/openai-2.7.1.tar.gz", hash = "sha256:df4d4a3622b2df3475ead8eb0fbb3c27fd1c070fa2e55d778ca4f40e0186c726", size = 595933, upload-time = "2025-11-04T06:07:23.069Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/b1/12fe1c196bea326261718eb037307c1c1fe1dedc2d2d4de777df822e6238/openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952", size = 626938, upload-time = "2025-12-19T03:28:45.742Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/8c/74/6bfc3adc81f6c2cea4439f2a734c40e3a420703bbcdc539890096a732bbd/openai-2.7.1-py3-none-any.whl", hash = "sha256:2f2530354d94c59c614645a4662b9dab0a5b881c5cd767a8587398feac0c9021", size = 1008780, upload-time = "2025-11-04T06:07:20.818Z" },
+ { url = "https://files.pythonhosted.org/packages/27/4b/7c1a00c2c3fbd004253937f7520f692a9650767aa73894d7a34f0d65d3f4/openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183", size = 1067558, upload-time = "2025-12-19T03:28:43.727Z" },
]
[[package]]
@@ -1561,11 +1541,10 @@ name = "openai-harmony"
version = "0.0.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "pydantic" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326", size = 2627806, upload-time = "2025-11-05T19:06:57.063Z" },
{ url = "https://files.pythonhosted.org/packages/d3/d2/ce6953ca87db9cae3e775024184da7d1c5cb88cead19a2d75b42f00a959c/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4f709815924ec325b9a890e6ab2bbb0ceec8e319a4e257328eb752cf36b2efc", size = 2948463, upload-time = "2025-11-05T19:06:48.17Z" },
{ url = "https://files.pythonhosted.org/packages/fa/4c/b553c9651662d6ce102ca7f3629d268b23df1abe5841e24bed81e8a8e949/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cfcfd963b50a41fc656c84d3440ca6eecdccd6c552158ce790b8f2e33dfb5a9", size = 2704083, upload-time = "2025-11-05T19:06:50.205Z" },
{ url = "https://files.pythonhosted.org/packages/9b/af/4eec8f9ab9c27bcdb444460c72cf43011d176fc44c79d6e113094ca1e152/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a3a16972aa1cee38ea958470cd04ac9a2d5ac38fdcf77ab686611246220c158", size = 2959765, upload-time = "2025-11-05T19:06:53.62Z" },
@@ -1575,8 +1554,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" },
{ url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" },
{ url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
- { url = "https://files.pythonhosted.org/packages/14/63/119de431572d7c70a7bf1037034a9be6ed0a7502a7498ba7302bca5b3242/openai_harmony-0.0.8-cp38-abi3-win32.whl", hash = "sha256:a9b5f893326b28d9e935ade14b4f655f5a840942473bc89b201c25f7a15af9cf", size = 2082457, upload-time = "2025-11-05T19:07:09.631Z" },
- { url = "https://files.pythonhosted.org/packages/40/1f/c83cf5a206c263ee70448a5ae4264682555f4d0b5bed0d2cc6ca1108103d/openai_harmony-0.0.8-cp38-abi3-win_amd64.whl", hash = "sha256:39d44f0d8f466bd56698e7ead708bead3141e27b9b87e3ab7d5a6d0e4a869ee5", size = 2438369, upload-time = "2025-11-05T19:07:08.1Z" },
]
[[package]]
@@ -1584,16 +1561,12 @@ name = "opencv-python-headless"
version = "4.12.0.88"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a4/63/6861102ec149c3cd298f4d1ea7ce9d6adbc7529221606ff1dab991a19adb/opencv-python-headless-4.12.0.88.tar.gz", hash = "sha256:cfdc017ddf2e59b6c2f53bc12d74b6b0be7ded4ec59083ea70763921af2b6c09", size = 95379675, upload-time = "2025-07-07T09:21:06.815Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/f7/7d/414e243c5c8216a5277afd104a319cc1291c5e23f5eeef512db5629ee7f4/opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1e58d664809b3350c1123484dd441e1667cd7bed3086db1b9ea1b6f6cb20b50e", size = 37877864, upload-time = "2025-07-07T09:14:41.693Z" },
- { url = "https://files.pythonhosted.org/packages/05/14/7e162714beed1cd5e7b5eb66fcbcba2f065c51b1d9da2463024c84d2f7c0/opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:365bb2e486b50feffc2d07a405b953a8f3e8eaa63865bc650034e5c71e7a5154", size = 57326608, upload-time = "2025-07-07T09:14:51.885Z" },
{ url = "https://files.pythonhosted.org/packages/69/4e/116720df7f1f7f3b59abc608ca30fbec9d2b3ae810afe4e4d26483d9dfa0/opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:aeb4b13ecb8b4a0beb2668ea07928160ea7c2cd2d9b5ef571bbee6bafe9cc8d0", size = 33145800, upload-time = "2025-07-07T09:15:00.367Z" },
{ url = "https://files.pythonhosted.org/packages/89/53/e19c21e0c4eb1275c3e2c97b081103b6dfb3938172264d283a519bf728b9/opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:236c8df54a90f4d02076e6f9c1cc763d794542e886c576a6fee46ec8ff75a7a9", size = 54023419, upload-time = "2025-07-07T09:15:10.164Z" },
- { url = "https://files.pythonhosted.org/packages/bf/9c/a76fd5414de6ec9f21f763a600058a0c3e290053cea87e0275692b1375c0/opencv_python_headless-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:fde2cf5c51e4def5f2132d78e0c08f9c14783cd67356922182c6845b9af87dbd", size = 30225230, upload-time = "2025-07-07T09:15:17.045Z" },
- { url = "https://files.pythonhosted.org/packages/f2/35/0858e9e71b36948eafbc5e835874b63e515179dc3b742cbe3d76bc683439/opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:86b413bdd6c6bf497832e346cd5371995de148e579b9774f8eba686dee3f5528", size = 38923559, upload-time = "2025-07-07T09:15:25.229Z" },
]
[[package]]
@@ -1602,14 +1575,8 @@ version = "0.2.11"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1a/d3/e04e9145f8f806723dec9b9e5227ad695a3efcd3ced7794cf7c22b15df5e/outlines_core-0.2.11.tar.gz", hash = "sha256:dfce56f717ff5083e54cbcfdb66cad243365437fccbb5509adaa7e31e030f1d8", size = 197263, upload-time = "2025-05-19T10:12:51.719Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/5f/2c/c7636823244c70e2960060bf9bd978248dffb55c5e7c91c46d18354b2a24/outlines_core-0.2.11-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4a9db4872bae083631d720994f4cee603bce0536b33d5a988814576863b657cf", size = 1957668, upload-time = "2025-05-19T10:12:18.29Z" },
- { url = "https://files.pythonhosted.org/packages/c7/09/5c62047da139d722317a444a4d01cd5f11943a8c2eaecce784341dd0844a/outlines_core-0.2.11-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8359a45c59f6a8f2eb717245806501a59044c75f6ea8bd08faaa131cc8cdec45", size = 2130493, upload-time = "2025-05-19T10:12:19.537Z" },
- { url = "https://files.pythonhosted.org/packages/89/7a/d6a2810f90e37d550168e0c0a9a915086ea721444727e3ca2c630898d1ef/outlines_core-0.2.11-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:5d26a46591377340e0b870b8a96ea8341058341a62ee0bded9098e0c88dd24f4", size = 1956804, upload-time = "2025-05-19T10:12:20.755Z" },
- { url = "https://files.pythonhosted.org/packages/ca/ea/339e6c273b5581128c3b7ca27d428d8993c3085912af1a467aa32ef0e9d1/outlines_core-0.2.11-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:ae460a34675fb11d92a5c605a480fbae4cd6c1b2d11b3698da64a7fcaba64dcf", size = 2127085, upload-time = "2025-05-19T10:12:22.02Z" },
{ url = "https://files.pythonhosted.org/packages/92/c7/a65d1fddf49830ebc41422294eacde35286d9f68994a8aa905cb14f5aade/outlines_core-0.2.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86df9740368866295077346440d911df4972da2b3f1f54b8125e6f329e8a8891", size = 2287677, upload-time = "2025-05-19T10:12:24.24Z" },
{ url = "https://files.pythonhosted.org/packages/23/79/8795aed8be9b77dd69d78e7cfbfcf28c179e6b08da6e56bbbf48a09fe55f/outlines_core-0.2.11-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:96ce4dd78f106799be4a0a5795cefd1352806162973756a4b6fce4bb6eddd7e4", size = 2113000, upload-time = "2025-05-19T10:12:25.446Z" },
- { url = "https://files.pythonhosted.org/packages/59/e3/cbe9294b06d92ee1892dbb6f2125d833d68e8629d45d080d6daba54eec2d/outlines_core-0.2.11-cp312-cp312-win32.whl", hash = "sha256:358db161cce3650ba822e118dcf0a1efa571c7deb4864ab9d64ca2c9cca7425d", size = 1765703, upload-time = "2025-05-19T10:12:26.693Z" },
- { url = "https://files.pythonhosted.org/packages/1d/c9/ed3cf362515fac16e313368b9b2f2497051f4ded88679205830b6f889f54/outlines_core-0.2.11-cp312-cp312-win_amd64.whl", hash = "sha256:231f9d20d2630c70665345821780d7808b29539620a75c99f65113b518c51032", size = 2060945, upload-time = "2025-05-19T10:12:28.294Z" },
]
[[package]]
@@ -1626,7 +1593,8 @@ name = "pandas"
version = "2.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" },
{ name = "python-dateutil" },
{ name = "pytz" },
{ name = "tzdata" },
@@ -1644,11 +1612,11 @@ wheels = [
[[package]]
name = "partial-json-parser"
-version = "0.2.1.1.post6"
+version = "0.2.1.1.post7"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/86/13/459e86c9c67a006651803a3df3d0b08f7708bc5483fdc482582d75562949/partial_json_parser-0.2.1.1.post6.tar.gz", hash = "sha256:43896b68929678224cbbe4884a6a5fe9251ded4b30b8b7d7eb569e5feea93afc", size = 10299, upload-time = "2025-06-23T17:51:45.372Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/6d/eed37d7ebc1e0bcd27b831c0cf1fe94881934316187c4b30d23f29ea0bd4/partial_json_parser-0.2.1.1.post7.tar.gz", hash = "sha256:86590e1ba6bcb6739a2dfc17d2323f028cb5884f4c6ce23db376999132c9a922", size = 10296, upload-time = "2025-11-17T07:27:41.202Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/cb/40/1f922794af3dc7503f19319a8804b398a161a2cd54183cff8b12225b8d85/partial_json_parser-0.2.1.1.post6-py3-none-any.whl", hash = "sha256:abc332f09b13ef5233384dbfe7128a0e9ea3fa4b8f8be9b37ac1b433c810e99e", size = 10876, upload-time = "2025-06-23T17:51:44.332Z" },
+ { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" },
]
[[package]]
@@ -1658,12 +1626,15 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "accelerate" },
{ name = "huggingface-hub" },
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" },
{ name = "packaging" },
{ name = "psutil" },
{ name = "pyyaml" },
{ name = "safetensors" },
- { name = "torch" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
{ name = "tqdm" },
{ name = "transformers" },
]
@@ -1678,17 +1649,21 @@ version = "12.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" },
- { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" },
{ url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" },
{ url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" },
{ url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" },
{ url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" },
{ url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" },
{ url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" },
- { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" },
- { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" },
- { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" },
]
[[package]]
@@ -1714,8 +1689,8 @@ name = "prometheus-fastapi-instrumentator"
version = "7.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "prometheus-client" },
- { name = "starlette" },
+ { name = "prometheus-client", marker = "sys_platform == 'linux'" },
+ { name = "starlette", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/69/6d/24d53033cf93826aa7857699a4450c1c67e5b9c710e925b1ed2b320c04df/prometheus_fastapi_instrumentator-7.1.0.tar.gz", hash = "sha256:be7cd61eeea4e5912aeccb4261c6631b3f227d8924542d79eaf5af3f439cbe5e", size = 20220, upload-time = "2025-03-19T19:35:05.351Z" }
wheels = [
@@ -1748,31 +1723,33 @@ wheels = [
[[package]]
name = "protobuf"
-version = "6.33.0"
+version = "6.33.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/19/ff/64a6c8f420818bb873713988ca5492cba3a7946be57e027ac63495157d97/protobuf-6.33.0.tar.gz", hash = "sha256:140303d5c8d2037730c548f8c7b93b20bb1dc301be280c378b82b8894589c954", size = 443463, upload-time = "2025-10-15T20:39:52.159Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/34/44/e49ecff446afeec9d1a66d6bbf9adc21e3c7cea7803a920ca3773379d4f6/protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4", size = 444296, upload-time = "2025-12-06T00:17:53.311Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7e/ee/52b3fa8feb6db4a833dfea4943e175ce645144532e8a90f72571ad85df4e/protobuf-6.33.0-cp310-abi3-win32.whl", hash = "sha256:d6101ded078042a8f17959eccd9236fb7a9ca20d3b0098bbcb91533a5680d035", size = 425593, upload-time = "2025-10-15T20:39:40.29Z" },
- { url = "https://files.pythonhosted.org/packages/7b/c6/7a465f1825872c55e0341ff4a80198743f73b69ce5d43ab18043699d1d81/protobuf-6.33.0-cp310-abi3-win_amd64.whl", hash = "sha256:9a031d10f703f03768f2743a1c403af050b6ae1f3480e9c140f39c45f81b13ee", size = 436882, upload-time = "2025-10-15T20:39:42.841Z" },
- { url = "https://files.pythonhosted.org/packages/e1/a9/b6eee662a6951b9c3640e8e452ab3e09f117d99fc10baa32d1581a0d4099/protobuf-6.33.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:905b07a65f1a4b72412314082c7dbfae91a9e8b68a0cc1577515f8df58ecf455", size = 427521, upload-time = "2025-10-15T20:39:43.803Z" },
- { url = "https://files.pythonhosted.org/packages/10/35/16d31e0f92c6d2f0e77c2a3ba93185130ea13053dd16200a57434c882f2b/protobuf-6.33.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e0697ece353e6239b90ee43a9231318302ad8353c70e6e45499fa52396debf90", size = 324445, upload-time = "2025-10-15T20:39:44.932Z" },
- { url = "https://files.pythonhosted.org/packages/e6/eb/2a981a13e35cda8b75b5585aaffae2eb904f8f351bdd3870769692acbd8a/protobuf-6.33.0-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:e0a1715e4f27355afd9570f3ea369735afc853a6c3951a6afe1f80d8569ad298", size = 339159, upload-time = "2025-10-15T20:39:46.186Z" },
- { url = "https://files.pythonhosted.org/packages/21/51/0b1cbad62074439b867b4e04cc09b93f6699d78fd191bed2bbb44562e077/protobuf-6.33.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:35be49fd3f4fefa4e6e2aacc35e8b837d6703c37a2168a55ac21e9b1bc7559ef", size = 323172, upload-time = "2025-10-15T20:39:47.465Z" },
- { url = "https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/91/1e3a34881a88697a7354ffd177e8746e97a722e5e8db101544b47e84afb1/protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d", size = 425603, upload-time = "2025-12-06T00:17:41.114Z" },
+ { url = "https://files.pythonhosted.org/packages/64/20/4d50191997e917ae13ad0a235c8b42d8c1ab9c3e6fd455ca16d416944355/protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4", size = 436930, upload-time = "2025-12-06T00:17:43.278Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/ca/7e485da88ba45c920fb3f50ae78de29ab925d9e54ef0de678306abfbb497/protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43", size = 427621, upload-time = "2025-12-06T00:17:44.445Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/4f/f743761e41d3b2b2566748eb76bbff2b43e14d5fcab694f494a16458b05f/protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e", size = 324460, upload-time = "2025-12-06T00:17:45.678Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/fa/26468d00a92824020f6f2090d827078c09c9c587e34cbfd2d0c7911221f8/protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872", size = 339168, upload-time = "2025-12-06T00:17:46.813Z" },
+ { url = "https://files.pythonhosted.org/packages/56/13/333b8f421738f149d4fe5e49553bc2a2ab75235486259f689b4b91f96cec/protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f", size = 323270, upload-time = "2025-12-06T00:17:48.253Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/15/4f02896cc3df04fc465010a4c6a0cd89810f54617a32a70ef531ed75d61c/protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c", size = 170501, upload-time = "2025-12-06T00:17:52.211Z" },
]
[[package]]
name = "psutil"
-version = "7.1.3"
+version = "7.2.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e1/88/bdd0a41e5857d5d703287598cbf08dad90aed56774ea52ae071bae9071b6/psutil-7.1.3.tar.gz", hash = "sha256:6c86281738d77335af7aec228328e944b30930899ea760ecf33a4dba66be5e74", size = 489059, upload-time = "2025-11-02T12:25:54.619Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/be/7c/31d1c3ceb1260301f87565f50689dc6da3db427ece1e1e012af22abca54e/psutil-7.2.0.tar.gz", hash = "sha256:2e4f8e1552f77d14dc96fb0f6240c5b34a37081c0889f0853b3b29a496e5ef64", size = 489863, upload-time = "2025-12-23T20:26:24.616Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ef/94/46b9154a800253e7ecff5aaacdf8ebf43db99de4a2dfa18575b02548654e/psutil-7.1.3-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2bdbcd0e58ca14996a42adf3621a6244f1bb2e2e528886959c72cf1e326677ab", size = 238359, upload-time = "2025-11-02T12:26:25.284Z" },
- { url = "https://files.pythonhosted.org/packages/68/3a/9f93cff5c025029a36d9a92fef47220ab4692ee7f2be0fba9f92813d0cb8/psutil-7.1.3-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31fa00f1fbc3c3802141eede66f3a2d51d89716a194bf2cd6fc68310a19880", size = 239171, upload-time = "2025-11-02T12:26:27.23Z" },
- { url = "https://files.pythonhosted.org/packages/ce/b1/5f49af514f76431ba4eea935b8ad3725cdeb397e9245ab919dbc1d1dc20f/psutil-7.1.3-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb428f9f05c1225a558f53e30ccbad9930b11c3fc206836242de1091d3e7dd3", size = 263261, upload-time = "2025-11-02T12:26:29.48Z" },
- { url = "https://files.pythonhosted.org/packages/e0/95/992c8816a74016eb095e73585d747e0a8ea21a061ed3689474fabb29a395/psutil-7.1.3-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d974e02ca2c8eb4812c3f76c30e28836fffc311d55d979f1465c1feeb2b68b", size = 264635, upload-time = "2025-11-02T12:26:31.74Z" },
- { url = "https://files.pythonhosted.org/packages/55/4c/c3ed1a622b6ae2fd3c945a366e64eb35247a31e4db16cf5095e269e8eb3c/psutil-7.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:f39c2c19fe824b47484b96f9692932248a54c43799a84282cfe58d05a6449efd", size = 247633, upload-time = "2025-11-02T12:26:33.887Z" },
- { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" },
+ { url = "https://files.pythonhosted.org/packages/40/c5/a49160bf3e165b7b93a60579a353cf5d939d7f878fe5fd369110f1d18043/psutil-7.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:977a2fcd132d15cb05b32b2d85b98d087cad039b0ce435731670ba74da9e6133", size = 128116, upload-time = "2025-12-23T20:26:53.516Z" },
+ { url = "https://files.pythonhosted.org/packages/10/a1/c75feb480f60cd768fb6ed00ac362a16a33e5076ec8475a22d8162fb2659/psutil-7.2.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:24151011c21fadd94214d7139d7c6c54569290d7e553989bdf0eab73b13beb8c", size = 128925, upload-time = "2025-12-23T20:26:55.573Z" },
+ { url = "https://files.pythonhosted.org/packages/12/ff/e93136587c00a543f4bc768b157fac2c47cd77b180d4f4e5c6efb6ea53a2/psutil-7.2.0-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91f211ba9279e7c61d9d8f84b713cfc38fa161cb0597d5cb3f1ca742f6848254", size = 154666, upload-time = "2025-12-23T20:26:57.312Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/dd/4c2de9c3827c892599d277a69d2224136800870a8a88a80981de905de28d/psutil-7.2.0-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f37415188b7ea98faf90fed51131181646c59098b077550246e2e092e127418b", size = 156109, upload-time = "2025-12-23T20:26:58.851Z" },
+ { url = "https://files.pythonhosted.org/packages/81/3f/090943c682d3629968dd0b04826ddcbc760ee1379021dbe316e2ddfcd01b/psutil-7.2.0-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0d12c7ce6ed1128cd81fd54606afa054ac7dbb9773469ebb58cf2f171c49f2ac", size = 148081, upload-time = "2025-12-23T20:27:01.318Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/88/c39648ebb8ec182d0364af53cdefe6eddb5f3872ba718b5855a8ff65d6d4/psutil-7.2.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ca0faef7976530940dcd39bc5382d0d0d5eb023b186a4901ca341bd8d8684151", size = 147376, upload-time = "2025-12-23T20:27:03.347Z" },
+ { url = "https://files.pythonhosted.org/packages/01/a2/5b39e08bd9b27476bc7cce7e21c71a481ad60b81ffac49baf02687a50d7f/psutil-7.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:abdb74137ca232d20250e9ad471f58d500e7743bc8253ba0bfbf26e570c0e437", size = 136910, upload-time = "2025-12-23T20:27:05.289Z" },
+ { url = "https://files.pythonhosted.org/packages/59/54/53839db1258c1eaeb4ded57ff202144ebc75b23facc05a74fd98d338b0c6/psutil-7.2.0-cp37-abi3-win_arm64.whl", hash = "sha256:284e71038b3139e7ab3834b63b3eb5aa5565fcd61a681ec746ef9a0a8c457fd2", size = 133807, upload-time = "2025-12-23T20:27:06.825Z" },
]
[[package]]
@@ -1801,29 +1778,26 @@ wheels = [
[[package]]
name = "pybase64"
-version = "1.4.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/04/14/43297a7b7f0c1bf0c00b596f754ee3ac946128c64d21047ccf9c9bbc5165/pybase64-1.4.2.tar.gz", hash = "sha256:46cdefd283ed9643315d952fe44de80dc9b9a811ce6e3ec97fd1827af97692d0", size = 137246, upload-time = "2025-07-27T13:08:57.808Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/28/6d/11ede991e800797b9f5ebd528013b34eee5652df93de61ffb24503393fa5/pybase64-1.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:db2c75d1388855b5a1015b65096d7dbcc708e7de3245dcbedeb872ec05a09326", size = 38326, upload-time = "2025-07-27T13:03:09.065Z" },
- { url = "https://files.pythonhosted.org/packages/fe/84/87f1f565f42e2397e2aaa2477c86419f5173c3699881c42325c090982f0a/pybase64-1.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b621a972a01841368fdb9dedc55fd3c6e0c7217d0505ba3b1ebe95e7ef1b493", size = 31661, upload-time = "2025-07-27T13:03:10.295Z" },
- { url = "https://files.pythonhosted.org/packages/cb/2a/a24c810e7a61d2cc6f73fe9ee4872a03030887fa8654150901b15f376f65/pybase64-1.4.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f48c32ac6a16cbf57a5a96a073fef6ff7e3526f623cd49faa112b7f9980bafba", size = 68192, upload-time = "2025-07-27T13:03:11.467Z" },
- { url = "https://files.pythonhosted.org/packages/ee/87/d9baf98cbfc37b8657290ad4421f3a3c36aa0eafe4872c5859cfb52f3448/pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ace8b23093a6bb862477080d9059b784096ab2f97541e8bfc40d42f062875149", size = 71587, upload-time = "2025-07-27T13:03:12.719Z" },
- { url = "https://files.pythonhosted.org/packages/0b/89/3df043cc56ef3b91b7aa0c26ae822a2d7ec8da0b0fd7c309c879b0eb5988/pybase64-1.4.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1772c7532a7fb6301baea3dd3e010148dbf70cd1136a83c2f5f91bdc94822145", size = 59910, upload-time = "2025-07-27T13:03:14.266Z" },
- { url = "https://files.pythonhosted.org/packages/75/4f/6641e9edf37aeb4d4524dc7ba2168eff8d96c90e77f6283c2be3400ab380/pybase64-1.4.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:f86f7faddcba5cbfea475f8ab96567834c28bf09ca6c7c3d66ee445adac80d8f", size = 56701, upload-time = "2025-07-27T13:03:15.6Z" },
- { url = "https://files.pythonhosted.org/packages/2d/7f/20d8ac1046f12420a0954a45a13033e75f98aade36eecd00c64e3549b071/pybase64-1.4.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:0b8c8e275b5294089f314814b4a50174ab90af79d6a4850f6ae11261ff6a7372", size = 59288, upload-time = "2025-07-27T13:03:16.823Z" },
- { url = "https://files.pythonhosted.org/packages/17/ea/9c0ca570e3e50b3c6c3442e280c83b321a0464c86a9db1f982a4ff531550/pybase64-1.4.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:864d85a0470c615807ae8b97d724d068b940a2d10ac13a5f1b9e75a3ce441758", size = 60267, upload-time = "2025-07-27T13:03:18.132Z" },
- { url = "https://files.pythonhosted.org/packages/f9/ac/46894929d71ccedebbfb0284173b0fea96bc029cd262654ba8451a7035d6/pybase64-1.4.2-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:47254d97ed2d8351e30ecfdb9e2414547f66ba73f8a09f932c9378ff75cd10c5", size = 54801, upload-time = "2025-07-27T13:03:19.669Z" },
- { url = "https://files.pythonhosted.org/packages/6a/1e/02c95218ea964f0b2469717c2c69b48e63f4ca9f18af01a5b2a29e4c1216/pybase64-1.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:264b65ecc4f0ee73f3298ab83bbd8008f7f9578361b8df5b448f985d8c63e02a", size = 58599, upload-time = "2025-07-27T13:03:20.951Z" },
- { url = "https://files.pythonhosted.org/packages/15/45/ccc21004930789b8fb439d43e3212a6c260ccddb2bf450c39a20db093f33/pybase64-1.4.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbcc2b30cd740c16c9699f596f22c7a9e643591311ae72b1e776f2d539e9dd9d", size = 52388, upload-time = "2025-07-27T13:03:23.064Z" },
- { url = "https://files.pythonhosted.org/packages/c4/45/22e46e549710c4c237d77785b6fb1bc4c44c288a5c44237ba9daf5c34b82/pybase64-1.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cda9f79c22d51ee4508f5a43b673565f1d26af4330c99f114e37e3186fdd3607", size = 68802, upload-time = "2025-07-27T13:03:24.673Z" },
- { url = "https://files.pythonhosted.org/packages/55/0c/232c6261b81296e5593549b36e6e7884a5da008776d12665923446322c36/pybase64-1.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0c91c6d2a7232e2a1cd10b3b75a8bb657defacd4295a1e5e80455df2dfc84d4f", size = 57841, upload-time = "2025-07-27T13:03:25.948Z" },
- { url = "https://files.pythonhosted.org/packages/20/8a/b35a615ae6f04550d696bb179c414538b3b477999435fdd4ad75b76139e4/pybase64-1.4.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a370dea7b1cee2a36a4d5445d4e09cc243816c5bc8def61f602db5a6f5438e52", size = 54320, upload-time = "2025-07-27T13:03:27.495Z" },
- { url = "https://files.pythonhosted.org/packages/d3/a9/8bd4f9bcc53689f1b457ecefed1eaa080e4949d65a62c31a38b7253d5226/pybase64-1.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9aa4de83f02e462a6f4e066811c71d6af31b52d7484de635582d0e3ec3d6cc3e", size = 56482, upload-time = "2025-07-27T13:03:28.942Z" },
- { url = "https://files.pythonhosted.org/packages/75/e5/4a7735b54a1191f61c3f5c2952212c85c2d6b06eb5fb3671c7603395f70c/pybase64-1.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83a1c2f9ed00fee8f064d548c8654a480741131f280e5750bb32475b7ec8ee38", size = 70959, upload-time = "2025-07-27T13:03:30.171Z" },
- { url = "https://files.pythonhosted.org/packages/d3/67/e2b6cb32c782e12304d467418e70da0212567f42bd4d3b5eb1fdf64920ad/pybase64-1.4.2-cp312-cp312-win32.whl", hash = "sha256:a6e5688b18d558e8c6b8701cc8560836c4bbeba61d33c836b4dba56b19423716", size = 33683, upload-time = "2025-07-27T13:03:31.775Z" },
- { url = "https://files.pythonhosted.org/packages/4f/bc/d5c277496063a09707486180f17abbdbdebbf2f5c4441b20b11d3cb7dc7c/pybase64-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:c995d21b8bd08aa179cd7dd4db0695c185486ecc72da1e8f6c37ec86cadb8182", size = 35817, upload-time = "2025-07-27T13:03:32.99Z" },
- { url = "https://files.pythonhosted.org/packages/e6/69/e4be18ae685acff0ae77f75d4586590f29d2cd187bf603290cf1d635cad4/pybase64-1.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:e254b9258c40509c2ea063a7784f6994988f3f26099d6e08704e3c15dfed9a55", size = 30900, upload-time = "2025-07-27T13:03:34.499Z" },
+version = "1.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/b8/4ed5c7ad5ec15b08d35cc79ace6145d5c1ae426e46435f4987379439dfea/pybase64-1.4.3.tar.gz", hash = "sha256:c2ed274c9e0ba9c8f9c4083cfe265e66dd679126cd9c2027965d807352f3f053", size = 137272, upload-time = "2025-12-06T13:27:04.013Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/43/1b/9a8cab0042b464e9a876d5c65fe5127445a2436da36fda64899b119b1a1b/pybase64-1.4.3-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f0b3f200c3e06316f6bebabd458b4e4bcd4c2ca26af7c0c766614d91968dee27", size = 68210, upload-time = "2025-12-06T13:23:18.813Z" },
+ { url = "https://files.pythonhosted.org/packages/62/f7/965b79ff391ad208b50e412b5d3205ccce372a2d27b7218ae86d5295b105/pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb632edfd132b3eaf90c39c89aa314beec4e946e210099b57d40311f704e11d4", size = 71599, upload-time = "2025-12-06T13:23:20.195Z" },
+ { url = "https://files.pythonhosted.org/packages/03/4b/a3b5175130b3810bbb8ccfa1edaadbd3afddb9992d877c8a1e2f274b476e/pybase64-1.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:356ef1d74648ce997f5a777cf8f1aefecc1c0b4fe6201e0ef3ec8a08170e1b54", size = 59922, upload-time = "2025-12-06T13:23:21.487Z" },
+ { url = "https://files.pythonhosted.org/packages/da/5d/c38d1572027fc601b62d7a407721688b04b4d065d60ca489912d6893e6cf/pybase64-1.4.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:c48361f90db32bacaa5518419d4eb9066ba558013aaf0c7781620279ecddaeb9", size = 56712, upload-time = "2025-12-06T13:23:22.77Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/d4/4e04472fef485caa8f561d904d4d69210a8f8fc1608ea15ebd9012b92655/pybase64-1.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:702bcaa16ae02139d881aeaef5b1c8ffb4a3fae062fe601d1e3835e10310a517", size = 59300, upload-time = "2025-12-06T13:23:24.543Z" },
+ { url = "https://files.pythonhosted.org/packages/86/e7/16e29721b86734b881d09b7e23dfd7c8408ad01a4f4c7525f3b1088e25ec/pybase64-1.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:53d0ffe1847b16b647c6413d34d1de08942b7724273dd57e67dcbdb10c574045", size = 60278, upload-time = "2025-12-06T13:23:25.608Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/02/18515f211d7c046be32070709a8efeeef8a0203de4fd7521e6b56404731b/pybase64-1.4.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:9a1792e8b830a92736dae58f0c386062eb038dfe8004fb03ba33b6083d89cd43", size = 54817, upload-time = "2025-12-06T13:23:26.633Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/be/14e29d8e1a481dbff151324c96dd7b5d2688194bb65dc8a00ca0e1ad1e86/pybase64-1.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1d468b1b1ac5ad84875a46eaa458663c3721e8be5f155ade356406848d3701f6", size = 58611, upload-time = "2025-12-06T13:23:27.684Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/8a/a2588dfe24e1bbd742a554553778ab0d65fdf3d1c9a06d10b77047d142aa/pybase64-1.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e97b7bdbd62e71898cd542a6a9e320d9da754ff3ebd02cb802d69087ee94d468", size = 52404, upload-time = "2025-12-06T13:23:28.714Z" },
+ { url = "https://files.pythonhosted.org/packages/27/fc/afcda7445bebe0cbc38cafdd7813234cdd4fc5573ff067f1abf317bb0cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b33aeaa780caaa08ffda87fc584d5eab61e3d3bbb5d86ead02161dc0c20d04bc", size = 68817, upload-time = "2025-12-06T13:23:30.079Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/3a/87c3201e555ed71f73e961a787241a2438c2bbb2ca8809c29ddf938a3157/pybase64-1.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c0efcf78f11cf866bed49caa7b97552bc4855a892f9cc2372abcd3ed0056f0d", size = 57854, upload-time = "2025-12-06T13:23:31.17Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/7d/931c2539b31a7b375e7d595b88401eeb5bd6c5ce1059c9123f9b608aaa14/pybase64-1.4.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:66e3791f2ed725a46593f8bd2761ff37d01e2cdad065b1dceb89066f476e50c6", size = 54333, upload-time = "2025-12-06T13:23:32.422Z" },
+ { url = "https://files.pythonhosted.org/packages/de/5e/537601e02cc01f27e9d75f440f1a6095b8df44fc28b1eef2cd739aea8cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:72bb0b6bddadab26e1b069bb78e83092711a111a80a0d6b9edcb08199ad7299b", size = 56492, upload-time = "2025-12-06T13:23:33.515Z" },
+ { url = "https://files.pythonhosted.org/packages/96/97/2a2e57acf8f5c9258d22aba52e71f8050e167b29ed2ee1113677c1b600c1/pybase64-1.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5b3365dbcbcdb0a294f0f50af0c0a16b27a232eddeeb0bceeefd844ef30d2a23", size = 70974, upload-time = "2025-12-06T13:23:36.27Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/8f/43c3bb11ca9bacf81cb0b7a71500bb65b2eda6d5fe07433c09b543de97f3/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5c29a582b0ea3936d02bd6fe9bf674ab6059e6e45ab71c78404ab2c913224414", size = 43461, upload-time = "2025-12-06T13:26:28.906Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/4c/2a5258329200be57497d3972b5308558c6de42e3749c6cc2aa1cbe34b25a/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6b664758c804fa919b4f1257aa8cf68e95db76fc331de5f70bfc3a34655afe1", size = 36058, upload-time = "2025-12-06T13:26:30.092Z" },
]
[[package]]
@@ -1846,7 +1820,7 @@ wheels = [
[[package]]
name = "pydantic"
-version = "2.12.4"
+version = "2.12.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-types" },
@@ -1854,14 +1828,14 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "typing-inspection" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" },
]
[package.optional-dependencies]
email = [
- { name = "email-validator" },
+ { name = "email-validator", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -1898,8 +1872,8 @@ name = "pydantic-extra-types"
version = "2.10.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "pydantic" },
- { name = "typing-extensions" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3a/10/fb64987804cde41bcc39d9cd757cd5f2bb5d97b389d81aa70238b14b8a7e/pydantic_extra_types-2.10.6.tar.gz", hash = "sha256:c63d70bf684366e6bbe1f4ee3957952ebe6973d41e7802aea0b770d06b116aeb", size = 141858, upload-time = "2025-10-08T13:47:49.483Z" }
wheels = [
@@ -1908,7 +1882,7 @@ wheels = [
[package.optional-dependencies]
pycountry = [
- { name = "pycountry" },
+ { name = "pycountry", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -1916,9 +1890,9 @@ name = "pydantic-settings"
version = "2.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "pydantic" },
- { name = "python-dotenv" },
- { name = "typing-inspection" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "python-dotenv", marker = "sys_platform == 'linux'" },
+ { name = "typing-inspection", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" }
wheels = [
@@ -1945,12 +1919,12 @@ wheels = [
[package.optional-dependencies]
crypto = [
- { name = "cryptography" },
+ { name = "cryptography", marker = "sys_platform == 'linux'" },
]
[[package]]
name = "pytest"
-version = "8.4.2"
+version = "9.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
@@ -1959,22 +1933,22 @@ dependencies = [
{ name = "pluggy" },
{ name = "pygments" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
[[package]]
name = "pytest-asyncio"
-version = "1.2.0"
+version = "1.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
]
[[package]]
@@ -2009,11 +1983,11 @@ wheels = [
[[package]]
name = "python-multipart"
-version = "0.0.20"
+version = "0.0.21"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" },
]
[[package]]
@@ -2058,55 +2032,40 @@ name = "pyzmq"
version = "27.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cffi", marker = "implementation_name == 'pypy'" },
+ { name = "cffi", marker = "implementation_name == 'pypy' and sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/04/0b/3c9baedbdf613ecaa7aa07027780b8867f57b6293b6ee50de316c9f3222b/pyzmq-27.1.0.tar.gz", hash = "sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540", size = 281750, upload-time = "2025-09-08T23:10:18.157Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/92/e7/038aab64a946d535901103da16b953c8c9cc9c961dadcbf3609ed6428d23/pyzmq-27.1.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:452631b640340c928fa343801b0d07eb0c3789a5ffa843f6e1a9cee0ba4eb4fc", size = 1306279, upload-time = "2025-09-08T23:08:03.807Z" },
{ url = "https://files.pythonhosted.org/packages/e8/5e/c3c49fdd0f535ef45eefcc16934648e9e59dace4a37ee88fc53f6cd8e641/pyzmq-27.1.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1c179799b118e554b66da67d88ed66cd37a169f1f23b5d9f0a231b4e8d44a113", size = 895645, upload-time = "2025-09-08T23:08:05.301Z" },
{ url = "https://files.pythonhosted.org/packages/f8/e5/b0b2504cb4e903a74dcf1ebae157f9e20ebb6ea76095f6cfffea28c42ecd/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3837439b7f99e60312f0c926a6ad437b067356dc2bc2ec96eb395fd0fe804233", size = 652574, upload-time = "2025-09-08T23:08:06.828Z" },
{ url = "https://files.pythonhosted.org/packages/f8/9b/c108cdb55560eaf253f0cbdb61b29971e9fb34d9c3499b0e96e4e60ed8a5/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43ad9a73e3da1fab5b0e7e13402f0b2fb934ae1c876c51d0afff0e7c052eca31", size = 840995, upload-time = "2025-09-08T23:08:08.396Z" },
{ url = "https://files.pythonhosted.org/packages/c2/bb/b79798ca177b9eb0825b4c9998c6af8cd2a7f15a6a1a4272c1d1a21d382f/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0de3028d69d4cdc475bfe47a6128eb38d8bc0e8f4d69646adfbcd840facbac28", size = 1642070, upload-time = "2025-09-08T23:08:09.989Z" },
{ url = "https://files.pythonhosted.org/packages/9c/80/2df2e7977c4ede24c79ae39dcef3899bfc5f34d1ca7a5b24f182c9b7a9ca/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:cf44a7763aea9298c0aa7dbf859f87ed7012de8bda0f3977b6fb1d96745df856", size = 2021121, upload-time = "2025-09-08T23:08:11.907Z" },
{ url = "https://files.pythonhosted.org/packages/46/bd/2d45ad24f5f5ae7e8d01525eb76786fa7557136555cac7d929880519e33a/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f30f395a9e6fbca195400ce833c731e7b64c3919aa481af4d88c3759e0cb7496", size = 1878550, upload-time = "2025-09-08T23:08:13.513Z" },
- { url = "https://files.pythonhosted.org/packages/e6/2f/104c0a3c778d7c2ab8190e9db4f62f0b6957b53c9d87db77c284b69f33ea/pyzmq-27.1.0-cp312-abi3-win32.whl", hash = "sha256:250e5436a4ba13885494412b3da5d518cd0d3a278a1ae640e113c073a5f88edd", size = 559184, upload-time = "2025-09-08T23:08:15.163Z" },
- { url = "https://files.pythonhosted.org/packages/fc/7f/a21b20d577e4100c6a41795842028235998a643b1ad406a6d4163ea8f53e/pyzmq-27.1.0-cp312-abi3-win_amd64.whl", hash = "sha256:9ce490cf1d2ca2ad84733aa1d69ce6855372cb5ce9223802450c9b2a7cba0ccf", size = 619480, upload-time = "2025-09-08T23:08:17.192Z" },
- { url = "https://files.pythonhosted.org/packages/78/c2/c012beae5f76b72f007a9e91ee9401cb88c51d0f83c6257a03e785c81cc2/pyzmq-27.1.0-cp312-abi3-win_arm64.whl", hash = "sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f", size = 552993, upload-time = "2025-09-08T23:08:18.926Z" },
]
[[package]]
name = "ray"
-version = "2.51.1"
+version = "2.53.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "click" },
- { name = "filelock" },
- { name = "jsonschema" },
- { name = "msgpack" },
- { name = "packaging" },
- { name = "protobuf" },
- { name = "pyyaml" },
- { name = "requests" },
+ { name = "click", marker = "sys_platform == 'linux'" },
+ { name = "filelock", marker = "sys_platform == 'linux'" },
+ { name = "jsonschema", marker = "sys_platform == 'linux'" },
+ { name = "msgpack", marker = "sys_platform == 'linux'" },
+ { name = "packaging", marker = "sys_platform == 'linux'" },
+ { name = "protobuf", marker = "sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'linux'" },
+ { name = "requests", marker = "sys_platform == 'linux'" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/f6/95/51e44ce79e42f02ca1c4d4c5501e6dd49f3a384c5f6324aceb4e0015988a/ray-2.51.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ef847b025ca758baee4571a1ca001d973897cad772f8e95d7f303d24c38b649e", size = 68029226, upload-time = "2025-11-01T03:24:21.928Z" },
- { url = "https://files.pythonhosted.org/packages/e2/b5/a93e39e131067edb7cba3385a609f61aaaf7aa54728cd3a7474bfbf3b0fc/ray-2.51.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:0bed9408712bad1511e65683a455302f88d94e5e5cb6a58cc4a154b61d8a0b4a", size = 70502423, upload-time = "2025-11-01T03:24:27.398Z" },
- { url = "https://files.pythonhosted.org/packages/ee/59/69b7a653ed8176fc7fd894d462ed34bb1477e7fa71700324de99179b5b7e/ray-2.51.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:4e786da7862cf73664977d0212a505d6d5a585beadf63e7dc1e1c129259bee20", size = 71353730, upload-time = "2025-11-01T03:24:33.495Z" },
- { url = "https://files.pythonhosted.org/packages/38/91/0c4fe7aed34baa14d9c050c88f39ff16083d555bd6dcd6c4ffb4332a6f8a/ray-2.51.1-cp312-cp312-win_amd64.whl", hash = "sha256:198fda93074a6863555f4003e9013bb2ba0cd50b59b18c02affdc294b28a2eef", size = 26674921, upload-time = "2025-11-01T03:24:38.394Z" },
+ { url = "https://files.pythonhosted.org/packages/38/68/8e59b8413f3751fe7ce8b98ee8787d13964b47a4043587950790a9dd2151/ray-2.53.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:65e2ce58d3dc6baa3cf45824d889c1968ebde565ee54dfd80a98af8f31af8e4a", size = 71504450, upload-time = "2025-12-20T16:06:34.922Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/db/978a50d264565ca42e2a4bf115ec9a1f04f19ca5e620e6aa2f280747b644/ray-2.53.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:14f46363e9b4cf0c1c8b4d8623ec337c5bd408377831b5e5b50067930137bbca", size = 72370424, upload-time = "2025-12-20T16:06:40.821Z" },
]
[package.optional-dependencies]
cgraph = [
- { name = "cupy-cuda12x", marker = "sys_platform != 'darwin'" },
-]
-
-[[package]]
-name = "redis"
-version = "7.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/43/c8/983d5c6579a411d8a99bc5823cc5712768859b5ce2c8afe1a65b37832c81/redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c", size = 4796669, upload-time = "2025-11-19T15:54:39.961Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" },
+ { name = "cupy-cuda12x", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -2114,9 +2073,9 @@ name = "referencing"
version = "0.37.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "attrs" },
- { name = "rpds-py" },
- { name = "typing-extensions" },
+ { name = "attrs", marker = "sys_platform == 'linux'" },
+ { name = "rpds-py", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
wheels = [
@@ -2175,16 +2134,16 @@ wheels = [
[[package]]
name = "rich-toolkit"
-version = "0.15.1"
+version = "0.17.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "click" },
- { name = "rich" },
- { name = "typing-extensions" },
+ { name = "click", marker = "sys_platform == 'linux'" },
+ { name = "rich", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/67/33/1a18839aaa8feef7983590c05c22c9c09d245ada6017d118325bbfcc7651/rich_toolkit-0.15.1.tar.gz", hash = "sha256:6f9630eb29f3843d19d48c3bd5706a086d36d62016687f9d0efa027ddc2dd08a", size = 115322, upload-time = "2025-09-04T09:28:11.789Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/97/09/3f9b8d9daaf235195c626f21e03604c05b987404ee3bcacee0c1f67f2a8e/rich_toolkit-0.17.1.tar.gz", hash = "sha256:5af54df8d1dd9c8530e462e1bdcaed625c9b49f5a55b035aa0ba1c17bdb87c9a", size = 187925, upload-time = "2025-12-17T10:49:22.583Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c8/49/42821d55ead7b5a87c8d121edf323cb393d8579f63e933002ade900b784f/rich_toolkit-0.15.1-py3-none-any.whl", hash = "sha256:36a0b1d9a135d26776e4b78f1d5c2655da6e0ef432380b5c6b523c8d8ab97478", size = 29412, upload-time = "2025-09-04T09:28:10.587Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/7b/15e55fa8a76d0d41bf34d965af78acdaf80a315907adb30de8b63c272694/rich_toolkit-0.17.1-py3-none-any.whl", hash = "sha256:96d24bb921ecd225ffce7c526a9149e74006410c05e6d405bd74ffd54d5631ed", size = 31412, upload-time = "2025-12-17T10:49:21.793Z" },
]
[[package]]
@@ -2193,8 +2152,6 @@ version = "0.7.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e5/f5/8bed2310abe4ae04b67a38374a4d311dd85220f5d8da56f47ae9361be0b0/rignore-0.7.6.tar.gz", hash = "sha256:00d3546cd793c30cb17921ce674d2c8f3a4b00501cb0e3dd0e82217dbeba2671", size = 57140, upload-time = "2025-11-05T21:41:21.968Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/0b/0e/012556ef3047a2628842b44e753bb15f4dc46806780ff090f1e8fe4bf1eb/rignore-0.7.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:03e82348cb7234f8d9b2834f854400ddbbd04c0f8f35495119e66adbd37827a8", size = 883488, upload-time = "2025-11-05T20:42:41.359Z" },
- { url = "https://files.pythonhosted.org/packages/93/b0/d4f1f3fe9eb3f8e382d45ce5b0547ea01c4b7e0b4b4eb87bcd66a1d2b888/rignore-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9e624f6be6116ea682e76c5feb71ea91255c67c86cb75befe774365b2931961", size = 820411, upload-time = "2025-11-05T20:42:24.782Z" },
{ url = "https://files.pythonhosted.org/packages/4a/c8/dea564b36dedac8de21c18e1851789545bc52a0c22ece9843444d5608a6a/rignore-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bda49950d405aa8d0ebe26af807c4e662dd281d926530f03f29690a2e07d649a", size = 897821, upload-time = "2025-11-05T20:40:52.613Z" },
{ url = "https://files.pythonhosted.org/packages/b3/2b/ee96db17ac1835e024c5d0742eefb7e46de60020385ac883dd3d1cde2c1f/rignore-0.7.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5fd5ab3840b8c16851d327ed06e9b8be6459702a53e5ab1fc4073b684b3789e", size = 873963, upload-time = "2025-11-05T20:41:07.49Z" },
{ url = "https://files.pythonhosted.org/packages/a5/8c/ad5a57bbb9d14d5c7e5960f712a8a0b902472ea3f4a2138cbf70d1777b75/rignore-0.7.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ced2a248352636a5c77504cb755dc02c2eef9a820a44d3f33061ce1bb8a7f2d2", size = 1169216, upload-time = "2025-11-05T20:41:23.73Z" },
@@ -2205,80 +2162,72 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d4/cf/2c64f0b6725149f7c6e7e5a909d14354889b4beaadddaa5fff023ec71084/rignore-0.7.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5719ea14ea2b652c0c0894be5dfde954e1853a80dea27dd2fbaa749618d837f5", size = 1139186, upload-time = "2025-11-05T21:40:31.27Z" },
{ url = "https://files.pythonhosted.org/packages/75/95/a86c84909ccc24af0d094b50d54697951e576c252a4d9f21b47b52af9598/rignore-0.7.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e23424fc7ce35726854f639cb7968151a792c0c3d9d082f7f67e0c362cfecca", size = 1117604, upload-time = "2025-11-05T21:40:48.07Z" },
{ url = "https://files.pythonhosted.org/packages/7f/5e/13b249613fd5d18d58662490ab910a9f0be758981d1797789913adb4e918/rignore-0.7.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3efdcf1dd84d45f3e2bd2f93303d9be103888f56dfa7c3349b5bf4f0657ec696", size = 1127725, upload-time = "2025-11-05T21:41:05.804Z" },
- { url = "https://files.pythonhosted.org/packages/c7/28/fa5dcd1e2e16982c359128664e3785f202d3eca9b22dd0b2f91c4b3d242f/rignore-0.7.6-cp312-cp312-win32.whl", hash = "sha256:ccca9d1a8b5234c76b71546fc3c134533b013f40495f394a65614a81f7387046", size = 646145, upload-time = "2025-11-05T21:41:51.096Z" },
- { url = "https://files.pythonhosted.org/packages/26/87/69387fb5dd81a0f771936381431780b8cf66fcd2cfe9495e1aaf41548931/rignore-0.7.6-cp312-cp312-win_amd64.whl", hash = "sha256:c96a285e4a8bfec0652e0bfcf42b1aabcdda1e7625f5006d188e3b1c87fdb543", size = 726090, upload-time = "2025-11-05T21:41:36.485Z" },
- { url = "https://files.pythonhosted.org/packages/24/5f/e8418108dcda8087fb198a6f81caadbcda9fd115d61154bf0df4d6d3619b/rignore-0.7.6-cp312-cp312-win_arm64.whl", hash = "sha256:a64a750e7a8277a323f01ca50b7784a764845f6cce2fe38831cb93f0508d0051", size = 656317, upload-time = "2025-11-05T21:41:25.305Z" },
]
[[package]]
name = "rpds-py"
-version = "0.28.0"
+version = "0.30.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" },
- { url = "https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" },
- { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" },
- { url = "https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" },
- { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" },
- { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" },
- { url = "https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" },
- { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" },
- { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" },
- { url = "https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" },
- { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" },
- { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" },
- { url = "https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" },
- { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" },
- { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" },
+ { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" },
+ { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" },
+ { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" },
+ { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" },
+ { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" },
+ { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" },
]
[[package]]
name = "ruff"
-version = "0.14.5"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/82/fa/fbb67a5780ae0f704876cb8ac92d6d76da41da4dc72b7ed3565ab18f2f52/ruff-0.14.5.tar.gz", hash = "sha256:8d3b48d7d8aad423d3137af7ab6c8b1e38e4de104800f0d596990f6ada1a9fc1", size = 5615944, upload-time = "2025-11-13T19:58:51.155Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/68/31/c07e9c535248d10836a94e4f4e8c5a31a1beed6f169b31405b227872d4f4/ruff-0.14.5-py3-none-linux_armv6l.whl", hash = "sha256:f3b8248123b586de44a8018bcc9fefe31d23dda57a34e6f0e1e53bd51fd63594", size = 13171630, upload-time = "2025-11-13T19:57:54.894Z" },
- { url = "https://files.pythonhosted.org/packages/8e/5c/283c62516dca697cd604c2796d1487396b7a436b2f0ecc3fd412aca470e0/ruff-0.14.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f7a75236570318c7a30edd7f5491945f0169de738d945ca8784500b517163a72", size = 13413925, upload-time = "2025-11-13T19:57:59.181Z" },
- { url = "https://files.pythonhosted.org/packages/b6/f3/aa319f4afc22cb6fcba2b9cdfc0f03bbf747e59ab7a8c5e90173857a1361/ruff-0.14.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6d146132d1ee115f8802356a2dc9a634dbf58184c51bff21f313e8cd1c74899a", size = 12574040, upload-time = "2025-11-13T19:58:02.056Z" },
- { url = "https://files.pythonhosted.org/packages/f9/7f/cb5845fcc7c7e88ed57f58670189fc2ff517fe2134c3821e77e29fd3b0c8/ruff-0.14.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2380596653dcd20b057794d55681571a257a42327da8894b93bbd6111aa801f", size = 13009755, upload-time = "2025-11-13T19:58:05.172Z" },
- { url = "https://files.pythonhosted.org/packages/21/d2/bcbedbb6bcb9253085981730687ddc0cc7b2e18e8dc13cf4453de905d7a0/ruff-0.14.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d1fa985a42b1f075a098fa1ab9d472b712bdb17ad87a8ec86e45e7fa6273e68", size = 12937641, upload-time = "2025-11-13T19:58:08.345Z" },
- { url = "https://files.pythonhosted.org/packages/a4/58/e25de28a572bdd60ffc6bb71fc7fd25a94ec6a076942e372437649cbb02a/ruff-0.14.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88f0770d42b7fa02bbefddde15d235ca3aa24e2f0137388cc15b2dcbb1f7c7a7", size = 13610854, upload-time = "2025-11-13T19:58:11.419Z" },
- { url = "https://files.pythonhosted.org/packages/7d/24/43bb3fd23ecee9861970978ea1a7a63e12a204d319248a7e8af539984280/ruff-0.14.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:3676cb02b9061fee7294661071c4709fa21419ea9176087cb77e64410926eb78", size = 15061088, upload-time = "2025-11-13T19:58:14.551Z" },
- { url = "https://files.pythonhosted.org/packages/23/44/a022f288d61c2f8c8645b24c364b719aee293ffc7d633a2ca4d116b9c716/ruff-0.14.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b595bedf6bc9cab647c4a173a61acf4f1ac5f2b545203ba82f30fcb10b0318fb", size = 14734717, upload-time = "2025-11-13T19:58:17.518Z" },
- { url = "https://files.pythonhosted.org/packages/58/81/5c6ba44de7e44c91f68073e0658109d8373b0590940efe5bd7753a2585a3/ruff-0.14.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f55382725ad0bdb2e8ee2babcbbfb16f124f5a59496a2f6a46f1d9d99d93e6e2", size = 14028812, upload-time = "2025-11-13T19:58:20.533Z" },
- { url = "https://files.pythonhosted.org/packages/ad/ef/41a8b60f8462cb320f68615b00299ebb12660097c952c600c762078420f8/ruff-0.14.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7497d19dce23976bdaca24345ae131a1d38dcfe1b0850ad8e9e6e4fa321a6e19", size = 13825656, upload-time = "2025-11-13T19:58:23.345Z" },
- { url = "https://files.pythonhosted.org/packages/7c/00/207e5de737fdb59b39eb1fac806904fe05681981b46d6a6db9468501062e/ruff-0.14.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:410e781f1122d6be4f446981dd479470af86537fb0b8857f27a6e872f65a38e4", size = 13959922, upload-time = "2025-11-13T19:58:26.537Z" },
- { url = "https://files.pythonhosted.org/packages/bc/7e/fa1f5c2776db4be405040293618846a2dece5c70b050874c2d1f10f24776/ruff-0.14.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c01be527ef4c91a6d55e53b337bfe2c0f82af024cc1a33c44792d6844e2331e1", size = 12932501, upload-time = "2025-11-13T19:58:29.822Z" },
- { url = "https://files.pythonhosted.org/packages/67/d8/d86bf784d693a764b59479a6bbdc9515ae42c340a5dc5ab1dabef847bfaa/ruff-0.14.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f66e9bb762e68d66e48550b59c74314168ebb46199886c5c5aa0b0fbcc81b151", size = 12927319, upload-time = "2025-11-13T19:58:32.923Z" },
- { url = "https://files.pythonhosted.org/packages/ac/de/ee0b304d450ae007ce0cb3e455fe24fbcaaedae4ebaad6c23831c6663651/ruff-0.14.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d93be8f1fa01022337f1f8f3bcaa7ffee2d0b03f00922c45c2207954f351f465", size = 13206209, upload-time = "2025-11-13T19:58:35.952Z" },
- { url = "https://files.pythonhosted.org/packages/33/aa/193ca7e3a92d74f17d9d5771a765965d2cf42c86e6f0fd95b13969115723/ruff-0.14.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c135d4b681f7401fe0e7312017e41aba9b3160861105726b76cfa14bc25aa367", size = 13953709, upload-time = "2025-11-13T19:58:39.002Z" },
- { url = "https://files.pythonhosted.org/packages/cc/f1/7119e42aa1d3bf036ffc9478885c2e248812b7de9abea4eae89163d2929d/ruff-0.14.5-py3-none-win32.whl", hash = "sha256:c83642e6fccfb6dea8b785eb9f456800dcd6a63f362238af5fc0c83d027dd08b", size = 12925808, upload-time = "2025-11-13T19:58:42.779Z" },
- { url = "https://files.pythonhosted.org/packages/3b/9d/7c0a255d21e0912114784e4a96bf62af0618e2190cae468cd82b13625ad2/ruff-0.14.5-py3-none-win_amd64.whl", hash = "sha256:9d55d7af7166f143c94eae1db3312f9ea8f95a4defef1979ed516dbb38c27621", size = 14331546, upload-time = "2025-11-13T19:58:45.691Z" },
- { url = "https://files.pythonhosted.org/packages/e5/80/69756670caedcf3b9be597a6e12276a6cf6197076eb62aad0c608f8efce0/ruff-0.14.5-py3-none-win_arm64.whl", hash = "sha256:4b700459d4649e2594b31f20a9de33bc7c19976d4746d8d0798ad959621d64a4", size = 13433331, upload-time = "2025-11-13T19:58:48.434Z" },
+version = "0.14.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763, upload-time = "2025-12-18T19:28:57.98Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080, upload-time = "2025-12-18T19:29:25.642Z" },
+ { url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320, upload-time = "2025-12-18T19:29:02.571Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434, upload-time = "2025-12-18T19:28:51.202Z" },
+ { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961, upload-time = "2025-12-18T19:29:04.991Z" },
+ { url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629, upload-time = "2025-12-18T19:29:21.381Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234, upload-time = "2025-12-18T19:29:00.132Z" },
+ { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890, upload-time = "2025-12-18T19:28:53.573Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172, upload-time = "2025-12-18T19:29:23.453Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260, upload-time = "2025-12-18T19:29:27.808Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978, upload-time = "2025-12-18T19:29:11.32Z" },
+ { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036, upload-time = "2025-12-18T19:29:09.184Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051, upload-time = "2025-12-18T19:29:13.431Z" },
+ { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998, upload-time = "2025-12-18T19:29:06.994Z" },
+ { url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891, upload-time = "2025-12-18T19:28:55.811Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660, upload-time = "2025-12-18T19:29:16.531Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187, upload-time = "2025-12-18T19:29:19.006Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283, upload-time = "2025-12-18T19:29:30.16Z" },
+ { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839, upload-time = "2025-12-18T19:28:48.636Z" },
]
[[package]]
name = "safetensors"
-version = "0.6.2"
+version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ac/cc/738f3011628920e027a11754d9cae9abec1aed00f7ae860abbf843755233/safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9", size = 197968, upload-time = "2025-08-08T13:13:58.654Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/4d/b1/3f5fd73c039fc87dba3ff8b5d528bfc5a32b597fea8e7a6a4800343a17c7/safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba", size = 454797, upload-time = "2025-08-08T13:13:52.066Z" },
- { url = "https://files.pythonhosted.org/packages/8c/c9/bb114c158540ee17907ec470d01980957fdaf87b4aa07914c24eba87b9c6/safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b", size = 432206, upload-time = "2025-08-08T13:13:50.931Z" },
- { url = "https://files.pythonhosted.org/packages/d3/8e/f70c34e47df3110e8e0bb268d90db8d4be8958a54ab0336c9be4fe86dac8/safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd", size = 473261, upload-time = "2025-08-08T13:13:41.259Z" },
- { url = "https://files.pythonhosted.org/packages/2a/f5/be9c6a7c7ef773e1996dc214e73485286df1836dbd063e8085ee1976f9cb/safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a", size = 485117, upload-time = "2025-08-08T13:13:43.506Z" },
- { url = "https://files.pythonhosted.org/packages/c9/55/23f2d0a2c96ed8665bf17a30ab4ce5270413f4d74b6d87dd663258b9af31/safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1", size = 616154, upload-time = "2025-08-08T13:13:45.096Z" },
- { url = "https://files.pythonhosted.org/packages/98/c6/affb0bd9ce02aa46e7acddbe087912a04d953d7a4d74b708c91b5806ef3f/safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda", size = 520713, upload-time = "2025-08-08T13:13:46.25Z" },
- { url = "https://files.pythonhosted.org/packages/fe/5d/5a514d7b88e310c8b146e2404e0dc161282e78634d9358975fd56dfd14be/safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f", size = 485835, upload-time = "2025-08-08T13:13:49.373Z" },
- { url = "https://files.pythonhosted.org/packages/7a/7b/4fc3b2ba62c352b2071bea9cfbad330fadda70579f617506ae1a2f129cab/safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19", size = 521503, upload-time = "2025-08-08T13:13:47.651Z" },
- { url = "https://files.pythonhosted.org/packages/5a/50/0057e11fe1f3cead9254315a6c106a16dd4b1a19cd247f7cc6414f6b7866/safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce", size = 652256, upload-time = "2025-08-08T13:13:53.167Z" },
- { url = "https://files.pythonhosted.org/packages/e9/29/473f789e4ac242593ac1656fbece6e1ecd860bb289e635e963667807afe3/safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7", size = 747281, upload-time = "2025-08-08T13:13:54.656Z" },
- { url = "https://files.pythonhosted.org/packages/68/52/f7324aad7f2df99e05525c84d352dc217e0fa637a4f603e9f2eedfbe2c67/safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5", size = 692286, upload-time = "2025-08-08T13:13:55.884Z" },
- { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" },
- { url = "https://files.pythonhosted.org/packages/59/a7/e2158e17bbe57d104f0abbd95dff60dda916cf277c9f9663b4bf9bad8b6e/safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1", size = 308926, upload-time = "2025-08-08T13:14:01.095Z" },
- { url = "https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" },
+ { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" },
+ { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" },
+ { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" },
+ { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" },
+ { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" },
]
[[package]]
@@ -2286,20 +2235,14 @@ name = "scipy"
version = "1.16.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/40/41/5bf55c3f386b1643812f3a5674edf74b26184378ef0f3e7c7a09a7e2ca7f/scipy-1.16.3-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81fc5827606858cf71446a5e98715ba0e11f0dbc83d71c7409d05486592a45d6", size = 36659043, upload-time = "2025-10-28T17:32:40.285Z" },
- { url = "https://files.pythonhosted.org/packages/1e/0f/65582071948cfc45d43e9870bf7ca5f0e0684e165d7c9ef4e50d783073eb/scipy-1.16.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c97176013d404c7346bf57874eaac5187d969293bf40497140b0a2b2b7482e07", size = 28898986, upload-time = "2025-10-28T17:32:45.325Z" },
- { url = "https://files.pythonhosted.org/packages/96/5e/36bf3f0ac298187d1ceadde9051177d6a4fe4d507e8f59067dc9dd39e650/scipy-1.16.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2b71d93c8a9936046866acebc915e2af2e292b883ed6e2cbe5c34beb094b82d9", size = 20889814, upload-time = "2025-10-28T17:32:49.277Z" },
- { url = "https://files.pythonhosted.org/packages/80/35/178d9d0c35394d5d5211bbff7ac4f2986c5488b59506fef9e1de13ea28d3/scipy-1.16.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3d4a07a8e785d80289dfe66b7c27d8634a773020742ec7187b85ccc4b0e7b686", size = 23565795, upload-time = "2025-10-28T17:32:53.337Z" },
{ url = "https://files.pythonhosted.org/packages/fa/46/d1146ff536d034d02f83c8afc3c4bab2eddb634624d6529a8512f3afc9da/scipy-1.16.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0553371015692a898e1aa858fed67a3576c34edefa6b7ebdb4e9dde49ce5c203", size = 33349476, upload-time = "2025-10-28T17:32:58.353Z" },
{ url = "https://files.pythonhosted.org/packages/79/2e/415119c9ab3e62249e18c2b082c07aff907a273741b3f8160414b0e9193c/scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:72d1717fd3b5e6ec747327ce9bda32d5463f472c9dce9f54499e81fbd50245a1", size = 35676692, upload-time = "2025-10-28T17:33:03.88Z" },
{ url = "https://files.pythonhosted.org/packages/27/82/df26e44da78bf8d2aeaf7566082260cfa15955a5a6e96e6a29935b64132f/scipy-1.16.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fb2472e72e24d1530debe6ae078db70fb1605350c88a3d14bc401d6306dbffe", size = 36019345, upload-time = "2025-10-28T17:33:09.773Z" },
{ url = "https://files.pythonhosted.org/packages/82/31/006cbb4b648ba379a95c87262c2855cd0d09453e500937f78b30f02fa1cd/scipy-1.16.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5192722cffe15f9329a3948c4b1db789fbb1f05c97899187dcf009b283aea70", size = 38678975, upload-time = "2025-10-28T17:33:15.809Z" },
- { url = "https://files.pythonhosted.org/packages/c2/7f/acbd28c97e990b421af7d6d6cd416358c9c293fc958b8529e0bd5d2a2a19/scipy-1.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:56edc65510d1331dae01ef9b658d428e33ed48b4f77b1d51caf479a0253f96dc", size = 38555926, upload-time = "2025-10-28T17:33:21.388Z" },
- { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
]
[[package]]
@@ -2308,27 +2251,21 @@ version = "0.2.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/4a/be/32ce495aa1d0e0c323dcb1ba87096037358edee539cac5baf8755a6bd396/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133", size = 1943152, upload-time = "2025-08-12T06:59:40.048Z" },
- { url = "https://files.pythonhosted.org/packages/88/7e/ff23008899a58678e98c6ff592bf4d368eee5a71af96d0df6b38a039dd4f/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6", size = 1325651, upload-time = "2025-08-12T06:59:41.536Z" },
- { url = "https://files.pythonhosted.org/packages/19/84/42eb3ce4796777a1b5d3699dfd4dca85113e68b637f194a6c8d786f16a04/sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76", size = 1253645, upload-time = "2025-08-12T06:59:42.903Z" },
{ url = "https://files.pythonhosted.org/packages/89/fa/d3d5ebcba3cb9e6d3775a096251860c41a6bc53a1b9461151df83fe93255/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167", size = 1316273, upload-time = "2025-08-12T06:59:44.476Z" },
{ url = "https://files.pythonhosted.org/packages/04/88/14f2f4a2b922d8b39be45bf63d79e6cd3a9b2f248b2fcb98a69b12af12f5/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b", size = 1387881, upload-time = "2025-08-12T06:59:46.09Z" },
- { url = "https://files.pythonhosted.org/packages/fd/b8/903e5ccb77b4ef140605d5d71b4f9e0ad95d456d6184688073ed11712809/sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068", size = 999540, upload-time = "2025-08-12T06:59:48.023Z" },
- { url = "https://files.pythonhosted.org/packages/2d/81/92df5673c067148c2545b1bfe49adfd775bcc3a169a047f5a0e6575ddaca/sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de", size = 1054671, upload-time = "2025-08-12T06:59:49.895Z" },
- { url = "https://files.pythonhosted.org/packages/fe/02/c5e3bc518655d714622bec87d83db9cdba1cd0619a4a04e2109751c4f47f/sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4", size = 1033923, upload-time = "2025-08-12T06:59:51.952Z" },
]
[[package]]
name = "sentry-sdk"
-version = "2.43.0"
+version = "2.48.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/b3/18/09875b4323b03ca9025bae7e6539797b27e4fc032998a466b4b9c3d24653/sentry_sdk-2.43.0.tar.gz", hash = "sha256:52ed6e251c5d2c084224d73efee56b007ef5c2d408a4a071270e82131d336e20", size = 368953, upload-time = "2025-10-29T11:26:08.156Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/40/f0/0e9dc590513d5e742d7799e2038df3a05167cba084c6ca4f3cdd75b55164/sentry_sdk-2.48.0.tar.gz", hash = "sha256:5213190977ff7fdff8a58b722fb807f8d5524a80488626ebeda1b5676c0c1473", size = 384828, upload-time = "2025-12-16T14:55:41.722Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/69/31/8228fa962f7fd8814d634e4ebece8780e2cdcfbdf0cd2e14d4a6861a7cd5/sentry_sdk-2.43.0-py2.py3-none-any.whl", hash = "sha256:4aacafcf1756ef066d359ae35030881917160ba7f6fc3ae11e0e58b09edc2d5d", size = 400997, upload-time = "2025-10-29T11:26:05.77Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/19/8d77f9992e5cbfcaa9133c3bf63b4fbbb051248802e1e803fed5c552fbb2/sentry_sdk-2.48.0-py2.py3-none-any.whl", hash = "sha256:6b12ac256769d41825d9b7518444e57fa35b5642df4c7c5e322af4d2c8721172", size = 414555, upload-time = "2025-12-16T14:55:40.152Z" },
]
[[package]]
@@ -2337,25 +2274,21 @@ version = "1.3.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8d/48/49393a96a2eef1ab418b17475fb92b8fcfad83d099e678751b05472e69de/setproctitle-1.3.7.tar.gz", hash = "sha256:bc2bc917691c1537d5b9bca1468437176809c7e11e5694ca79a9ca12345dcb9e", size = 27002, upload-time = "2025-09-05T12:51:25.278Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/fb/f0/2dc88e842077719d7384d86cc47403e5102810492b33680e7dadcee64cd8/setproctitle-1.3.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2dc99aec591ab6126e636b11035a70991bc1ab7a261da428491a40b84376654e", size = 18049, upload-time = "2025-09-05T12:49:36.241Z" },
- { url = "https://files.pythonhosted.org/packages/f0/b4/50940504466689cda65680c9e9a1e518e5750c10490639fa687489ac7013/setproctitle-1.3.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdd8aa571b7aa39840fdbea620e308a19691ff595c3a10231e9ee830339dd798", size = 13079, upload-time = "2025-09-05T12:49:38.088Z" },
{ url = "https://files.pythonhosted.org/packages/d0/99/71630546b9395b095f4082be41165d1078204d1696c2d9baade3de3202d0/setproctitle-1.3.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2906b6c7959cdb75f46159bf0acd8cc9906cf1361c9e1ded0d065fe8f9039629", size = 32932, upload-time = "2025-09-05T12:49:39.271Z" },
{ url = "https://files.pythonhosted.org/packages/50/22/cee06af4ffcfb0e8aba047bd44f5262e644199ae7527ae2c1f672b86495c/setproctitle-1.3.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6915964a6dda07920a1159321dcd6d94fc7fc526f815ca08a8063aeca3c204f1", size = 33736, upload-time = "2025-09-05T12:49:40.565Z" },
{ url = "https://files.pythonhosted.org/packages/5c/00/a5949a8bb06ef5e7df214fc393bb2fb6aedf0479b17214e57750dfdd0f24/setproctitle-1.3.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cff72899861c765bd4021d1ff1c68d60edc129711a2fdba77f9cb69ef726a8b6", size = 35605, upload-time = "2025-09-05T12:49:42.362Z" },
{ url = "https://files.pythonhosted.org/packages/b0/3a/50caca532a9343828e3bf5778c7a84d6c737a249b1796d50dd680290594d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b7cb05bd446687ff816a3aaaf831047fc4c364feff7ada94a66024f1367b448c", size = 33143, upload-time = "2025-09-05T12:49:43.515Z" },
{ url = "https://files.pythonhosted.org/packages/ca/14/b843a251296ce55e2e17c017d6b9f11ce0d3d070e9265de4ecad948b913d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3a57b9a00de8cae7e2a1f7b9f0c2ac7b69372159e16a7708aa2f38f9e5cc987a", size = 34434, upload-time = "2025-09-05T12:49:45.31Z" },
{ url = "https://files.pythonhosted.org/packages/c8/b7/06145c238c0a6d2c4bc881f8be230bb9f36d2bf51aff7bddcb796d5eed67/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d8828b356114f6b308b04afe398ed93803d7fca4a955dd3abe84430e28d33739", size = 32795, upload-time = "2025-09-05T12:49:46.419Z" },
- { url = "https://files.pythonhosted.org/packages/ef/dc/ef76a81fac9bf27b84ed23df19c1f67391a753eed6e3c2254ebcb5133f56/setproctitle-1.3.7-cp312-cp312-win32.whl", hash = "sha256:b0304f905efc845829ac2bc791ddebb976db2885f6171f4a3de678d7ee3f7c9f", size = 12552, upload-time = "2025-09-05T12:49:47.635Z" },
- { url = "https://files.pythonhosted.org/packages/e2/5b/a9fe517912cd6e28cf43a212b80cb679ff179a91b623138a99796d7d18a0/setproctitle-1.3.7-cp312-cp312-win_amd64.whl", hash = "sha256:9888ceb4faea3116cf02a920ff00bfbc8cc899743e4b4ac914b03625bdc3c300", size = 13247, upload-time = "2025-09-05T12:49:49.16Z" },
]
[[package]]
name = "setuptools"
-version = "79.0.1"
+version = "80.9.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/bb/71/b6365e6325b3290e14957b2c3a804a529968c77a049b2ed40c095f749707/setuptools-79.0.1.tar.gz", hash = "sha256:128ce7b8f33c3079fd1b067ecbb4051a66e8526e7b65f6cec075dfc650ddfa88", size = 1367909, upload-time = "2025-04-23T22:20:59.241Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/0d/6d/b4752b044bf94cb802d88a888dc7d288baaf77d7910b7dedda74b5ceea0c/setuptools-79.0.1-py3-none-any.whl", hash = "sha256:e147c0549f27767ba362f9da434eab9c5dc0045d5304feb602a0af001089fc51", size = 1256281, upload-time = "2025-04-23T22:20:56.768Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
]
[[package]]
@@ -2376,6 +2309,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
]
+[[package]]
+name = "smmap"
+version = "5.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" },
+]
+
[[package]]
name = "sniffio"
version = "1.3.1"
@@ -2387,28 +2329,28 @@ wheels = [
[[package]]
name = "sse-starlette"
-version = "3.0.4"
+version = "3.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "anyio" },
- { name = "starlette" },
+ { name = "anyio", marker = "sys_platform == 'linux'" },
+ { name = "starlette", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/17/8b/54651ad49bce99a50fd61a7f19c2b6a79fbb072e693101fbb1194c362054/sse_starlette-3.0.4.tar.gz", hash = "sha256:5e34286862e96ead0eb70f5ddd0bd21ab1f6473a8f44419dd267f431611383dd", size = 22576, upload-time = "2025-12-14T16:22:52.493Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/62/08/8f554b0e5bad3e4e880521a1686d96c05198471eed860b0eb89b57ea3636/sse_starlette-3.1.1.tar.gz", hash = "sha256:bffa531420c1793ab224f63648c059bcadc412bf9fdb1301ac8de1cf9a67b7fb", size = 24306, upload-time = "2025-12-26T15:22:53.836Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/71/22/8ab1066358601163e1ac732837adba3672f703818f693e179b24e0d3b65c/sse_starlette-3.0.4-py3-none-any.whl", hash = "sha256:32c80ef0d04506ced4b0b6ab8fe300925edc37d26f666afb1874c754895f5dc3", size = 11764, upload-time = "2025-12-14T16:22:51.453Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/31/4c281581a0f8de137b710a07f65518b34bcf333b201cfa06cfda9af05f8a/sse_starlette-3.1.1-py3-none-any.whl", hash = "sha256:bb38f71ae74cfd86b529907a9fda5632195dfa6ae120f214ea4c890c7ee9d436", size = 12442, upload-time = "2025-12-26T15:22:52.911Z" },
]
[[package]]
name = "starlette"
-version = "0.49.3"
+version = "0.50.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "anyio" },
- { name = "typing-extensions" },
+ { name = "anyio", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/de/1a/608df0b10b53b0beb96a37854ee05864d182ddd4b1156a22f1ad3860425a/starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284", size = 2655031, upload-time = "2025-11-01T15:12:26.13Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" },
]
[[package]]
@@ -2446,18 +2388,15 @@ name = "tiktoken"
version = "0.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "regex" },
- { name = "requests" },
+ { name = "regex", marker = "sys_platform == 'linux'" },
+ { name = "requests", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" },
- { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" },
{ url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" },
{ url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" },
{ url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" },
{ url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" },
- { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" },
]
[[package]]
@@ -2487,38 +2426,97 @@ wheels = [
[[package]]
name = "torch"
-version = "2.9.0"
+version = "2.9.0+cu128"
+source = { registry = "https://download.pytorch.org/whl/cu128" }
+resolution-markers = [
+ "sys_platform == 'linux'",
+]
+dependencies = [
+ { name = "filelock", marker = "sys_platform == 'linux'" },
+ { name = "fsspec", marker = "sys_platform == 'linux'" },
+ { name = "jinja2", marker = "sys_platform == 'linux'" },
+ { name = "networkx", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" },
+ { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" },
+ { name = "setuptools", marker = "sys_platform == 'linux'" },
+ { name = "sympy", marker = "sys_platform == 'linux'" },
+ { name = "triton", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
+]
+wheels = [
+ { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e1765625084e320f1eb2f4eb5fd9d14d39d08d7a1880c10a307ce5de20831d27" },
+ { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:87c62d3b95f1a2270bd116dbd47dc515c0b2035076fbb4a03b4365ea289e89c4" },
+]
+
+[[package]]
+name = "torch"
+version = "2.9.1"
+source = { registry = "https://download.pytorch.org/whl/cpu" }
+resolution-markers = [
+ "sys_platform == 'darwin'",
+]
+dependencies = [
+ { name = "filelock", marker = "sys_platform == 'darwin'" },
+ { name = "fsspec", marker = "sys_platform == 'darwin'" },
+ { name = "jinja2", marker = "sys_platform == 'darwin'" },
+ { name = "networkx", marker = "sys_platform == 'darwin'" },
+ { name = "setuptools", marker = "sys_platform == 'darwin'" },
+ { name = "sympy", marker = "sys_platform == 'darwin'" },
+ { name = "typing-extensions", marker = "sys_platform == 'darwin'" },
+]
+wheels = [
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.9.1-cp312-none-macosx_11_0_arm64.whl" },
+]
+
+[[package]]
+name = "torch"
+version = "2.9.1+cpu"
+source = { registry = "https://download.pytorch.org/whl/cpu" }
+resolution-markers = [
+ "sys_platform != 'darwin' and sys_platform != 'linux'",
+]
+dependencies = [
+ { name = "filelock", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "fsspec", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "jinja2", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "networkx", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "setuptools", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "sympy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+]
+wheels = [
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_amd64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_arm64.whl" },
+]
+
+[[package]]
+name = "torch-c-dlpack-ext"
+version = "0.1.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "filelock" },
- { name = "fsspec" },
- { name = "jinja2" },
- { name = "networkx" },
- { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "setuptools" },
- { name = "sympy" },
- { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "typing-extensions" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
]
+sdist = { url = "https://files.pythonhosted.org/packages/b7/cc/24e5eee56bfe2f99b9c026d55bc1a77ceaf409791d9be71a001ede1b2f4e/torch_c_dlpack_ext-0.1.4.tar.gz", hash = "sha256:ad292d17e285ab9523940e51e87d21ffce4982ce8beb46fb18b5c2b4760a1a10", size = 3683, upload-time = "2025-12-09T00:37:56.739Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" },
- { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" },
- { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" },
- { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" },
+ { url = "https://files.pythonhosted.org/packages/84/c8/97c3d4a1c05dd41e4ba70a8abff47a93951c035a3db1a532777b372f63bc/torch_c_dlpack_ext-0.1.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:78253bc0d2ee4f0c4bf38e207f19de93ba3625430e5ecb08f3a800d93ea9a144", size = 5281944, upload-time = "2025-12-09T00:37:32.153Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/4c/7d59344006807613baca1daf3109ea6a81743146330b5afde96dc953115c/torch_c_dlpack_ext-0.1.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b3f6beab017961a082f4012a68fcdeddb55a8b10cd8db630902bd46068b5e5", size = 433744, upload-time = "2025-12-09T00:37:33.538Z" },
+ { url = "https://files.pythonhosted.org/packages/35/c0/0ae9067fd9f15f1feacfd7398314aa48f922ddb9cbeb5f95c8a2e1831cb8/torch_c_dlpack_ext-0.1.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:383794a3281862f8736efa99789713187fb8e1937a5e2f32456bbbe52fa3a8a3", size = 888525, upload-time = "2025-12-09T00:37:35.374Z" },
+ { url = "https://files.pythonhosted.org/packages/b9/5c/74143cedeaf98f632b60c68770e697ce978f75ff6de8f3fe6e58f46459ba/torch_c_dlpack_ext-0.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:d4a343642c8ee46f1257731b0a5473a76eacaf1a09db13dbf12a2b012b586041", size = 1473667, upload-time = "2025-12-09T00:37:37.382Z" },
]
[[package]]
@@ -2526,13 +2524,11 @@ name = "torchaudio"
version = "2.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "torch" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/b7/63/3c0ede3aa3d19a8a6698ddd107fa88660549360b51bf8ce2717cd498d800/torchaudio-2.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab4cbcccfd873b0fb41fcb39c9869e59ef84bb95b093f6f58e2d05172a7500d2", size = 809116, upload-time = "2025-10-15T15:52:00.911Z" },
{ url = "https://files.pythonhosted.org/packages/be/d5/25e58745defe9d05893d3cba5c0e1a76aeaac503ac5ec4d9f83c871df71c/torchaudio-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:7f93388b6e536c14d6015b6f75277a8b45efc532f61b35adc1ed06c98a86003e", size = 476020, upload-time = "2025-10-15T15:51:59.967Z" },
{ url = "https://files.pythonhosted.org/packages/f0/9c/58b8b49dfba2ae85e41ca86b0c52de45bbbea01987490de219c99c523a58/torchaudio-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:508318a2130b40ad51378f90caf8727a4bd3ac2b296f2b90c900b44e6068a940", size = 2059901, upload-time = "2025-10-15T15:51:54.634Z" },
- { url = "https://files.pythonhosted.org/packages/d7/eb/58b05f75d12f69ccc460893a20c999da082e063082120ed06e05cca3a053/torchaudio-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:82117e3a605f2959dc09b4cd8a11178d6e92727d5f85e5d4f9fe47502f84ee96", size = 665350, upload-time = "2025-10-15T15:52:08.384Z" },
]
[[package]]
@@ -2540,15 +2536,13 @@ name = "torchvision"
version = "0.24.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "numpy" },
- { name = "pillow" },
- { name = "torch" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "pillow", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/47/ef/81e4e69e02e2c4650b30e8c11c8974f946682a30e0ab7e9803a831beff76/torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c61d40bcd2e2451e932902a702ad495ba1ec6f279e90b1e15cef2bb55dc911e2", size = 1891726, upload-time = "2025-10-15T15:51:16.977Z" },
{ url = "https://files.pythonhosted.org/packages/00/7b/e3809b3302caea9a12c13f3adebe4fef127188438e719fd6c8dc93db1da6/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b0531d1483fc322d7da0d83be52f0df860a75114ab87dbeeb9de765feaeda843", size = 2419495, upload-time = "2025-10-15T15:51:11.885Z" },
{ url = "https://files.pythonhosted.org/packages/7e/e6/7324ead6793075a8c75c56abeed1236d1750de16a5613cfe2ddad164a92a/torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26b9dd9c083f8e5f7ac827de6d5b88c615d9c582dc87666770fbdf16887e4c25", size = 8050480, upload-time = "2025-10-15T15:51:24.012Z" },
- { url = "https://files.pythonhosted.org/packages/3e/ad/3c56fcd2a0d6e8afa80e115b5ade4302232ec99655220a51d05709819523/torchvision-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:060b7c50ed4b3fb0316b08e2e31bfd874ec2f63ef5ae02f81e54341ca4e88703", size = 4292225, upload-time = "2025-10-15T15:51:27.699Z" },
]
[[package]]
@@ -2565,12 +2559,13 @@ wheels = [
[[package]]
name = "transformers"
-version = "4.57.1"
+version = "4.57.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "huggingface-hub" },
- { name = "numpy" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "regex" },
@@ -2579,9 +2574,9 @@ dependencies = [
{ name = "tokenizers" },
{ name = "tqdm" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d6/68/a39307bcc4116a30b2106f2e689130a48de8bd8a1e635b5e1030e46fcd9e/transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55", size = 10142511, upload-time = "2025-10-14T15:39:26.18Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" },
]
[[package]]
@@ -2589,47 +2584,48 @@ name = "triton"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
wheels = [
+ { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" },
{ url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" },
]
[[package]]
name = "ty"
-version = "0.0.5"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9e/db/6299d478000f4f1c6f9bf2af749359381610ffc4cbe6713b66e436ecf6e7/ty-0.0.5.tar.gz", hash = "sha256:983da6330773ff71e2b249810a19c689f9a0372f6e21bbf7cde37839d05b4346", size = 4806218, upload-time = "2025-12-20T21:19:17.24Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/7c/98/c1f61ba378b4191e641bb36c07b7fcc70ff844d61be7a4bf2fea7472b4a9/ty-0.0.5-py3-none-linux_armv6l.whl", hash = "sha256:1594cd9bb68015eb2f5a3c68a040860f3c9306dc6667d7a0e5f4df9967b460e2", size = 9785554, upload-time = "2025-12-20T21:19:05.024Z" },
- { url = "https://files.pythonhosted.org/packages/ab/f9/b37b77c03396bd779c1397dae4279b7ad79315e005b3412feed8812a4256/ty-0.0.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7c0140ba980233d28699d9ddfe8f43d0b3535d6a3bbff9935df625a78332a3cf", size = 9603995, upload-time = "2025-12-20T21:19:15.256Z" },
- { url = "https://files.pythonhosted.org/packages/7d/70/4e75c11903b0e986c0203040472627cb61d6a709e1797fb08cdf9d565743/ty-0.0.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:15de414712cde92048ae4b1a77c4dc22920bd23653fe42acaf73028bad88f6b9", size = 9145815, upload-time = "2025-12-20T21:19:36.481Z" },
- { url = "https://files.pythonhosted.org/packages/89/05/93983dfcf871a41dfe58e5511d28e6aa332a1f826cc67333f77ae41a2f8a/ty-0.0.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:438aa51ad6c5fae64191f8d58876266e26f9250cf09f6624b6af47a22fa88618", size = 9619849, upload-time = "2025-12-20T21:19:19.084Z" },
- { url = "https://files.pythonhosted.org/packages/82/b6/896ab3aad59f846823f202e94be6016fb3f72434d999d2ae9bd0f28b3af9/ty-0.0.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b3d373fd96af1564380caf153600481c676f5002ee76ba8a7c3508cdff82ee0", size = 9606611, upload-time = "2025-12-20T21:19:24.583Z" },
- { url = "https://files.pythonhosted.org/packages/ca/ae/098e33fc92330285ed843e2750127e896140c4ebd2d73df7732ea496f588/ty-0.0.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8453692503212ad316cf8b99efbe85a91e5f63769c43be5345e435a1b16cba5a", size = 10029523, upload-time = "2025-12-20T21:19:07.055Z" },
- { url = "https://files.pythonhosted.org/packages/04/5a/f4b4c33758b9295e9aca0de9645deca0f4addd21d38847228723a6e780fc/ty-0.0.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:2e4c454139473abbd529767b0df7a795ed828f780aef8d0d4b144558c0dc4446", size = 10870892, upload-time = "2025-12-20T21:19:34.495Z" },
- { url = "https://files.pythonhosted.org/packages/c3/c5/4e3e7e88389365aa1e631c99378711cf0c9d35a67478cb4720584314cf44/ty-0.0.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:426d4f3b82475b1ec75f3cc9ee5a667c8a4ae8441a09fcd8e823a53b706d00c7", size = 10599291, upload-time = "2025-12-20T21:19:26.557Z" },
- { url = "https://files.pythonhosted.org/packages/c1/5d/138f859ea87bd95e17b9818e386ae25a910e46521c41d516bf230ed83ffc/ty-0.0.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5710817b67c6b2e4c0224e4f319b7decdff550886e9020f6d46aa1ce8f89a609", size = 10413515, upload-time = "2025-12-20T21:19:11.094Z" },
- { url = "https://files.pythonhosted.org/packages/27/21/1cbcd0d3b1182172f099e88218137943e0970603492fb10c7c9342369d9a/ty-0.0.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23c55ef08882c7c5ced1ccb90b4eeefa97f690aea254f58ac0987896c590f76", size = 10144992, upload-time = "2025-12-20T21:19:13.225Z" },
- { url = "https://files.pythonhosted.org/packages/ad/30/fdac06a5470c09ad2659a0806497b71f338b395d59e92611f71b623d05a0/ty-0.0.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b9e4c1a28a23b14cf8f4f793f4da396939f16c30bfa7323477c8cc234e352ac4", size = 9606408, upload-time = "2025-12-20T21:19:09.212Z" },
- { url = "https://files.pythonhosted.org/packages/09/93/e99dcd7f53295192d03efd9cbcec089a916f49cad4935c0160ea9adbd53d/ty-0.0.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4e9ebb61529b9745af662e37c37a01ad743cdd2c95f0d1421705672874d806cd", size = 9630040, upload-time = "2025-12-20T21:19:38.165Z" },
- { url = "https://files.pythonhosted.org/packages/d7/f8/6d1e87186e4c35eb64f28000c1df8fd5f73167ce126c5e3dd21fd1204a23/ty-0.0.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5eb191a8e332f50f56dfe45391bdd7d43dd4ef6e60884710fd7ce84c5d8c1eb5", size = 9754016, upload-time = "2025-12-20T21:19:32.79Z" },
- { url = "https://files.pythonhosted.org/packages/28/e6/20f989342cb3115852dda404f1d89a10a3ce93f14f42b23f095a3d1a00c9/ty-0.0.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:92ed7451a1e82ee134a2c24ca43b74dd31e946dff2b08e5c34473e6b051de542", size = 10252877, upload-time = "2025-12-20T21:19:20.787Z" },
- { url = "https://files.pythonhosted.org/packages/57/9d/fc66fa557443233dfad9ae197ff3deb70ae0efcfb71d11b30ef62f5cdcc3/ty-0.0.5-py3-none-win32.whl", hash = "sha256:71f6707e4c1c010c158029a688a498220f28bb22fdb6707e5c20e09f11a5e4f2", size = 9212640, upload-time = "2025-12-20T21:19:30.817Z" },
- { url = "https://files.pythonhosted.org/packages/68/b6/05c35f6dea29122e54af0e9f8dfedd0a100c721affc8cc801ebe2bc2ed13/ty-0.0.5-py3-none-win_amd64.whl", hash = "sha256:2b8b754a0d7191e94acdf0c322747fec34371a4d0669f5b4e89549aef28814ae", size = 10034701, upload-time = "2025-12-20T21:19:28.311Z" },
- { url = "https://files.pythonhosted.org/packages/df/ca/4201ed5cb2af73912663d0c6ded927c28c28b3c921c9348aa8d2cfef4853/ty-0.0.5-py3-none-win_arm64.whl", hash = "sha256:83bea5a5296caac20d52b790ded2b830a7ff91c4ed9f36730fe1f393ceed6654", size = 9566474, upload-time = "2025-12-20T21:19:22.518Z" },
+version = "0.0.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b3/43/8be3ec2e2ce6119cff9ee3a207fae0cb4f2b4f8ed6534175130a32be24a7/ty-0.0.7.tar.gz", hash = "sha256:90e53b20b86c418ee41a8385f17da44cc7f916f96f9eee87593423ce8292ca72", size = 4826677, upload-time = "2025-12-24T21:28:49.136Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/6e/56/fafa123acf955089306372add312f16e97aba61f7c4daf74e2bb9c350d23/ty-0.0.7-py3-none-linux_armv6l.whl", hash = "sha256:b30105bd9a0b064497111c50c206d5b6a032f29bcf39f09a12085c3009d72784", size = 9862360, upload-time = "2025-12-24T21:28:36.762Z" },
+ { url = "https://files.pythonhosted.org/packages/71/f4/9c30ff498d9a60e24f16d26c0cf93cd03a119913ffa720a77149f02df06e/ty-0.0.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b4df20889115f3d5611a9d9cdedc222e3fd82b5fe87bb0a9f7246e53a23becc7", size = 9712866, upload-time = "2025-12-24T21:28:25.926Z" },
+ { url = "https://files.pythonhosted.org/packages/43/84/e06a4a6e4011890027ffee41efbf261b1335103d09009d625ace7f1a60eb/ty-0.0.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f699589d8511e1e17c5a7edfc5f4a4e80f2a6d4a3932a0e9e3422fd32d731472", size = 9221692, upload-time = "2025-12-24T21:28:29.649Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/e9/ebb4192d3627730125d40ee403a17dc91bab59d69c3eff286453b3218d01/ty-0.0.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3eaec2d8aa153ee4bcc43b17a384d0f9e66177c8c8127be3358b6b8348b9e3b", size = 9710340, upload-time = "2025-12-24T21:28:55.148Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/4a/ec144458a9cfb324d5cb471483094e62e74d73179343dff262a5cca1a1e1/ty-0.0.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:177d160295e6a56bdf0b61f6120bc4502fff301d4d10855ba711c109aa7f37fb", size = 9670317, upload-time = "2025-12-24T21:28:43.096Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/94/fe7106fd5e2ac06b81fba7b785a6216774618edc3fda9e17f58efe3cede6/ty-0.0.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30518b95ab5cc83615794cca765a5fb86df39a0d9c3dadc0ab2d787ab7830008", size = 10096517, upload-time = "2025-12-24T21:28:23.667Z" },
+ { url = "https://files.pythonhosted.org/packages/45/d9/db96ccfd663c96bdd4bb63db72899198c01445012f939477a5318a563f14/ty-0.0.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7867b3f75c2d9602cc6fb3b6d462580b707c2d112d4b27037142b0d01f8bfd03", size = 10996406, upload-time = "2025-12-24T21:28:39.134Z" },
+ { url = "https://files.pythonhosted.org/packages/94/da/103915c08c3e6a14f95959614646fcdc9a240cd9a039fadbdcd086c819ee/ty-0.0.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:878d45858e209b7904753fbc5155f4cb75dadc20a26bbb77614bfef31580f9ae", size = 10712829, upload-time = "2025-12-24T21:28:27.745Z" },
+ { url = "https://files.pythonhosted.org/packages/47/c0/d9be417bc8e459e13e9698978579eec9868f91f4c5d6ef663249967fec8b/ty-0.0.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:651820b193901825afce40ae68f6a51cd64dbfa4b81a45db90061401261f25e4", size = 10486541, upload-time = "2025-12-24T21:28:45.17Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/09/d1858c66620d8ae566e021ad0d7168914b1568841f8fe9e439116ce6b440/ty-0.0.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f56a5a0c1c045863b1b70c358a392b3f73b8528c5c571d409f19dd465525e116", size = 10255312, upload-time = "2025-12-24T21:28:53.17Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/0a/78f75089db491fd5fcc13d2845a0b2771b7f7d377450c64c6616e9c227bc/ty-0.0.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:748218fbc1f7b7f1b9d14e77d4f3d7fec72af794417e26b0185bdb94153afe1c", size = 9696201, upload-time = "2025-12-24T21:28:57.345Z" },
+ { url = "https://files.pythonhosted.org/packages/01/9e/b26e94832fd563fef6f77a4487affc77a027b0e53106422c66aafb37fa01/ty-0.0.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1ff80f3985a52a7358b9069b4a8d223e92cf312544a934a062d6d3a4fb6876b3", size = 9688907, upload-time = "2025-12-24T21:28:59.485Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/8f/cc48601fb92c964cf6c34277e0d947076146b7de47aa11b5dbae45e01ce7/ty-0.0.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a808910ce672ba4446699f4c021283208f58f988bcfc3bdbdfc6e005819d9ee0", size = 9829982, upload-time = "2025-12-24T21:28:34.429Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/af/7fa9c2bfa25865968bded637f7e71f1a712f4fbede88f487b6a9101ab936/ty-0.0.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2718fea5f314eda01703fb406ec89b1fc8710b3fc6a09bbd6f7a4f3502ddc889", size = 10361037, upload-time = "2025-12-24T21:28:47.027Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/5b/1a6ff1495975cd1c02aa8d03bc5c9d8006eaeb8bf354446f88d70f0518fd/ty-0.0.7-py3-none-win32.whl", hash = "sha256:ae89bb8dc50deb66f34eab3113aa61ac5d7f85ecf16279e5918548085a89021c", size = 9295092, upload-time = "2025-12-24T21:28:51.041Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/f6/47e9364635d048002354f84d2d0d6dfc9eb166dc67850739f88e1fec4fc5/ty-0.0.7-py3-none-win_amd64.whl", hash = "sha256:25bd20e3d4d0f07b422f9b42711ba24d28116031273bd23dbda66cec14df1c06", size = 10162816, upload-time = "2025-12-24T21:28:41.006Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/f4/c4fc28410c4493982b7481fb23f62bacb02fd2912ebec3b9bc7de18bebb8/ty-0.0.7-py3-none-win_arm64.whl", hash = "sha256:c87d27484dba9fca0053b6a9eee47eecc760aab2bbb8e6eab3d7f81531d1ad0c", size = 9653112, upload-time = "2025-12-24T21:28:31.562Z" },
]
[[package]]
name = "typer"
-version = "0.20.0"
+version = "0.21.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "click" },
- { name = "rich" },
- { name = "shellingham" },
- { name = "typing-extensions" },
+ { name = "click", marker = "sys_platform == 'linux'" },
+ { name = "rich", marker = "sys_platform == 'linux'" },
+ { name = "shellingham", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/8f/28/7c85c8032b91dbe79725b6f17d2fffc595dff06a35c7a30a37bef73a1ab4/typer-0.20.0.tar.gz", hash = "sha256:1aaf6494031793e4876fb0bacfa6a912b551cf43c1e63c800df8b1a866720c37", size = 106492, upload-time = "2025-10-20T17:03:49.445Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/85/30/ff9ede605e3bd086b4dd842499814e128500621f7951ca1e5ce84bbf61b1/typer-0.21.0.tar.gz", hash = "sha256:c87c0d2b6eee3b49c5c64649ec92425492c14488096dfbc8a0c2799b2f6f9c53", size = 106781, upload-time = "2025-12-25T09:54:53.651Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/e4/5ebc1899d31d2b1601b32d21cfb4bba022ae6fce323d365f0448031b1660/typer-0.21.0-py3-none-any.whl", hash = "sha256:c79c01ca6b30af9fd48284058a7056ba0d3bf5cf10d0ff3d0c5b11b68c258ac6", size = 47109, upload-time = "2025-12-25T09:54:51.918Z" },
]
[[package]]
@@ -2664,35 +2660,34 @@ wheels = [
[[package]]
name = "urllib3"
-version = "2.5.0"
+version = "2.6.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" },
]
[[package]]
name = "uvicorn"
-version = "0.38.0"
+version = "0.40.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "click" },
- { name = "h11" },
+ { name = "click", marker = "sys_platform == 'linux'" },
+ { name = "h11", marker = "sys_platform == 'linux'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
]
[package.optional-dependencies]
standard = [
- { name = "colorama", marker = "sys_platform == 'win32'" },
- { name = "httptools" },
- { name = "python-dotenv" },
- { name = "pyyaml" },
- { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" },
- { name = "watchfiles" },
- { name = "websockets" },
+ { name = "httptools", marker = "sys_platform == 'linux'" },
+ { name = "python-dotenv", marker = "sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'linux'" },
+ { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'linux'" },
+ { name = "watchfiles", marker = "sys_platform == 'linux'" },
+ { name = "websockets", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -2701,8 +2696,6 @@ version = "0.22.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" },
- { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" },
{ url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" },
{ url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" },
{ url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" },
@@ -2714,65 +2707,65 @@ name = "vllm"
version = "0.13.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "aiohttp" },
- { name = "anthropic" },
- { name = "blake3" },
- { name = "cachetools" },
- { name = "cbor2" },
- { name = "cloudpickle" },
- { name = "compressed-tensors" },
- { name = "depyf" },
- { name = "diskcache" },
- { name = "einops" },
- { name = "fastapi", extra = ["standard"] },
- { name = "filelock" },
- { name = "flashinfer-python" },
- { name = "gguf" },
- { name = "ijson" },
- { name = "lark" },
- { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
- { name = "lm-format-enforcer" },
- { name = "mcp" },
- { name = "mistral-common", extra = ["image"] },
- { name = "model-hosting-container-standards" },
- { name = "msgspec" },
- { name = "ninja" },
- { name = "numba" },
- { name = "numpy" },
- { name = "openai" },
- { name = "openai-harmony" },
- { name = "opencv-python-headless" },
- { name = "outlines-core" },
- { name = "partial-json-parser" },
- { name = "pillow" },
- { name = "prometheus-client" },
- { name = "prometheus-fastapi-instrumentator" },
- { name = "protobuf" },
- { name = "psutil" },
- { name = "py-cpuinfo" },
- { name = "pybase64" },
- { name = "pydantic" },
- { name = "python-json-logger" },
- { name = "pyyaml" },
- { name = "pyzmq" },
- { name = "ray", extra = ["cgraph"] },
- { name = "regex" },
- { name = "requests" },
- { name = "scipy" },
- { name = "sentencepiece" },
- { name = "setproctitle" },
- { name = "setuptools" },
- { name = "six" },
- { name = "tiktoken" },
- { name = "tokenizers" },
- { name = "torch" },
- { name = "torchaudio" },
- { name = "torchvision" },
- { name = "tqdm" },
- { name = "transformers" },
- { name = "typing-extensions" },
- { name = "watchfiles" },
- { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'ppc64le' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
+ { name = "aiohttp", marker = "sys_platform == 'linux'" },
+ { name = "anthropic", marker = "sys_platform == 'linux'" },
+ { name = "blake3", marker = "sys_platform == 'linux'" },
+ { name = "cachetools", marker = "sys_platform == 'linux'" },
+ { name = "cbor2", marker = "sys_platform == 'linux'" },
+ { name = "cloudpickle", marker = "sys_platform == 'linux'" },
+ { name = "compressed-tensors", marker = "sys_platform == 'linux'" },
+ { name = "depyf", marker = "sys_platform == 'linux'" },
+ { name = "diskcache", marker = "sys_platform == 'linux'" },
+ { name = "einops", marker = "sys_platform == 'linux'" },
+ { name = "fastapi", extra = ["standard"], marker = "sys_platform == 'linux'" },
+ { name = "filelock", marker = "sys_platform == 'linux'" },
+ { name = "flashinfer-python", marker = "sys_platform == 'linux'" },
+ { name = "gguf", marker = "sys_platform == 'linux'" },
+ { name = "ijson", marker = "sys_platform == 'linux'" },
+ { name = "lark", marker = "sys_platform == 'linux'" },
+ { name = "llguidance", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+ { name = "lm-format-enforcer", marker = "sys_platform == 'linux'" },
+ { name = "mcp", marker = "sys_platform == 'linux'" },
+ { name = "mistral-common", extra = ["image"], marker = "sys_platform == 'linux'" },
+ { name = "model-hosting-container-standards", marker = "sys_platform == 'linux'" },
+ { name = "msgspec", marker = "sys_platform == 'linux'" },
+ { name = "ninja", marker = "sys_platform == 'linux'" },
+ { name = "numba", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "openai", marker = "sys_platform == 'linux'" },
+ { name = "openai-harmony", marker = "sys_platform == 'linux'" },
+ { name = "opencv-python-headless", marker = "sys_platform == 'linux'" },
+ { name = "outlines-core", marker = "sys_platform == 'linux'" },
+ { name = "partial-json-parser", marker = "sys_platform == 'linux'" },
+ { name = "pillow", marker = "sys_platform == 'linux'" },
+ { name = "prometheus-client", marker = "sys_platform == 'linux'" },
+ { name = "prometheus-fastapi-instrumentator", marker = "sys_platform == 'linux'" },
+ { name = "protobuf", marker = "sys_platform == 'linux'" },
+ { name = "psutil", marker = "sys_platform == 'linux'" },
+ { name = "py-cpuinfo", marker = "sys_platform == 'linux'" },
+ { name = "pybase64", marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "python-json-logger", marker = "sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'linux'" },
+ { name = "pyzmq", marker = "sys_platform == 'linux'" },
+ { name = "ray", extra = ["cgraph"], marker = "sys_platform == 'linux'" },
+ { name = "regex", marker = "sys_platform == 'linux'" },
+ { name = "requests", marker = "sys_platform == 'linux'" },
+ { name = "scipy", marker = "sys_platform == 'linux'" },
+ { name = "sentencepiece", marker = "sys_platform == 'linux'" },
+ { name = "setproctitle", marker = "sys_platform == 'linux'" },
+ { name = "setuptools", marker = "sys_platform == 'linux'" },
+ { name = "six", marker = "sys_platform == 'linux'" },
+ { name = "tiktoken", marker = "sys_platform == 'linux'" },
+ { name = "tokenizers", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "torchaudio", marker = "sys_platform == 'linux'" },
+ { name = "torchvision", marker = "sys_platform == 'linux'" },
+ { name = "tqdm", marker = "sys_platform == 'linux'" },
+ { name = "transformers", marker = "sys_platform == 'linux'" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
+ { name = "watchfiles", marker = "sys_platform == 'linux'" },
+ { name = "xgrammar", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/11/12/b922f96778d07df1c28dfa9a81fbc9706c13c5d0a4e8d154060818a79705/vllm-0.13.0.tar.gz", hash = "sha256:4ad43db45fef37114b550d03a4f423fb3fa3a31d8bc09ee810ef8b9cdcd4b5fe", size = 17828199, upload-time = "2025-12-19T03:30:32.741Z" }
wheels = [
@@ -2789,17 +2782,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8d/96/04e7b441807b26b794da5b11e59ed7f83b2cf8af202bd7eba8ad2fa6046e/wadler_lindig-0.1.7-py3-none-any.whl", hash = "sha256:e3ec83835570fd0a9509f969162aeb9c65618f998b1f42918cfc8d45122fe953", size = 20516, upload-time = "2025-06-18T07:00:41.684Z" },
]
+[[package]]
+name = "wandb"
+version = "0.23.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "click" },
+ { name = "gitpython" },
+ { name = "packaging" },
+ { name = "platformdirs" },
+ { name = "protobuf" },
+ { name = "pydantic" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "sentry-sdk" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0a/cc/770ae3aa7ae44f6792f7ecb81c14c0e38b672deb35235719bb1006519487/wandb-0.23.1.tar.gz", hash = "sha256:f6fb1e3717949b29675a69359de0eeb01e67d3360d581947d5b3f98c273567d6", size = 44298053, upload-time = "2025-12-03T02:25:10.79Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/12/0b/c3d7053dfd93fd259a63c7818d9c4ac2ba0642ff8dc8db98662ea0cf9cc0/wandb-0.23.1-py3-none-macosx_12_0_arm64.whl", hash = "sha256:358e15471d19b7d73fc464e37371c19d44d39e433252ac24df107aff993a286b", size = 21527293, upload-time = "2025-12-03T02:24:48.011Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/9f/059420fa0cb6c511dc5c5a50184122b6aca7b178cb2aa210139e354020da/wandb-0.23.1-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:110304407f4b38f163bdd50ed5c5225365e4df3092f13089c30171a75257b575", size = 22745926, upload-time = "2025-12-03T02:24:50.519Z" },
+ { url = "https://files.pythonhosted.org/packages/96/b6/fd465827c14c64d056d30b4c9fcf4dac889a6969dba64489a88fc4ffa333/wandb-0.23.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:6cc984cf85feb2f8ee0451d76bc9fb7f39da94956bb8183e30d26284cf203b65", size = 21212973, upload-time = "2025-12-03T02:24:52.828Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/ee/9a8bb9a39cc1f09c3060456cc79565110226dc4099a719af5c63432da21d/wandb-0.23.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:67431cd3168d79fdb803e503bd669c577872ffd5dadfa86de733b3274b93088e", size = 22887885, upload-time = "2025-12-03T02:24:55.281Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/4d/8d9e75add529142e037b05819cb3ab1005679272950128d69d218b7e5b2e/wandb-0.23.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:07be70c0baa97ea25fadc4a9d0097f7371eef6dcacc5ceb525c82491a31e9244", size = 21250967, upload-time = "2025-12-03T02:24:57.603Z" },
+ { url = "https://files.pythonhosted.org/packages/97/72/0b35cddc4e4168f03c759b96d9f671ad18aec8bdfdd84adfea7ecb3f5701/wandb-0.23.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:216c95b08e0a2ec6a6008373b056d597573d565e30b43a7a93c35a171485ee26", size = 22988382, upload-time = "2025-12-03T02:25:00.518Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/6d/e78093d49d68afb26f5261a70fc7877c34c114af5c2ee0ab3b1af85f5e76/wandb-0.23.1-py3-none-win32.whl", hash = "sha256:fb5cf0f85692f758a5c36ab65fea96a1284126de64e836610f92ddbb26df5ded", size = 22150756, upload-time = "2025-12-03T02:25:02.734Z" },
+ { url = "https://files.pythonhosted.org/packages/05/27/4f13454b44c9eceaac3d6e4e4efa2230b6712d613ff9bf7df010eef4fd18/wandb-0.23.1-py3-none-win_amd64.whl", hash = "sha256:21c8c56e436eb707b7d54f705652e030d48e5cfcba24cf953823eb652e30e714", size = 22150760, upload-time = "2025-12-03T02:25:05.106Z" },
+ { url = "https://files.pythonhosted.org/packages/30/20/6c091d451e2a07689bfbfaeb7592d488011420e721de170884fedd68c644/wandb-0.23.1-py3-none-win_arm64.whl", hash = "sha256:8aee7f3bb573f2c0acf860f497ca9c684f9b35f2ca51011ba65af3d4592b77c1", size = 20137463, upload-time = "2025-12-03T02:25:08.317Z" },
+]
+
[[package]]
name = "watchfiles"
version = "1.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "anyio" },
+ { name = "anyio", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" },
- { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" },
{ url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" },
{ url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" },
{ url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" },
@@ -2808,9 +2828,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" },
{ url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" },
{ url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" },
- { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" },
- { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" },
- { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" },
]
[[package]]
@@ -2819,50 +2836,32 @@ version = "15.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" },
- { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" },
- { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" },
{ url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" },
{ url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" },
{ url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" },
{ url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" },
{ url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" },
{ url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" },
- { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" },
- { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" },
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
]
-[[package]]
-name = "win32-setctime"
-version = "1.2.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" },
-]
-
[[package]]
name = "xgrammar"
version = "0.1.27"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
- { name = "ninja" },
- { name = "numpy" },
- { name = "pydantic" },
- { name = "torch" },
- { name = "transformers" },
+ { name = "ninja", marker = "sys_platform == 'linux'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
+ { name = "pydantic", marker = "sys_platform == 'linux'" },
+ { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
+ { name = "transformers", marker = "sys_platform == 'linux'" },
{ name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "typing-extensions" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/62/e1/b522b1e50fddd773d368c2945ef5ed628aa90c0c972027f9aa5a51d6d4f9/xgrammar-0.1.27.tar.gz", hash = "sha256:40af7bb2891f1633ec7f660723c74a92a963307d283aca9e3b4e53a0feaf1d46", size = 2303435, upload-time = "2025-11-04T03:11:53.512Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/39/b6/09b43e2adff45d30ebcf9110d0ff753f4c96b368adaa2d166df3dee88d5f/xgrammar-0.1.27-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:6404a7714440eb86ab0379d749f33591274eeef04787dc00d61f22069f3ed51d", size = 663319, upload-time = "2025-11-04T03:11:28.682Z" },
- { url = "https://files.pythonhosted.org/packages/88/8b/53eb5c6d0df8df9f6350f182516a5b8c7b8b11d62650300d2c04af2bc4ea/xgrammar-0.1.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d01fa9894bc44a7f6a70b0301b59f3e310c0e0e7b7ea4cf5ce190b12d8220dd8", size = 636168, upload-time = "2025-11-04T03:11:30.373Z" },
{ url = "https://files.pythonhosted.org/packages/08/1b/53d30395bb973f13255d3e3a72961f95fdfb4083877c3f93bb626e3d1522/xgrammar-0.1.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:906c0601bac9170e1bab77ca985259035ff9c386c347efcb191555eab86e984e", size = 8676340, upload-time = "2025-11-04T03:11:32.203Z" },
{ url = "https://files.pythonhosted.org/packages/48/74/70cfac0171d9f309cfe18c5384330e3edc9466c436b258495fd30ecf29a3/xgrammar-0.1.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb68988a122f544301c496f2cac8ee82960ca7f5b3a42a952b2a00c0a55e6ca5", size = 8870650, upload-time = "2025-11-04T03:11:34.322Z" },
- { url = "https://files.pythonhosted.org/packages/6a/a1/0392aa9c7669c56f7f88e4423b246476a74a72c3bb9db944e1bfc029985e/xgrammar-0.1.27-cp312-cp312-win_amd64.whl", hash = "sha256:3aac335ea052afc8f8dc34b9f2afcb9462a68189423aed9f60b0941db6cfc310", size = 708811, upload-time = "2025-11-04T03:11:36.214Z" },
]
[[package]]