From 6530793eef871fa9d4f4326be5f5c6a0ebf3bf16 Mon Sep 17 00:00:00 2001 From: Sean Brar Date: Thu, 5 Mar 2026 18:09:30 -0800 Subject: [PATCH] feat(anthropic): update v1.3 integration and documentation --- README.md | 8 +++-- docs/conversations-and-agents.md | 45 ++++++++++++++++--------- docs/error-handling.md | 7 ++-- docs/getting-started.md | 16 +++++++-- docs/portable-code.md | 13 +++---- docs/reference/cli.md | 2 +- docs/reference/provider-capabilities.md | 8 +++-- docs/sending-content.md | 2 +- docs/structured-data.md | 4 +-- src/pollux/providers/anthropic.py | 6 ++-- tests/test_providers.py | 6 ++-- 11 files changed, 73 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 30957e7..bddc7d2 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ print(result["answers"][0]) `run()` returns a `ResultEnvelope` dict — `answers` is a list with one entry per prompt. To use OpenAI instead: `Config(provider="openai", model="gpt-5-nano")`. +For Anthropic: `Config(provider="anthropic", model="claude-haiku-4-5")`. For a full 2-minute walkthrough (install, key setup, success checks), see the [Quickstart](https://polluxlib.dev/quickstart/). @@ -59,14 +60,17 @@ pip install pollux-ai ### API Keys -Get a key from [Google AI Studio](https://ai.dev/) or [OpenAI Platform](https://platform.openai.com/api-keys), then: +Get a key from [Google AI Studio](https://ai.dev/), [OpenAI Platform](https://platform.openai.com/api-keys), or the [Anthropic Console](https://console.anthropic.com/settings/keys), then: ```bash -# Gemini (recommended starting point — supports context caching) +# Gemini export GEMINI_API_KEY="your-key-here" # OpenAI export OPENAI_API_KEY="your-key-here" + +# Anthropic +export ANTHROPIC_API_KEY="your-key-here" ``` ## Usage diff --git a/docs/conversations-and-agents.md b/docs/conversations-and-agents.md index c6531d0..80007ee 100644 --- a/docs/conversations-and-agents.md +++ b/docs/conversations-and-agents.md @@ -32,9 +32,20 @@ next turn. 
## Continuing a Conversation with `continue_from` -Every successful `run()` returns a `ResultEnvelope` containing internal -conversation state. Pass this envelope back into `Options` to -automatically resume the conversation. +Pass a prior `ResultEnvelope` back into `Options(continue_from=...)` to +automatically resume a conversation. Pollux unpacks the initial prompt, +the assistant's previous response, and any tool calls directly into the +context payload without requiring manual dictionary manipulation. + +To use `continue_from`, the first turn must opt into conversation tracking +by passing `history=[]` (an empty list is enough). Without it, Pollux +treats the call as stateless and does not build conversation state. + +!!! note + When tool calling is active, Pollux auto-populates conversation state + whenever the model returns `tool_calls` — no explicit `history=[]` + needed. The opt-in requirement only applies to plain conversational + calls without tools. ```python import asyncio @@ -43,12 +54,16 @@ from pollux import Config, Options, run async def chat_loop() -> None: config = Config(provider="gemini", model="gemini-2.5-flash-lite") - # Turn 1: Initial query + # Turn 1: opt into conversation tracking with history=[] print("User: Hello! Please remember my name is Sean.") - result1 = await run("Hello! Please remember my name is Sean.", config=config) + result1 = await run( + "Hello! Please remember my name is Sean.", + config=config, + options=Options(history=[]), # Enable conversation state + ) print(f"Assistant: {result1['answers'][0]}") - # Turn 2: Continuing the session + # Turn 2: continue from prior result print("\nUser: What is my name?") result2 = await run( "What is my name?", @@ -60,10 +75,6 @@ async def chat_loop() -> None: asyncio.run(chat_loop()) ``` -`continue_from` unpacks the initial prompt, the assistant's previous response, -and any tool calls directly into the context payload without requiring manual -dictionary manipulation. 
- ## Using `history` for Manual Control If you need to inject mid-conversation context, groom old context out to @@ -150,7 +161,7 @@ Pollux normalizes tool parameter schemas at the provider boundary. For OpenAI (which defaults to strict mode), `additionalProperties: false` and `required` are injected automatically. For Gemini, unsupported fields like `additionalProperties` are stripped. You can define one schema and use it -across both providers without modification. +across all providers without modification. **Reading tool calls:** when the model invokes tools, the result envelope includes a `tool_calls` field: @@ -199,12 +210,16 @@ round of `tool_calls` (if the model needs more data) or a final text answer. `result["tool_calls"][0]`. - **Conversation continuity requires one prompt.** Both `history` and `continue_from` work with single-prompt `run()` calls, not `run_many()`. +- **Plain conversations need `history=[]` on the first turn.** Without + `history` or `continue_from`, Pollux treats a call as stateless and + does not produce conversation state. Pass `history=[]` on the first + turn to enable `continue_from` on subsequent turns. - **Tool-call responses auto-populate conversation state.** When `run()` - returns tool calls, Pollux builds `_conversation_state` automatically, even + returns tool calls, Pollux builds conversation state automatically, even without explicit `history` or `continue_from`. This means `continue_tool()` - works on any result that contains tool calls. -- **Provider differences exist.** Both Gemini and OpenAI support tool calling - and tool messages in history. See + works on any result that contains tool calls — no opt-in needed. +- **Provider differences exist.** Gemini, OpenAI, and Anthropic all support + tool calling and tool messages in history; other capabilities vary. See [Provider Capabilities](reference/provider-capabilities.md) for details. 
--- diff --git a/docs/error-handling.md b/docs/error-handling.md index 08ca499..94fe85f 100644 --- a/docs/error-handling.md +++ b/docs/error-handling.md @@ -61,7 +61,8 @@ strings. Use this order when debugging. Most failures resolve by step 2. 1. **Auth and mode check.** Is `use_mock` what you expect? For real mode, - ensure the matching key exists (`GEMINI_API_KEY` or `OPENAI_API_KEY`). + ensure the matching key exists (`GEMINI_API_KEY`, `OPENAI_API_KEY`, or + `ANTHROPIC_API_KEY`). 2. **Provider/model pairing.** Verify the model belongs to the selected provider. Re-run a minimal prompt after fixing any mismatch. @@ -69,7 +70,7 @@ Use this order when debugging. Most failures resolve by step 2. 3. **Unsupported feature.** Compare your options against [Provider Capabilities](reference/provider-capabilities.md). `delivery_mode="deferred"` is not supported. Conversation continuity - and tool calling are supported by both Gemini and OpenAI. + and tool calling are supported by all three providers. 4. **Source and payload.** Reduce to one source + one prompt and retry. For OpenAI remote URLs, only PDF and image URLs are supported. 
@@ -202,7 +203,7 @@ asyncio.run(process_collection("./papers", "Summarize the key findings.")) | Symptom | Likely Cause | Fix | |---|---|---| -| `ConfigurationError` at startup | Missing API key | `export GEMINI_API_KEY="your-key"` or pass `api_key` in `Config(...)` | +| `ConfigurationError` at startup | Missing API key | `export GEMINI_API_KEY="your-key"` (or `OPENAI_API_KEY` / `ANTHROPIC_API_KEY`) or pass `api_key` in `Config(...)` | | Outputs look like `echo: ...` | `use_mock=True` is set | Set `use_mock=False` (default) and ensure the API key is present | | `ConfigurationError` at request time | Provider/model mismatch | Verify the model belongs to the selected provider | | `ConfigurationError` mentioning `delivery_mode` | `"deferred"` is not supported | Use `delivery_mode="realtime"` (default) | diff --git a/docs/getting-started.md b/docs/getting-started.md index c87d269..c47a923 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -34,8 +34,17 @@ Or download the latest wheel from export OPENAI_API_KEY="your-key-here" ``` -Not sure which provider? Start with Gemini: it's the original path and -supports context caching out of the box. +=== "Anthropic" + + Get a key from the [Anthropic Console](https://console.anthropic.com/settings/keys), then: + + ```bash + export ANTHROPIC_API_KEY="your-key-here" + ``` + +Not sure which provider? Start with Gemini for explicit context caching, +OpenAI for broad model selection, or Anthropic for implicit caching and +extended thinking. ## 3. Run @@ -60,7 +69,8 @@ asyncio.run(main()) ``` If you chose OpenAI, change config to -`Config(provider="openai", model="gpt-5-nano")`. +`Config(provider="openai", model="gpt-5-nano")`. For Anthropic, use +`Config(provider="anthropic", model="claude-haiku-4-5")`. !!! tip "No API key yet?" 
Use `Config(provider="gemini", model="gemini-2.5-flash-lite", use_mock=True)` to diff --git a/docs/portable-code.md b/docs/portable-code.md index d92ae4a..19ce45e 100644 --- a/docs/portable-code.md +++ b/docs/portable-code.md @@ -65,6 +65,7 @@ class ProviderConfig: PROVIDERS = { "gemini": ProviderConfig("gemini", "gemini-2.5-flash-lite"), "openai": ProviderConfig("openai", "gpt-5-nano"), + "anthropic": ProviderConfig("anthropic", "claude-haiku-4-5"), } @@ -186,9 +187,9 @@ preferences to provider-specific models: ```python MODEL_TIERS = { - "fast": {"gemini": "gemini-2.5-flash-lite", "openai": "gpt-5-nano"}, - "balanced": {"gemini": "gemini-2.5-flash", "openai": "gpt-5-mini"}, - "quality": {"gemini": "gemini-2.5-pro", "openai": "gpt-5"}, + "fast": {"gemini": "gemini-2.5-flash-lite", "openai": "gpt-5-nano", "anthropic": "claude-haiku-4-5"}, + "balanced": {"gemini": "gemini-2.5-flash", "openai": "gpt-5-mini", "anthropic": "claude-sonnet-4-6"}, + "quality": {"gemini": "gemini-2.5-pro", "openai": "gpt-5", "anthropic": "claude-opus-4-6"}, } @@ -239,7 +240,7 @@ provider in CI with real credentials: ```python import pytest -@pytest.mark.parametrize("provider", ["gemini", "openai"]) +@pytest.mark.parametrize("provider", ["gemini", "openai", "anthropic"]) async def test_analyze_document_mock(provider: str) -> None: config = Config(provider=provider, model="any-model", use_mock=True) result = await run( @@ -256,7 +257,7 @@ async def test_analyze_document_mock(provider: str) -> None: - **Keep the portable subset in mind.** Text generation, structured output, tool calling, and conversation continuity work on all providers. Context caching has different paradigms (explicit for Gemini, implicit for Anthropic). - YouTube URLs have limited OpenAI support. + YouTube URLs have limited OpenAI and Anthropic support. Check [Provider Capabilities](reference/provider-capabilities.md). 
- **Config errors are your portability signal.** A `ConfigurationError` for an unsupported feature marks the boundary of portability. Handle it at @@ -267,7 +268,7 @@ async def test_analyze_document_mock(provider: str) -> None: and `top_p`. If you use these, guard them with a provider check or catch the error. - **Test with mock first.** `use_mock=True` validates your pipeline structure - without API calls. Both providers return synthetic responses in mock mode. + without API calls. All providers return synthetic responses in mock mode. --- diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 0137ca2..9d08ac9 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -128,5 +128,5 @@ just demo-data # seed demo inputs - `Recipe not found`: verify the spec with `python -m cookbook --list`. - Unexpected relative-path behavior: use `--no-cwd-repo-root` only when you need CWD-local paths. - **No demo files:** run `just demo-data` or provide explicit `--input` paths. -- **API auth errors:** set `GEMINI_API_KEY`/`OPENAI_API_KEY`, then use `--no-mock`. +- **API auth errors:** set `GEMINI_API_KEY`/`OPENAI_API_KEY`/`ANTHROPIC_API_KEY`, then use `--no-mock`. - **Rate limits:** lower concurrency and stage workload size with `--limit`. diff --git a/docs/reference/provider-capabilities.md b/docs/reference/provider-capabilities.md index 2dce4cb..320f4d0 100644 --- a/docs/reference/provider-capabilities.md +++ b/docs/reference/provider-capabilities.md @@ -1,6 +1,6 @@ # Provider Capabilities -This page defines the v1.2 capability contract by provider. +This page defines the v1.3 capability contract by provider. Pollux is **capability-transparent**, not capability-equalizing: providers are allowed to differ, and those differences are surfaced clearly. @@ -10,13 +10,13 @@ Pollux is **capability-transparent**, not capability-equalizing: providers are a - Unsupported features must fail fast with actionable errors. 
- New provider features do not require immediate cross-provider implementation. -## Capability Matrix (v1.2) +## Capability Matrix (v1.3) | Capability | Gemini | OpenAI | Anthropic | Notes | |---|---|---|---|---| | Text generation | ✅ | ✅ | ✅ | Core feature | | Multi-prompt execution (`run_many`) | ✅ | ✅ | ✅ | One call per prompt, shared context | -| Local file inputs | ✅ | ✅ | ❌ | OpenAI uses Files API upload; Anthropic supports URL inputs only | +| Local file inputs | ✅ | ✅ | ✅ | Each provider uses its own Files API for uploads | | PDF URL inputs | ✅ (via URI part) | ✅ (native `input_file.file_url`) | ✅ (native `document` URL block) | | | Image URL inputs | ✅ (via URI part) | ✅ (native `input_image.image_url`) | ✅ (native `image` URL block) | | | YouTube URL inputs | ✅ | ⚠️ limited | ⚠️ limited | OpenAI/Anthropic parity layers (download/re-upload) are out of scope | @@ -69,6 +69,8 @@ Pollux is **capability-transparent**, not capability-equalizing: providers are a ### Anthropic +- Local file uploads use the Anthropic Files API (beta). Supported types: + images, PDFs, and text files. - Remote URL support is intentionally narrow: images and PDFs only. - Implicit prompt caching is enabled with `Options(implicit_caching=True)`. Pollux defaults it on for single-call workloads and off for multi-call diff --git a/docs/sending-content.md b/docs/sending-content.md index 2be9469..94e7d8f 100644 --- a/docs/sending-content.md +++ b/docs/sending-content.md @@ -23,7 +23,7 @@ a `Source`, which entry point to call, and how to read the result. 
|---|---|---| | `Source.from_text(text)` | Plain string | Identifier defaults to first 50 chars | | `Source.from_file(path)` | Local file path | Supports PDF, images, video, audio, text | -| `Source.from_youtube(url)` | YouTube URL | URL reference (no download); Gemini-native, limited on OpenAI | +| `Source.from_youtube(url)` | YouTube URL | URL reference (no download); Gemini-native, limited on OpenAI and Anthropic | | `Source.from_arxiv(ref)` | arXiv ID or URL | Normalizes to canonical PDF URL (no download at construction time) | | `Source.from_uri(uri, mime_type=...)` | Remote URI | Generic fallback for any hosted content | | `Source.from_json(data)` | Dict or Pydantic model instance | Serializes via `json.dumps()`; calls `model_dump()` on Pydantic objects | diff --git a/docs/structured-data.md b/docs/structured-data.md index 6e90403..c97a852 100644 --- a/docs/structured-data.md +++ b/docs/structured-data.md @@ -218,8 +218,8 @@ decisions. - **Raw text is always available.** Even with `response_schema`, the raw model response is in `result["answers"]`. Useful for debugging when the structured output doesn't match expectations. -- **Both providers support structured output.** Gemini and OpenAI both - support `response_schema`. See +- **All providers support structured output.** Gemini, OpenAI, and Anthropic + all support `response_schema`. See [Provider Capabilities](reference/provider-capabilities.md) for details. - **Pydantic v2 is required.** Pollux uses `model_json_schema()` for schema generation. Pydantic v2 is a dependency of Pollux. 
diff --git a/src/pollux/providers/anthropic.py b/src/pollux/providers/anthropic.py index ebbc193..8dd34be 100644 --- a/src/pollux/providers/anthropic.py +++ b/src/pollux/providers/anthropic.py @@ -317,10 +317,8 @@ async def upload_file(self, path: Path, mime_type: str) -> ProviderFileAsset: try: with path.open("rb") as f: - result = await client.beta.files.create( - file=f.read(), - file_name=path.name, - file_type=mime_type, + result = await client.beta.files.upload( + file=(path.name, f.read(), mime_type), extra_headers={"anthropic-beta": "files-api-2025-04-14"}, ) diff --git a/tests/test_providers.py b/tests/test_providers.py index bf55ffb..d5cef9f 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -2203,7 +2203,7 @@ class _FakeBetaFiles: def __init__(self) -> None: self.last_kwargs: dict[str, Any] = {} - async def create(self, **kwargs: Any) -> Any: + async def upload(self, **kwargs: Any) -> Any: self.last_kwargs = kwargs return type("Result", (), {"id": "file_123"})() @@ -2225,9 +2225,7 @@ async def create(self, **kwargs: Any) -> Any: assert asset.provider == "anthropic" assert asset.mime_type == "image/jpeg" - assert fake_files.last_kwargs["file"] == b"image data" - assert fake_files.last_kwargs["file_name"] == "test.jpg" - assert fake_files.last_kwargs["file_type"] == "image/jpeg" + assert fake_files.last_kwargs["file"] == ("test.jpg", b"image data", "image/jpeg") assert fake_files.last_kwargs["extra_headers"] == { "anthropic-beta": "files-api-2025-04-14" }