diff --git a/.changeset/update-known-models.md b/.changeset/update-known-models.md new file mode 100644 index 00000000..f56c3031 --- /dev/null +++ b/.changeset/update-known-models.md @@ -0,0 +1,5 @@ +--- +"@perstack/core": patch +--- + +Add latest models to known-models: claude-opus-4-6, gpt-5.1, gpt-5.2, gpt-5.2-pro, gemini-3-flash-preview diff --git a/e2e/experts/bundled-base.toml b/e2e/experts/bundled-base.toml index 4621717a..d4cf2336 100644 --- a/e2e/experts/bundled-base.toml +++ b/e2e/experts/bundled-base.toml @@ -1,7 +1,7 @@ # E2E test configuration for bundled base skill with InMemoryTransport # Tests that the bundled @perstack/base uses in-memory transport (no process spawn) -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/cli-commands.toml b/e2e/experts/cli-commands.toml index 30e3fe0e..085986fa 100644 --- a/e2e/experts/cli-commands.toml +++ b/e2e/experts/cli-commands.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml index 71008b46..18e8fc46 100644 --- a/e2e/experts/continue-resume.toml +++ b/e2e/experts/continue-resume.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index 55055572..5eedff64 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/error-handling.toml b/e2e/experts/error-handling.toml index 222313f0..e52f496e 100644 --- a/e2e/experts/error-handling.toml +++ b/e2e/experts/error-handling.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git 
a/e2e/experts/errors.toml b/e2e/experts/errors.toml index 73043a0e..771a20b5 100644 --- a/e2e/experts/errors.toml +++ b/e2e/experts/errors.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/global-runtime.toml b/e2e/experts/global-runtime.toml index 2b6cfbaa..a367b869 100644 --- a/e2e/experts/global-runtime.toml +++ b/e2e/experts/global-runtime.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/lazy-init.toml b/e2e/experts/lazy-init.toml index f23ce174..0bb317a6 100644 --- a/e2e/experts/lazy-init.toml +++ b/e2e/experts/lazy-init.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/lockfile.toml b/e2e/experts/lockfile.toml index 0cd4e032..28bd6f85 100644 --- a/e2e/experts/lockfile.toml +++ b/e2e/experts/lockfile.toml @@ -1,7 +1,7 @@ # E2E test configuration for lockfile functionality # Tests perstack install and lockfile-based execution -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index cd7244fb..d8207b07 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index d135cb26..8595784d 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index 028d47cf..bb08ace9 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ -1,4 +1,4 @@ 
-model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/parallel-mcp.toml b/e2e/experts/parallel-mcp.toml index 82a4735a..81908cdf 100644 --- a/e2e/experts/parallel-mcp.toml +++ b/e2e/experts/parallel-mcp.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/runtime-version-future.toml b/e2e/experts/runtime-version-future.toml index d9afec29..b5e79f0d 100644 --- a/e2e/experts/runtime-version-future.toml +++ b/e2e/experts/runtime-version-future.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/runtime-version.toml b/e2e/experts/runtime-version.toml index ab82ec5a..55796b4e 100644 --- a/e2e/experts/runtime-version.toml +++ b/e2e/experts/runtime-version.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/skills.toml b/e2e/experts/skills.toml index eeac3271..0d93c255 100644 --- a/e2e/experts/skills.toml +++ b/e2e/experts/skills.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml index 67a7d514..5dd91402 100644 --- a/e2e/experts/special-tools.toml +++ b/e2e/experts/special-tools.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/versioned-base.toml b/e2e/experts/versioned-base.toml index 2ac8890b..9ff3ce3a 100644 --- a/e2e/experts/versioned-base.toml +++ b/e2e/experts/versioned-base.toml @@ -1,7 +1,7 @@ # E2E test configuration for versioned base skill with StdioTransport # Tests that pinning a version falls back to npx (StdioTransport) -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = 
"anthropic" diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts index f22cbe74..33c72a62 100644 --- a/e2e/perstack-cli/options.test.ts +++ b/e2e/perstack-cli/options.test.ts @@ -48,7 +48,7 @@ describe.concurrent("CLI Options", () => { "--config", GLOBAL_RUNTIME_CONFIG, "--model", - "claude-sonnet-4-5", + "claude-haiku-4-5", "e2e-global-runtime", "Say hello", ], diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts index 680da3fc..dcf1e1fe 100644 --- a/e2e/perstack-cli/providers.test.ts +++ b/e2e/perstack-cli/providers.test.ts @@ -19,7 +19,7 @@ const CONFIG = "./e2e/experts/providers.toml" const LLM_TIMEOUT = 120000 const providers = [ - { provider: "openai", model: "gpt-4.1", hasKey: hasOpenAIKey }, + { provider: "openai", model: "gpt-5-nano", hasKey: hasOpenAIKey }, { provider: "anthropic", model: "claude-haiku-4-5", hasKey: hasAnthropicKey }, { provider: "google", model: "gemini-2.5-flash", hasKey: hasGoogleKey }, ] diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts index a30c06f6..ebe2d7d6 100644 --- a/e2e/perstack-cli/reasoning-budget.test.ts +++ b/e2e/perstack-cli/reasoning-budget.test.ts @@ -97,8 +97,8 @@ async function runReasoningTest( describe("Reasoning Budget", () => { describe("Anthropic Extended Thinking", () => { - // Note: Claude claude-sonnet-4-5 supports extended thinking - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + // Note: claude-haiku-4-5 supports extended thinking + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "should produce reasoning tokens when budget is set", @@ -152,8 +152,8 @@ describe("Reasoning Budget", () => { }) describe("OpenAI Reasoning Effort", () => { - // Note: o3-mini supports reasoning effort - const OPENAI_MODEL = "o3-mini" + // Note: gpt-5-nano supports reasoning effort + const OPENAI_MODEL = "gpt-5-nano" it( "should produce reasoning tokens when budget is set", diff --git 
a/e2e/perstack-cli/streaming.test.ts b/e2e/perstack-cli/streaming.test.ts index 29a01ee0..77618c2a 100644 --- a/e2e/perstack-cli/streaming.test.ts +++ b/e2e/perstack-cli/streaming.test.ts @@ -32,7 +32,7 @@ function filterStreamingEvents(events: ParsedEvent[]): ParsedEvent[] { describe("Streaming Events", () => { describe("Event Sequence with Reasoning", () => { - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "emits reasoning events in correct order (start → stream... → complete)", @@ -180,7 +180,7 @@ describe("Streaming Events", () => { describe("Without Reasoning", () => { // Use a model/provider without reasoning or with reasoning disabled - const ANTHROPIC_MODEL = "claude-sonnet-4-20250514" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "skips reasoning events when budget is none", @@ -220,7 +220,7 @@ describe("Streaming Events", () => { }) describe("Streaming Delta Content", () => { - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "streamReasoning events contain non-empty deltas", diff --git a/packages/core/src/known-models/index.ts b/packages/core/src/known-models/index.ts index b20ba2c9..0f1c3488 100644 --- a/packages/core/src/known-models/index.ts +++ b/packages/core/src/known-models/index.ts @@ -3,6 +3,11 @@ export const knownModels = [ provider: "anthropic", models: [ // https://docs.claude.com/en/docs/about-claude/models/overview#model-comparison-table + { + name: "claude-opus-4-6", + contextWindow: 200_000, + maxOutputTokens: 128_000, + }, { name: "claude-opus-4-5", contextWindow: 200_000, @@ -48,6 +53,12 @@ export const knownModels = [ { provider: "google", models: [ + // https://ai.google.dev/gemini-api/docs/models#gemini-3-flash + { + name: "gemini-3-flash-preview", + contextWindow: 1_048_576, + maxOutputTokens: 65_536, + }, // https://ai.google.dev/gemini-api/docs/models#gemini-3-pro { name: "gemini-3-pro-preview", @@ -95,6 +106,24 @@ export const 
knownModels = [ contextWindow: 400_000, maxOutputTokens: 128_000, }, + // https://platform.openai.com/docs/models/gpt-5.2 + { + name: "gpt-5.2", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, + // https://platform.openai.com/docs/models/gpt-5.2-pro + { + name: "gpt-5.2-pro", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, + // https://platform.openai.com/docs/models/gpt-5.1 + { + name: "gpt-5.1", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, // https://platform.openai.com/docs/models/gpt-5-chat-latest { name: "gpt-5-chat-latest",