From a6caf6e39ecc7af0214a3fe905ad0db3d4cfd572 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sun, 15 Feb 2026 14:14:33 +0000 Subject: [PATCH 1/3] chore: update known-models and E2E tests to latest model IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing models to known-models: - claude-opus-4-6 (Anthropic latest flagship) - gpt-5.1, gpt-5.2, gpt-5.2-pro (OpenAI latest) - gemini-3-flash-preview (Google latest) Update E2E test models: - o3-mini → gpt-5-nano (o3-mini deprecated) - gpt-4.1 → gpt-5-nano (providers test) - claude-sonnet-4-20250514 → claude-sonnet-4-5 (streaming test) Co-Authored-By: Claude Opus 4.6 --- e2e/perstack-cli/providers.test.ts | 2 +- e2e/perstack-cli/reasoning-budget.test.ts | 4 ++-- e2e/perstack-cli/streaming.test.ts | 2 +- packages/core/src/known-models/index.ts | 29 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts index 680da3fc..dcf1e1fe 100644 --- a/e2e/perstack-cli/providers.test.ts +++ b/e2e/perstack-cli/providers.test.ts @@ -19,7 +19,7 @@ const CONFIG = "./e2e/experts/providers.toml" const LLM_TIMEOUT = 120000 const providers = [ - { provider: "openai", model: "gpt-4.1", hasKey: hasOpenAIKey }, + { provider: "openai", model: "gpt-5-nano", hasKey: hasOpenAIKey }, { provider: "anthropic", model: "claude-haiku-4-5", hasKey: hasAnthropicKey }, { provider: "google", model: "gemini-2.5-flash", hasKey: hasGoogleKey }, ] diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts index a30c06f6..d94ef263 100644 --- a/e2e/perstack-cli/reasoning-budget.test.ts +++ b/e2e/perstack-cli/reasoning-budget.test.ts @@ -152,8 +152,8 @@ describe("Reasoning Budget", () => { }) describe("OpenAI Reasoning Effort", () => { - // Note: o3-mini supports reasoning effort - const OPENAI_MODEL = "o3-mini" + // Note: gpt-5-nano supports reasoning effort + const OPENAI_MODEL = "gpt-5-nano" it( "should produce reasoning tokens when budget is set", diff --git a/e2e/perstack-cli/streaming.test.ts b/e2e/perstack-cli/streaming.test.ts index 29a01ee0..001eb196 100644 --- a/e2e/perstack-cli/streaming.test.ts +++ b/e2e/perstack-cli/streaming.test.ts @@ -180,7 +180,7 @@ describe("Streaming Events", () => { describe("Without Reasoning", () => { // Use a model/provider without reasoning or with reasoning disabled - const ANTHROPIC_MODEL = "claude-sonnet-4-20250514" + const ANTHROPIC_MODEL = "claude-sonnet-4-5" it( "skips reasoning events when budget is none", diff --git a/packages/core/src/known-models/index.ts b/packages/core/src/known-models/index.ts index b20ba2c9..0f1c3488 100644 --- a/packages/core/src/known-models/index.ts +++ b/packages/core/src/known-models/index.ts @@ -3,6 +3,11 @@ export const knownModels = [ provider: "anthropic", models: [ // https://docs.claude.com/en/docs/about-claude/models/overview#model-comparison-table + { + name: "claude-opus-4-6", + contextWindow: 200_000, + maxOutputTokens: 128_000, + }, { name: "claude-opus-4-5", contextWindow: 200_000, @@ -48,6 +53,12 @@ export const knownModels = [ { provider: "google", models: [ + // https://ai.google.dev/gemini-api/docs/models#gemini-3-flash + { + name: "gemini-3-flash-preview", + contextWindow: 1_048_576, + maxOutputTokens: 65_536, + }, // https://ai.google.dev/gemini-api/docs/models#gemini-3-pro { name: "gemini-3-pro-preview", @@ -95,6 +106,24 @@ export const knownModels = [ contextWindow: 400_000, maxOutputTokens: 128_000, }, + // https://platform.openai.com/docs/models/gpt-5.2 + { + name: "gpt-5.2", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, + // https://platform.openai.com/docs/models/gpt-5.2-pro + { + name: "gpt-5.2-pro", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, + // https://platform.openai.com/docs/models/gpt-5.1 + { + name: "gpt-5.1", + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, // https://platform.openai.com/docs/models/gpt-5-chat-latest { name: "gpt-5-chat-latest", From 093964923eba5fef7ee866f7b37ee752de4f697e Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sun, 15 Feb 2026 14:16:13 +0000 Subject: [PATCH 2/3] chore: switch all E2E tests to cheapest models per provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Anthropic: claude-sonnet-4-5 → claude-haiku-4-5 (18 TOMLs + 3 test files) - OpenAI: already gpt-5-nano from previous commit - Google: gemini-2.5-flash unchanged (already cheapest stable) Co-Authored-By: Claude Opus 4.6 --- e2e/experts/bundled-base.toml | 2 +- e2e/experts/cli-commands.toml | 2 +- e2e/experts/continue-resume.toml | 2 +- e2e/experts/delegate-chain.toml | 2 +- e2e/experts/error-handling.toml | 2 +- e2e/experts/errors.toml | 2 +- e2e/experts/global-runtime.toml | 2 +- e2e/experts/lazy-init.toml | 2 +- e2e/experts/lockfile.toml | 2 +- e2e/experts/mixed-tools.toml | 2 +- e2e/experts/multi-modal.toml | 2 +- e2e/experts/parallel-delegate.toml | 2 +- e2e/experts/parallel-mcp.toml | 2 +- e2e/experts/runtime-version-future.toml | 2 +- e2e/experts/runtime-version.toml | 2 +- e2e/experts/skills.toml | 2 +- e2e/experts/special-tools.toml | 2 +- e2e/experts/versioned-base.toml | 2 +- e2e/perstack-cli/options.test.ts | 2 +- e2e/perstack-cli/reasoning-budget.test.ts | 4 ++-- e2e/perstack-cli/streaming.test.ts | 6 +++--- 21 files changed, 24 insertions(+), 24 deletions(-) diff --git a/e2e/experts/bundled-base.toml b/e2e/experts/bundled-base.toml index 4621717a..d4cf2336 100644 --- a/e2e/experts/bundled-base.toml +++ b/e2e/experts/bundled-base.toml @@ -1,7 +1,7 @@ # E2E test configuration for bundled base skill with InMemoryTransport # Tests that the bundled @perstack/base uses in-memory transport (no process spawn) -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/cli-commands.toml b/e2e/experts/cli-commands.toml index 30e3fe0e..085986fa 100644 --- a/e2e/experts/cli-commands.toml +++ b/e2e/experts/cli-commands.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml index 71008b46..18e8fc46 100644 --- a/e2e/experts/continue-resume.toml +++ b/e2e/experts/continue-resume.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index 55055572..5eedff64 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/error-handling.toml b/e2e/experts/error-handling.toml index 222313f0..e52f496e 100644 --- a/e2e/experts/error-handling.toml +++ b/e2e/experts/error-handling.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/errors.toml b/e2e/experts/errors.toml index 73043a0e..771a20b5 100644 --- a/e2e/experts/errors.toml +++ b/e2e/experts/errors.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/global-runtime.toml b/e2e/experts/global-runtime.toml index 2b6cfbaa..a367b869 100644 --- a/e2e/experts/global-runtime.toml +++ b/e2e/experts/global-runtime.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/lazy-init.toml b/e2e/experts/lazy-init.toml index f23ce174..0bb317a6 100644 --- a/e2e/experts/lazy-init.toml +++ b/e2e/experts/lazy-init.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/lockfile.toml b/e2e/experts/lockfile.toml index 0cd4e032..28bd6f85 100644 --- a/e2e/experts/lockfile.toml +++ b/e2e/experts/lockfile.toml @@ -1,7 +1,7 @@ # E2E test configuration for lockfile functionality # Tests perstack install and lockfile-based execution -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index cd7244fb..d8207b07 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index d135cb26..8595784d 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index 028d47cf..bb08ace9 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/parallel-mcp.toml b/e2e/experts/parallel-mcp.toml index 82a4735a..81908cdf 100644 --- a/e2e/experts/parallel-mcp.toml +++ b/e2e/experts/parallel-mcp.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/runtime-version-future.toml b/e2e/experts/runtime-version-future.toml index d9afec29..b5e79f0d 100644 --- a/e2e/experts/runtime-version-future.toml +++ b/e2e/experts/runtime-version-future.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/runtime-version.toml b/e2e/experts/runtime-version.toml index ab82ec5a..55796b4e 100644 --- a/e2e/experts/runtime-version.toml +++ b/e2e/experts/runtime-version.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/skills.toml b/e2e/experts/skills.toml index eeac3271..0d93c255 100644 --- a/e2e/experts/skills.toml +++ b/e2e/experts/skills.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml index 67a7d514..5dd91402 100644 --- a/e2e/experts/special-tools.toml +++ b/e2e/experts/special-tools.toml @@ -1,4 +1,4 @@ -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/experts/versioned-base.toml b/e2e/experts/versioned-base.toml index 2ac8890b..9ff3ce3a 100644 --- a/e2e/experts/versioned-base.toml +++ b/e2e/experts/versioned-base.toml @@ -1,7 +1,7 @@ # E2E test configuration for versioned base skill with StdioTransport # Tests that pinning a version falls back to npx (StdioTransport) -model = "claude-sonnet-4-5" +model = "claude-haiku-4-5" [provider] providerName = "anthropic" diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts index f22cbe74..33c72a62 100644 --- a/e2e/perstack-cli/options.test.ts +++ b/e2e/perstack-cli/options.test.ts @@ -48,7 +48,7 @@ describe.concurrent("CLI Options", () => { "--config", GLOBAL_RUNTIME_CONFIG, "--model", - "claude-sonnet-4-5", + "claude-haiku-4-5", "e2e-global-runtime", "Say hello", ], diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts index d94ef263..ebe2d7d6 100644 --- a/e2e/perstack-cli/reasoning-budget.test.ts +++ b/e2e/perstack-cli/reasoning-budget.test.ts @@ -97,8 +97,8 @@ async function runReasoningTest( describe("Reasoning Budget", () => { describe("Anthropic Extended Thinking", () => { - // Note: Claude claude-sonnet-4-5 supports extended thinking - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + // Note: Claude claude-haiku-4-5 supports extended thinking + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "should produce reasoning tokens when budget is set", diff --git a/e2e/perstack-cli/streaming.test.ts b/e2e/perstack-cli/streaming.test.ts index 001eb196..77618c2a 100644 --- a/e2e/perstack-cli/streaming.test.ts +++ b/e2e/perstack-cli/streaming.test.ts @@ -32,7 +32,7 @@ function filterStreamingEvents(events: ParsedEvent[]): ParsedEvent[] { describe("Streaming Events", () => { describe("Event Sequence with Reasoning", () => { - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "emits reasoning events in correct order (start → stream... → complete)", @@ -180,7 +180,7 @@ describe("Streaming Events", () => { describe("Without Reasoning", () => { // Use a model/provider without reasoning or with reasoning disabled - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "skips reasoning events when budget is none", @@ -220,7 +220,7 @@ describe("Streaming Events", () => { }) describe("Streaming Delta Content", () => { - const ANTHROPIC_MODEL = "claude-sonnet-4-5" + const ANTHROPIC_MODEL = "claude-haiku-4-5" it( "streamReasoning events contain non-empty deltas", From d7edf6f77edacd20a5efbed258bb947a7b393c7e Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sun, 15 Feb 2026 14:22:59 +0000 Subject: [PATCH 3/3] chore: add changeset for known-models update Co-Authored-By: Claude Opus 4.6 --- .changeset/update-known-models.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/update-known-models.md diff --git a/.changeset/update-known-models.md b/.changeset/update-known-models.md new file mode 100644 index 00000000..f56c3031 --- /dev/null +++ b/.changeset/update-known-models.md @@ -0,0 +1,5 @@ +--- +"@perstack/core": patch +--- + +Add latest models to known-models: claude-opus-4-6, gpt-5.1, gpt-5.2, gpt-5.2-pro, gemini-3-flash-preview