From 0bea7311f31f712bb568f36cadeb9329d0d33cfd Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:16:05 +0000 Subject: [PATCH 1/6] feat: add defaultModelTier to expert config for provider-aware model selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows each expert in perstack.toml to specify a model tier ("low", "middle", "high") instead of a concrete model name. The tier is automatically resolved to the appropriate model for the current provider (e.g., "low" → claude-haiku-4-5 for Anthropic, gpt-5-nano for OpenAI). CLI --model flag takes priority over tier. Delegation also respects per-expert tiers. Co-Authored-By: Claude Opus 4.6 --- packages/core/src/known-models/index.ts | 2 + .../core/src/known-models/model-tiers.test.ts | 74 ++++++++++++++++ packages/core/src/known-models/model-tiers.ts | 63 ++++++++++++++ packages/core/src/schemas/expert.ts | 5 ++ packages/core/src/schemas/perstack-toml.ts | 5 ++ .../orchestration/delegation-executor.test.ts | 87 +++++++++++++++++++ .../src/orchestration/delegation-executor.ts | 15 ++++ packages/tui/src/lib/context.ts | 1 + packages/tui/src/run-handler.ts | 15 +++- packages/tui/src/start-handler.ts | 17 +++- 10 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 packages/core/src/known-models/model-tiers.test.ts create mode 100644 packages/core/src/known-models/model-tiers.ts diff --git a/packages/core/src/known-models/index.ts b/packages/core/src/known-models/index.ts index 0f1c3488..1cd68f3c 100644 --- a/packages/core/src/known-models/index.ts +++ b/packages/core/src/known-models/index.ts @@ -1,3 +1,5 @@ +export * from "./model-tiers.js" + export const knownModels = [ { provider: "anthropic", diff --git a/packages/core/src/known-models/model-tiers.test.ts b/packages/core/src/known-models/model-tiers.test.ts new file mode 100644 index 00000000..3d571cd5 --- /dev/null +++ b/packages/core/src/known-models/model-tiers.test.ts @@ -0,0 +1,74 
@@ +import { describe, expect, it } from "bun:test" +import { modelTierMap, modelTierSchema, resolveModelTier } from "./model-tiers.js" + +describe("@perstack/core: modelTierSchema", () => { + it("accepts valid tier values", () => { + expect(modelTierSchema.parse("low")).toBe("low") + expect(modelTierSchema.parse("middle")).toBe("middle") + expect(modelTierSchema.parse("high")).toBe("high") + }) + + it("rejects invalid tier values", () => { + expect(() => modelTierSchema.parse("small")).toThrow() + expect(() => modelTierSchema.parse("large")).toThrow() + expect(() => modelTierSchema.parse("")).toThrow() + expect(() => modelTierSchema.parse(123)).toThrow() + }) +}) + +describe("@perstack/core: resolveModelTier", () => { + it("resolves anthropic tiers", () => { + expect(resolveModelTier("anthropic", "low")).toBe("claude-haiku-4-5") + expect(resolveModelTier("anthropic", "middle")).toBe("claude-sonnet-4-5") + expect(resolveModelTier("anthropic", "high")).toBe("claude-opus-4-6") + }) + + it("resolves openai tiers", () => { + expect(resolveModelTier("openai", "low")).toBe("gpt-5-nano") + expect(resolveModelTier("openai", "middle")).toBe("gpt-5-mini") + expect(resolveModelTier("openai", "high")).toBe("gpt-5") + }) + + it("resolves google tiers", () => { + expect(resolveModelTier("google", "low")).toBe("gemini-2.5-flash-lite") + expect(resolveModelTier("google", "middle")).toBe("gemini-2.5-flash") + expect(resolveModelTier("google", "high")).toBe("gemini-2.5-pro") + }) + + it("resolves deepseek tiers", () => { + expect(resolveModelTier("deepseek", "low")).toBe("deepseek-chat") + expect(resolveModelTier("deepseek", "middle")).toBe("deepseek-chat") + expect(resolveModelTier("deepseek", "high")).toBe("deepseek-reasoner") + }) + + it("resolves ollama tiers", () => { + expect(resolveModelTier("ollama", "low")).toBe("gemma3:4b") + expect(resolveModelTier("ollama", "middle")).toBe("gemma3:12b") + expect(resolveModelTier("ollama", "high")).toBe("gpt-oss:120b") + }) + + 
it("resolves cloud-hosted provider tiers", () => { + expect(resolveModelTier("azure-openai", "high")).toBe("gpt-5") + expect(resolveModelTier("amazon-bedrock", "high")).toBe("claude-opus-4-6") + expect(resolveModelTier("google-vertex", "high")).toBe("gemini-2.5-pro") + }) + + it("covers all known providers", () => { + const expectedProviders = [ + "anthropic", + "google", + "openai", + "deepseek", + "ollama", + "azure-openai", + "amazon-bedrock", + "google-vertex", + ] + for (const provider of expectedProviders) { + expect(modelTierMap[provider]).toBeDefined() + expect(modelTierMap[provider].low).toBeDefined() + expect(modelTierMap[provider].middle).toBeDefined() + expect(modelTierMap[provider].high).toBeDefined() + } + }) +}) diff --git a/packages/core/src/known-models/model-tiers.ts b/packages/core/src/known-models/model-tiers.ts new file mode 100644 index 00000000..b61247bb --- /dev/null +++ b/packages/core/src/known-models/model-tiers.ts @@ -0,0 +1,63 @@ +import { z } from "zod" +import type { ProviderName } from "../schemas/provider-config.js" + +/** Model tier for provider-aware model selection */ +export type ModelTier = "low" | "middle" | "high" + +export const modelTierSchema = z.enum(["low", "middle", "high"]) + +/** + * Maps provider name + model tier to a concrete model name. + * Cloud-hosted variants (azure-openai, amazon-bedrock, google-vertex) + * use the same model names as their base providers. 
+ */ +export const modelTierMap: Record> = { + anthropic: { + low: "claude-haiku-4-5", + middle: "claude-sonnet-4-5", + high: "claude-opus-4-6", + }, + google: { + low: "gemini-2.5-flash-lite", + middle: "gemini-2.5-flash", + high: "gemini-2.5-pro", + }, + openai: { + low: "gpt-5-nano", + middle: "gpt-5-mini", + high: "gpt-5", + }, + deepseek: { + low: "deepseek-chat", + middle: "deepseek-chat", + high: "deepseek-reasoner", + }, + ollama: { + low: "gemma3:4b", + middle: "gemma3:12b", + high: "gpt-oss:120b", + }, + "azure-openai": { + low: "gpt-5-nano", + middle: "gpt-5-mini", + high: "gpt-5", + }, + "amazon-bedrock": { + low: "claude-haiku-4-5", + middle: "claude-sonnet-4-5", + high: "claude-opus-4-6", + }, + "google-vertex": { + low: "gemini-2.5-flash-lite", + middle: "gemini-2.5-flash", + high: "gemini-2.5-pro", + }, +} + +/** + * Resolve a model tier to a concrete model name for the given provider. + * Returns undefined if the provider has no tier mapping. + */ +export function resolveModelTier(providerName: ProviderName, tier: ModelTier): string | undefined { + return modelTierMap[providerName]?.[tier] +} diff --git a/packages/core/src/schemas/expert.ts b/packages/core/src/schemas/expert.ts index b0b6b2f4..406fb53c 100644 --- a/packages/core/src/schemas/expert.ts +++ b/packages/core/src/schemas/expert.ts @@ -6,6 +6,8 @@ import { maxExpertNameLength, tagNameRegex, } from "../constants/constants.js" +import type { ModelTier } from "../known-models/model-tiers.js" +import { modelTierSchema } from "../known-models/model-tiers.js" import { validateAllDelegations } from "../utils/expert-type.js" import type { AnthropicProviderSkill, ProviderToolOptions } from "./provider-tools.js" import { anthropicProviderSkillSchema, providerToolOptionsSchema } from "./provider-tools.js" @@ -37,6 +39,8 @@ export interface Expert { tags: string[] /** Minimum runtime version required to run this Expert */ minRuntimeVersion: RuntimeVersion + /** Default model tier for provider-aware 
model selection ("low", "middle", "high") */ + defaultModelTier?: ModelTier /** Provider-specific tool names to enable (e.g., "webSearch", "codeExecution") */ providerTools?: string[] /** Anthropic Agent Skills configuration */ @@ -104,6 +108,7 @@ export const expertBaseSchema = z.object({ delegates: z.array(z.string().regex(expertKeyRegex).min(1)).optional().default([]), tags: z.array(z.string().regex(tagNameRegex).min(1)).optional().default([]), minRuntimeVersion: runtimeVersionSchema.default("v1.0"), + defaultModelTier: modelTierSchema.optional(), providerTools: z.array(z.string()).optional(), providerSkills: z.array(anthropicProviderSkillSchema).optional(), providerToolOptions: providerToolOptionsSchema, diff --git a/packages/core/src/schemas/perstack-toml.ts b/packages/core/src/schemas/perstack-toml.ts index 82b449a3..3f253737 100644 --- a/packages/core/src/schemas/perstack-toml.ts +++ b/packages/core/src/schemas/perstack-toml.ts @@ -1,4 +1,6 @@ import { z } from "zod" +import type { ModelTier } from "../known-models/model-tiers.js" +import { modelTierSchema } from "../known-models/model-tiers.js" import { headersSchema } from "./provider-config.js" import { anthropicProviderSkillSchema, providerToolOptionsSchema } from "./provider-tools.js" import type { RuntimeVersion } from "./runtime-version.js" @@ -185,6 +187,8 @@ export interface PerstackConfigExpert { description?: string /** System instruction */ instruction: string + /** Default model tier for provider-aware model selection ("low", "middle", "high") */ + defaultModelTier?: ModelTier /** Skills configuration */ skills?: Record /** Delegates list */ @@ -245,6 +249,7 @@ export const perstackConfigSchema = z.object({ minRuntimeVersion: runtimeVersionSchema.optional(), description: z.string().optional(), instruction: z.string(), + defaultModelTier: modelTierSchema.optional(), skills: z .record( z.string(), diff --git a/packages/runtime/src/orchestration/delegation-executor.test.ts 
b/packages/runtime/src/orchestration/delegation-executor.test.ts index b1fe467c..9a82a2e0 100644 --- a/packages/runtime/src/orchestration/delegation-executor.test.ts +++ b/packages/runtime/src/orchestration/delegation-executor.test.ts @@ -382,6 +382,93 @@ describe("@perstack/runtime: delegation-executor", () => { warnSpy.mockRestore() }) + it("resolves delegate model from defaultModelTier", async () => { + const executor = new DelegationExecutor() + const setting = createMockSetting({ + model: "claude-sonnet-4-5", + providerConfig: { providerName: "anthropic" } as RunSetting["providerConfig"], + experts: { + "expert-a": { + key: "expert-a", + name: "expert-a", + version: "1.0.0", + instruction: "test", + skills: {}, + delegates: [], + tags: [], + minRuntimeVersion: "v1.0", + defaultModelTier: "low", + }, + } as RunSetting["experts"], + }) + const delegation = createMockDelegation({ + toolCallId: "tc-1", + expert: { key: "expert-a", name: "A", version: "1" }, + }) + const context = createMockContext() + const parentExpert = { key: "parent", name: "Parent", version: "1.0" } + + const resultCheckpoint: Checkpoint = { + ...createMockCheckpoint(), + messages: [ + { + id: "msg-expert-a", + type: "expertMessage", + contents: [{ type: "textPart", id: "txt-1", text: "Result" }], + }, + ], + } + const runFn = mock().mockResolvedValueOnce(resultCheckpoint) + + await executor.execute([delegation], setting, context, parentExpert, runFn) + + // Verify that the delegate was called with the tier-resolved model + const delegateSetting = runFn.mock.calls[0][0].setting + expect(delegateSetting.model).toBe("claude-haiku-4-5") + }) + + it("inherits parent model when delegate has no defaultModelTier", async () => { + const executor = new DelegationExecutor() + const setting = createMockSetting({ + model: "claude-sonnet-4-5", + experts: { + "expert-a": { + key: "expert-a", + name: "expert-a", + version: "1.0.0", + instruction: "test", + skills: {}, + delegates: [], + tags: [], + 
minRuntimeVersion: "v1.0", + }, + } as RunSetting["experts"], + }) + const delegation = createMockDelegation({ + toolCallId: "tc-1", + expert: { key: "expert-a", name: "A", version: "1" }, + }) + const context = createMockContext() + const parentExpert = { key: "parent", name: "Parent", version: "1.0" } + + const resultCheckpoint: Checkpoint = { + ...createMockCheckpoint(), + messages: [ + { + id: "msg-expert-a", + type: "expertMessage", + contents: [{ type: "textPart", id: "txt-1", text: "Result" }], + }, + ], + } + const runFn = mock().mockResolvedValueOnce(resultCheckpoint) + + await executor.execute([delegation], setting, context, parentExpert, runFn) + + const delegateSetting = runFn.mock.calls[0][0].setting + expect(delegateSetting.model).toBe("claude-sonnet-4-5") + }) + it("passes parent options to child runs", async () => { const executor = new DelegationExecutor() const setting = createMockSetting() diff --git a/packages/runtime/src/orchestration/delegation-executor.ts b/packages/runtime/src/orchestration/delegation-executor.ts index b7f07d09..c729aa42 100644 --- a/packages/runtime/src/orchestration/delegation-executor.ts +++ b/packages/runtime/src/orchestration/delegation-executor.ts @@ -13,6 +13,7 @@ import type { ToolResult, Usage, } from "@perstack/core" +import { resolveModelTier } from "@perstack/core" /** Reference to the parent Expert that delegated */ type DelegatedBy = NonNullable @@ -188,8 +189,22 @@ export class DelegationExecutor { const { expert, toolCallId, toolName, query } = delegation const delegateRunId = createId() + // Resolve per-expert model tier for the delegate expert + let delegateModel = parentSetting.model + const delegateExpert = parentSetting.experts?.[expert.key] + if (delegateExpert?.defaultModelTier) { + const tierModel = resolveModelTier( + parentSetting.providerConfig.providerName, + delegateExpert.defaultModelTier, + ) + if (tierModel) { + delegateModel = tierModel + } + } + const delegateSetting: RunSetting = { 
...parentSetting, + model: delegateModel, runId: delegateRunId, expertKey: expert.key, input: { text: query }, diff --git a/packages/tui/src/lib/context.ts b/packages/tui/src/lib/context.ts index 37161d0d..b3187eb1 100644 --- a/packages/tui/src/lib/context.ts +++ b/packages/tui/src/lib/context.ts @@ -81,6 +81,7 @@ export async function resolveRunContext(input: ResolveRunContextInput): Promise< skills: expert.skills, delegates: expert.delegates, tags: expert.tags, + defaultModelTier: expert.defaultModelTier, }, ] }), diff --git a/packages/tui/src/run-handler.ts b/packages/tui/src/run-handler.ts index 58c2dc3d..09a89fb0 100644 --- a/packages/tui/src/run-handler.ts +++ b/packages/tui/src/run-handler.ts @@ -3,6 +3,7 @@ import type { Lockfile, PerstackConfig, RunEvent, RuntimeEvent } from "@perstack import { createFilteredEventListener, parseWithFriendlyError, + resolveModelTier, runCommandInputSchema, validateEventFilter, } from "@perstack/core" @@ -57,6 +58,18 @@ export async function runHandler( expertKey: input.expertKey, }) + // Resolve per-expert model tier for the entry expert (CLI --model takes priority) + let resolvedModel = model + if (!input.options.model) { + const expertConfig = perstackConfig.experts?.[input.expertKey] + if (expertConfig?.defaultModelTier) { + const tierModel = resolveModelTier(providerConfig.providerName, expertConfig.defaultModelTier) + if (tierModel) { + resolvedModel = tierModel + } + } + } + if (handlerOptions?.additionalEnv) { Object.assign(env, handlerOptions.additionalEnv(env)) } @@ -80,7 +93,7 @@ export async function runHandler( : { text: input.query })) : { text: input.query }, experts, - model, + model: resolvedModel, providerConfig, reasoningBudget: input.options.reasoningBudget ?? perstackConfig.reasoningBudget, maxRetries: input.options.maxRetries ?? 
perstackConfig.maxRetries, diff --git a/packages/tui/src/start-handler.ts b/packages/tui/src/start-handler.ts index 860d0b49..19b8a3f4 100644 --- a/packages/tui/src/start-handler.ts +++ b/packages/tui/src/start-handler.ts @@ -5,6 +5,7 @@ import { type Lockfile, type PerstackConfig, parseWithFriendlyError, + resolveModelTier, startCommandInputSchema, } from "@perstack/core" import { @@ -127,6 +128,18 @@ export async function startHandler( return } + // Resolve per-expert model tier for the selected expert (CLI --model takes priority) + let resolvedModel = model + if (!input.options.model) { + const expertConfig = perstackConfig.experts?.[selection.expertKey] + if (expertConfig?.defaultModelTier) { + const tierModel = resolveModelTier(providerConfig.providerName, expertConfig.defaultModelTier) + if (tierModel) { + resolvedModel = tierModel + } + } + } + const lockfile = handlerOptions.lockfile // Phase 3: Execution loop @@ -163,7 +176,7 @@ export async function startHandler( query: currentQuery ?? undefined, config: { runtimeVersion, - model, + model: resolvedModel, maxRetries, timeout, contextWindowUsage: currentCheckpoint?.contextWindowUsage ?? 0, @@ -186,7 +199,7 @@ export async function startHandler( ? parseInteractiveToolCallResult(resolvedQuery, currentCheckpoint) : { text: resolvedQuery }, experts, - model, + model: resolvedModel, providerConfig, reasoningBudget: input.options.reasoningBudget ?? perstackConfig.reasoningBudget, maxRetries: input.options.maxRetries ?? 
perstackConfig.maxRetries, From c3e7590aa1b1b4daa71e99e34542f87a7a407184 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:19:42 +0000 Subject: [PATCH 2/6] chore: add changeset for defaultModelTier feature Co-Authored-By: Claude Opus 4.6 --- .changeset/default-model-tier.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .changeset/default-model-tier.md diff --git a/.changeset/default-model-tier.md b/.changeset/default-model-tier.md new file mode 100644 index 00000000..caa2974d --- /dev/null +++ b/.changeset/default-model-tier.md @@ -0,0 +1,14 @@ +--- +"@perstack/core": minor +"@perstack/runtime": minor +"@perstack/tui": minor +"@perstack/react": minor +"@perstack/perstack-toml": minor +"@perstack/skill-manager": minor +"@perstack/log": minor +"@perstack/installer": minor +"@perstack/filesystem-storage": minor +"@perstack/tui-components": minor +--- + +Add `defaultModelTier` to expert config for provider-aware model selection ("low", "middle", "high") From 5dfd485b07b78bcfed43249da253ff5b76719de0 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:27:13 +0000 Subject: [PATCH 3/6] chore: change changeset to patch bump Co-Authored-By: Claude Opus 4.6 --- .changeset/default-model-tier.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.changeset/default-model-tier.md b/.changeset/default-model-tier.md index caa2974d..f971d96f 100644 --- a/.changeset/default-model-tier.md +++ b/.changeset/default-model-tier.md @@ -1,14 +1,14 @@ --- -"@perstack/core": minor -"@perstack/runtime": minor -"@perstack/tui": minor -"@perstack/react": minor -"@perstack/perstack-toml": minor -"@perstack/skill-manager": minor -"@perstack/log": minor -"@perstack/installer": minor -"@perstack/filesystem-storage": minor -"@perstack/tui-components": minor +"@perstack/core": patch +"@perstack/runtime": patch +"@perstack/tui": patch +"@perstack/react": patch +"@perstack/perstack-toml": patch 
+"@perstack/skill-manager": patch +"@perstack/log": patch +"@perstack/installer": patch +"@perstack/filesystem-storage": patch +"@perstack/tui-components": patch --- Add `defaultModelTier` to expert config for provider-aware model selection ("low", "middle", "high") From 1bcc9293e5e6879ce1cdabaf95d003ac74f433af Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:39:45 +0000 Subject: [PATCH 4/6] refactor: move tier to knownModels and derive resolveModelTier from it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of a separate modelTierMap, each model in knownModels now has a tier field. resolveModelTier() finds the first matching model for the given provider and tier. Cloud-hosted providers fall back to their base provider (azure-openai→openai, amazon-bedrock→anthropic, google-vertex→google). Co-Authored-By: Claude Opus 4.6 --- packages/core/src/known-models/index.ts | 33 +++++++++++ .../core/src/known-models/model-tiers.test.ts | 48 ++++++++------- packages/core/src/known-models/model-tiers.ts | 59 ++++--------------- 3 files changed, 69 insertions(+), 71 deletions(-) diff --git a/packages/core/src/known-models/index.ts b/packages/core/src/known-models/index.ts index 1cd68f3c..c0f5def2 100644 --- a/packages/core/src/known-models/index.ts +++ b/packages/core/src/known-models/index.ts @@ -7,46 +7,55 @@ export const knownModels = [ // https://docs.claude.com/en/docs/about-claude/models/overview#model-comparison-table { name: "claude-opus-4-6", + tier: "high" as const, contextWindow: 200_000, maxOutputTokens: 128_000, }, { name: "claude-opus-4-5", + tier: "high" as const, contextWindow: 200_000, maxOutputTokens: 32_000, }, { name: "claude-opus-4-1", + tier: "high" as const, contextWindow: 200_000, maxOutputTokens: 32_000, }, { name: "claude-opus-4-20250514", + tier: "high" as const, contextWindow: 200_000, maxOutputTokens: 32_000, }, { name: "claude-sonnet-4-5", + tier: "middle" as const, 
contextWindow: 200_000, maxOutputTokens: 64_000, }, { name: "claude-sonnet-4-20250514", + tier: "middle" as const, contextWindow: 200_000, maxOutputTokens: 64_000, }, { name: "claude-3-7-sonnet-20250219", + tier: "middle" as const, contextWindow: 200_000, maxOutputTokens: 64_000, }, { name: "claude-haiku-4-5", + tier: "low" as const, contextWindow: 200_000, maxOutputTokens: 8_192, }, { name: "claude-3-5-haiku-latest", + tier: "low" as const, contextWindow: 200_000, maxOutputTokens: 8_192, }, @@ -58,30 +67,35 @@ export const knownModels = [ // https://ai.google.dev/gemini-api/docs/models#gemini-3-flash { name: "gemini-3-flash-preview", + tier: "middle" as const, contextWindow: 1_048_576, maxOutputTokens: 65_536, }, // https://ai.google.dev/gemini-api/docs/models#gemini-3-pro { name: "gemini-3-pro-preview", + tier: "high" as const, contextWindow: 1_048_576, maxOutputTokens: 65_536, }, // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-pro { name: "gemini-2.5-pro", + tier: "high" as const, contextWindow: 1_048_576, maxOutputTokens: 65_536, }, // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash { name: "gemini-2.5-flash", + tier: "middle" as const, contextWindow: 1_048_576, maxOutputTokens: 65_536, }, // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite { name: "gemini-2.5-flash-lite", + tier: "low" as const, contextWindow: 1_048_576, maxOutputTokens: 65_536, }, @@ -93,66 +107,77 @@ export const knownModels = [ // https://platform.openai.com/docs/models/gpt-5 { name: "gpt-5", + tier: "high" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5-mini { name: "gpt-5-mini", + tier: "middle" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5-nano { name: "gpt-5-nano", + tier: "low" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5.2 { name: "gpt-5.2", + tier: 
"high" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5.2-pro { name: "gpt-5.2-pro", + tier: "high" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5.1 { name: "gpt-5.1", + tier: "high" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5-chat-latest { name: "gpt-5-chat-latest", + tier: "middle" as const, contextWindow: 128_000, maxOutputTokens: 16_384, }, // https://platform.openai.com/docs/models/o4-mini { name: "o4-mini", + tier: "middle" as const, contextWindow: 200_000, maxOutputTokens: 100_000, }, // https://platform.openai.com/docs/models/o3 { name: "o3", + tier: "high" as const, contextWindow: 200_000, maxOutputTokens: 10_000, }, // https://platform.openai.com/docs/models/o3-mini { name: "o3-mini", + tier: "middle" as const, contextWindow: 200_000, maxOutputTokens: 10_000, }, // https://platform.openai.com/docs/models/gpt-4.1 { name: "gpt-4.1", + tier: "middle" as const, contextWindow: 1_047_576, maxOutputTokens: 32_768, }, @@ -163,11 +188,13 @@ export const knownModels = [ models: [ { name: "deepseek-chat", + tier: "middle" as const, contextWindow: 128_000, maxOutputTokens: 8_192, }, { name: "deepseek-reasoner", + tier: "high" as const, contextWindow: 128_000, maxOutputTokens: 64_000, }, @@ -179,36 +206,42 @@ export const knownModels = [ // https://platform.openai.com/docs/models/gpt-oss-20b { name: "gpt-oss:20b", + tier: "middle" as const, contextWindow: 131_072, maxOutputTokens: 131_072, }, // https://platform.openai.com/docs/models/gpt-oss-120b { name: "gpt-oss:120b", + tier: "high" as const, contextWindow: 131_072, maxOutputTokens: 131_072, }, // https://ai.google.dev/gemma/docs/core/model_card_3 { name: "gemma3:1b", + tier: "low" as const, contextWindow: 32_000, maxOutputTokens: 32_000, }, // https://ai.google.dev/gemma/docs/core/model_card_3 { name: "gemma3:4b", + tier: 
"low" as const, contextWindow: 128_000, maxOutputTokens: 128_000, }, // https://ai.google.dev/gemma/docs/core/model_card_3 { name: "gemma3:12b", + tier: "middle" as const, contextWindow: 128_000, maxOutputTokens: 128_000, }, // https://ai.google.dev/gemma/docs/core/model_card_3 { name: "gemma3:27b", + tier: "middle" as const, contextWindow: 128_000, maxOutputTokens: 128_000, }, diff --git a/packages/core/src/known-models/model-tiers.test.ts b/packages/core/src/known-models/model-tiers.test.ts index 3d571cd5..49c21590 100644 --- a/packages/core/src/known-models/model-tiers.test.ts +++ b/packages/core/src/known-models/model-tiers.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "bun:test" -import { modelTierMap, modelTierSchema, resolveModelTier } from "./model-tiers.js" +import { knownModels } from "./index.js" +import { modelTierSchema, resolveModelTier } from "./model-tiers.js" describe("@perstack/core: modelTierSchema", () => { it("accepts valid tier values", () => { @@ -17,7 +18,7 @@ describe("@perstack/core: modelTierSchema", () => { }) describe("@perstack/core: resolveModelTier", () => { - it("resolves anthropic tiers", () => { + it("resolves anthropic tiers to the first matching model", () => { expect(resolveModelTier("anthropic", "low")).toBe("claude-haiku-4-5") expect(resolveModelTier("anthropic", "middle")).toBe("claude-sonnet-4-5") expect(resolveModelTier("anthropic", "high")).toBe("claude-opus-4-6") @@ -31,44 +32,41 @@ describe("@perstack/core: resolveModelTier", () => { it("resolves google tiers", () => { expect(resolveModelTier("google", "low")).toBe("gemini-2.5-flash-lite") - expect(resolveModelTier("google", "middle")).toBe("gemini-2.5-flash") - expect(resolveModelTier("google", "high")).toBe("gemini-2.5-pro") + expect(resolveModelTier("google", "middle")).toBe("gemini-3-flash-preview") + expect(resolveModelTier("google", "high")).toBe("gemini-3-pro-preview") }) it("resolves deepseek tiers", () => { - expect(resolveModelTier("deepseek", 
"low")).toBe("deepseek-chat") expect(resolveModelTier("deepseek", "middle")).toBe("deepseek-chat") expect(resolveModelTier("deepseek", "high")).toBe("deepseek-reasoner") }) it("resolves ollama tiers", () => { - expect(resolveModelTier("ollama", "low")).toBe("gemma3:4b") - expect(resolveModelTier("ollama", "middle")).toBe("gemma3:12b") + expect(resolveModelTier("ollama", "low")).toBe("gemma3:1b") + expect(resolveModelTier("ollama", "middle")).toBe("gpt-oss:20b") expect(resolveModelTier("ollama", "high")).toBe("gpt-oss:120b") }) - it("resolves cloud-hosted provider tiers", () => { + it("falls back to base provider for cloud-hosted providers", () => { expect(resolveModelTier("azure-openai", "high")).toBe("gpt-5") + expect(resolveModelTier("azure-openai", "middle")).toBe("gpt-5-mini") + expect(resolveModelTier("azure-openai", "low")).toBe("gpt-5-nano") expect(resolveModelTier("amazon-bedrock", "high")).toBe("claude-opus-4-6") - expect(resolveModelTier("google-vertex", "high")).toBe("gemini-2.5-pro") + expect(resolveModelTier("amazon-bedrock", "low")).toBe("claude-haiku-4-5") + expect(resolveModelTier("google-vertex", "high")).toBe("gemini-3-pro-preview") + expect(resolveModelTier("google-vertex", "low")).toBe("gemini-2.5-flash-lite") }) - it("covers all known providers", () => { - const expectedProviders = [ - "anthropic", - "google", - "openai", - "deepseek", - "ollama", - "azure-openai", - "amazon-bedrock", - "google-vertex", - ] - for (const provider of expectedProviders) { - expect(modelTierMap[provider]).toBeDefined() - expect(modelTierMap[provider].low).toBeDefined() - expect(modelTierMap[provider].middle).toBeDefined() - expect(modelTierMap[provider].high).toBeDefined() + it("returns undefined for provider with no matching tier", () => { + expect(resolveModelTier("deepseek", "low")).toBeUndefined() + }) + + it("every model in knownModels has a valid tier", () => { + const validTiers = new Set(["low", "middle", "high"]) + for (const provider of knownModels) { + 
for (const model of provider.models) { + expect(validTiers.has(model.tier)).toBe(true) + } } }) }) diff --git a/packages/core/src/known-models/model-tiers.ts b/packages/core/src/known-models/model-tiers.ts index b61247bb..24a64e24 100644 --- a/packages/core/src/known-models/model-tiers.ts +++ b/packages/core/src/known-models/model-tiers.ts @@ -1,5 +1,6 @@ import { z } from "zod" import type { ProviderName } from "../schemas/provider-config.js" +import { knownModels } from "./index.js" /** Model tier for provider-aware model selection */ export type ModelTier = "low" | "middle" | "high" @@ -7,57 +8,23 @@ export type ModelTier = "low" | "middle" | "high" export const modelTierSchema = z.enum(["low", "middle", "high"]) /** - * Maps provider name + model tier to a concrete model name. - * Cloud-hosted variants (azure-openai, amazon-bedrock, google-vertex) - * use the same model names as their base providers. + * Cloud-hosted providers that share model names with a base provider. + * Used to fall back when looking up models by tier. 
*/ -export const modelTierMap: Record> = { - anthropic: { - low: "claude-haiku-4-5", - middle: "claude-sonnet-4-5", - high: "claude-opus-4-6", - }, - google: { - low: "gemini-2.5-flash-lite", - middle: "gemini-2.5-flash", - high: "gemini-2.5-pro", - }, - openai: { - low: "gpt-5-nano", - middle: "gpt-5-mini", - high: "gpt-5", - }, - deepseek: { - low: "deepseek-chat", - middle: "deepseek-chat", - high: "deepseek-reasoner", - }, - ollama: { - low: "gemma3:4b", - middle: "gemma3:12b", - high: "gpt-oss:120b", - }, - "azure-openai": { - low: "gpt-5-nano", - middle: "gpt-5-mini", - high: "gpt-5", - }, - "amazon-bedrock": { - low: "claude-haiku-4-5", - middle: "claude-sonnet-4-5", - high: "claude-opus-4-6", - }, - "google-vertex": { - low: "gemini-2.5-flash-lite", - middle: "gemini-2.5-flash", - high: "gemini-2.5-pro", - }, +const cloudProviderFallback: Partial<Record<ProviderName, ProviderName>> = { + "azure-openai": "openai", + "amazon-bedrock": "anthropic", + "google-vertex": "google", } /** * Resolve a model tier to a concrete model name for the given provider. - * Returns undefined if the provider has no tier mapping. + * Returns the first model matching the tier in the provider's known models list. + * For cloud-hosted providers (azure-openai, amazon-bedrock, google-vertex), + * falls back to their base provider's models. */ export function resolveModelTier(providerName: ProviderName, tier: ModelTier): string | undefined { - return modelTierMap[providerName]?.[tier] + const lookupProvider = cloudProviderFallback[providerName] ?? 
providerName + const providerModels = knownModels.find((p) => p.provider === lookupProvider) + return providerModels?.models.find((m) => m.tier === tier)?.name } From eaac51656b068e4c11099e6e5f81df03f79bdcac Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 03:45:46 +0000 Subject: [PATCH 5/6] fix: correct OpenAI model tier assignments based on research MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - gpt-5.2: high → middle (Instant variant, mid-tier) - gpt-5.1: high → middle (mid-tier, gpt-4.1 successor) - Reorder OpenAI models to put gpt-5.2-pro first (latest high-tier default) Co-Authored-By: Claude Opus 4.6 --- packages/core/src/known-models/index.ts | 18 +++++++++--------- .../core/src/known-models/model-tiers.test.ts | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/core/src/known-models/index.ts b/packages/core/src/known-models/index.ts index c0f5def2..8625d119 100644 --- a/packages/core/src/known-models/index.ts +++ b/packages/core/src/known-models/index.ts @@ -104,6 +104,13 @@ export const knownModels = [ { provider: "openai", models: [ + // https://platform.openai.com/docs/models/gpt-5.2-pro + { + name: "gpt-5.2-pro", + tier: "high" as const, + contextWindow: 400_000, + maxOutputTokens: 128_000, + }, // https://platform.openai.com/docs/models/gpt-5 { name: "gpt-5", @@ -128,21 +135,14 @@ export const knownModels = [ // https://platform.openai.com/docs/models/gpt-5.2 { name: "gpt-5.2", - tier: "high" as const, - contextWindow: 400_000, - maxOutputTokens: 128_000, - }, - // https://platform.openai.com/docs/models/gpt-5.2-pro - { - name: "gpt-5.2-pro", - tier: "high" as const, + tier: "middle" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, // https://platform.openai.com/docs/models/gpt-5.1 { name: "gpt-5.1", - tier: "high" as const, + tier: "middle" as const, contextWindow: 400_000, maxOutputTokens: 128_000, }, diff --git 
a/packages/core/src/known-models/model-tiers.test.ts b/packages/core/src/known-models/model-tiers.test.ts index 49c21590..e5481e53 100644 --- a/packages/core/src/known-models/model-tiers.test.ts +++ b/packages/core/src/known-models/model-tiers.test.ts @@ -27,7 +27,7 @@ describe("@perstack/core: resolveModelTier", () => { it("resolves openai tiers", () => { expect(resolveModelTier("openai", "low")).toBe("gpt-5-nano") expect(resolveModelTier("openai", "middle")).toBe("gpt-5-mini") - expect(resolveModelTier("openai", "high")).toBe("gpt-5") + expect(resolveModelTier("openai", "high")).toBe("gpt-5.2-pro") }) it("resolves google tiers", () => { @@ -48,7 +48,7 @@ describe("@perstack/core: resolveModelTier", () => { }) it("falls back to base provider for cloud-hosted providers", () => { - expect(resolveModelTier("azure-openai", "high")).toBe("gpt-5") + expect(resolveModelTier("azure-openai", "high")).toBe("gpt-5.2-pro") expect(resolveModelTier("azure-openai", "middle")).toBe("gpt-5-mini") expect(resolveModelTier("azure-openai", "low")).toBe("gpt-5-nano") expect(resolveModelTier("amazon-bedrock", "high")).toBe("claude-opus-4-6") From e3b2b54872737d3836c88985bebbe539c340a79b Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 25 Feb 2026 04:28:40 +0000 Subject: [PATCH 6/6] refactor(e2e): migrate all e2e tests to defaultModelTier = "low" Replace hardcoded model names with defaultModelTier across all e2e TOML configs and remove CLI --model injection from test infrastructure. Model selection is now handled by perstack.toml tier resolution. 
Co-Authored-By: Claude Opus 4.6 --- e2e/experts/bundled-base.toml | 3 +-- e2e/experts/cli-commands.toml | 4 ++-- e2e/experts/continue-resume.toml | 4 ++-- e2e/experts/delegate-chain.toml | 5 +++-- e2e/experts/error-handling.toml | 4 ++-- e2e/experts/errors.toml | 4 ++-- e2e/experts/global-runtime.toml | 3 +-- e2e/experts/lockfile.toml | 3 +-- e2e/experts/mixed-tools.toml | 4 ++-- e2e/experts/multi-modal.toml | 4 ++-- e2e/experts/parallel-delegate.toml | 5 +++-- e2e/experts/parallel-mcp.toml | 3 +-- e2e/experts/providers.toml | 1 + e2e/experts/reasoning-budget.toml | 12 ++++++++++++ e2e/experts/runtime-version-future.toml | 4 ++-- e2e/experts/runtime-version.toml | 7 +++++-- e2e/experts/skills.toml | 8 ++++++-- e2e/experts/special-tools.toml | 3 +-- e2e/experts/versioned-base.toml | 3 +-- e2e/lib/round-robin.ts | 13 ++----------- e2e/lib/runner.ts | 15 ++++----------- e2e/perstack-cli/providers.test.ts | 9 ++++----- e2e/perstack-cli/reasoning-budget.test.ts | 9 +++------ e2e/perstack-cli/streaming.test.ts | 18 ------------------ 24 files changed, 63 insertions(+), 85 deletions(-) diff --git a/e2e/experts/bundled-base.toml b/e2e/experts/bundled-base.toml index b5d324a9..e785891c 100644 --- a/e2e/experts/bundled-base.toml +++ b/e2e/experts/bundled-base.toml @@ -1,8 +1,6 @@ # E2E test configuration for bundled base skill with InMemoryTransport # Tests that the bundled @perstack/base uses in-memory transport (no process spawn) -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" @@ -10,6 +8,7 @@ envPath = [".env", ".env.local"] # Default bundled base - uses InMemoryTransport [experts."e2e-bundled-base"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for bundled base with InMemoryTransport" instruction = """ diff --git a/e2e/experts/cli-commands.toml b/e2e/experts/cli-commands.toml index e79b206f..89ab28b4 100644 --- a/e2e/experts/cli-commands.toml +++ b/e2e/experts/cli-commands.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" 
- [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-publish-test"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for CLI publish command validation" instruction = """ @@ -19,6 +18,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-cli-simple"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for CLI log command validation" instruction = """ diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml index 99f40532..2bce0645 100644 --- a/e2e/experts/continue-resume.toml +++ b/e2e/experts/continue-resume.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-continue"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for continue functionality" instruction = """ @@ -29,6 +28,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "think"] [experts."e2e-resume"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for resume-from functionality" instruction = """ diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index deb099fc..c787dce8 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-delegate-chain"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for delegate chain" instruction = """ @@ -21,6 +20,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."@e2e-delegate-chain/level1"] +defaultModelTier = "low" version = "1.0.0" description = "First level delegate expert" instruction = """ @@ -36,6 +36,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."@e2e-delegate-chain/level2"] +defaultModelTier = "low" version = "1.0.0" description = 
"Second level delegate expert" instruction = """ diff --git a/e2e/experts/error-handling.toml b/e2e/experts/error-handling.toml index cd559be2..17cd88dd 100644 --- a/e2e/experts/error-handling.toml +++ b/e2e/experts/error-handling.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-tool-error-recovery"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for tool error recovery" instruction = """ @@ -20,6 +19,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "readTextFile"] [experts."e2e-invalid-delegate"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert with nonexistent delegate" instruction = """ diff --git a/e2e/experts/errors.toml b/e2e/experts/errors.toml index b94f9461..8facf1ec 100644 --- a/e2e/experts/errors.toml +++ b/e2e/experts/errors.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-mcp-error"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert with broken MCP skill" instruction = """ @@ -24,6 +23,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-invalid-provider"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for invalid provider testing" instruction = """ diff --git a/e2e/experts/global-runtime.toml b/e2e/experts/global-runtime.toml index 81c63cef..1884fecb 100644 --- a/e2e/experts/global-runtime.toml +++ b/e2e/experts/global-runtime.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-global-runtime"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for global runtime configuration" instruction = """ diff --git a/e2e/experts/lockfile.toml b/e2e/experts/lockfile.toml index 1b367bee..5b59e708 100644 --- 
a/e2e/experts/lockfile.toml +++ b/e2e/experts/lockfile.toml @@ -1,14 +1,13 @@ # E2E test configuration for lockfile functionality # Tests perstack install and lockfile-based execution -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-lockfile"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for lockfile functionality" instruction = """ diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index 399b379b..c3b6f0d1 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-mixed-tools"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for mixed tool calls (MCP + Delegate + Interactive)" instruction = """ @@ -39,6 +38,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "think"] [experts."@e2e-mixed-tools/helper"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test helper expert" instruction = """ diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index 61d3cace..26731c6b 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-pdf-reader"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for PDF file reading" instruction = """ @@ -22,6 +21,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "readPdfFile"] [experts."e2e-image-reader"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for image file reading" instruction = """ diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index e07789e5..d9850357 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ 
-1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-parallel-delegate"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for parallel delegation" instruction = """ @@ -21,6 +20,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "think"] [experts."@e2e-parallel-delegate/math"] +defaultModelTier = "low" version = "1.0.0" description = "Math delegate expert" instruction = """ @@ -34,6 +34,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."@e2e-parallel-delegate/text"] +defaultModelTier = "low" version = "1.0.0" description = "Text processing delegate expert" instruction = """ diff --git a/e2e/experts/parallel-mcp.toml b/e2e/experts/parallel-mcp.toml index 81908cdf..1dbbab72 100644 --- a/e2e/experts/parallel-mcp.toml +++ b/e2e/experts/parallel-mcp.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-parallel-mcp"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for parallel MCP tool calls" instruction = """ diff --git a/e2e/experts/providers.toml b/e2e/experts/providers.toml index 6a6dcc02..8a1d8864 100644 --- a/e2e/experts/providers.toml +++ b/e2e/experts/providers.toml @@ -1,4 +1,5 @@ [experts."e2e-providers"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for multi-provider testing" instruction = """ diff --git a/e2e/experts/reasoning-budget.toml b/e2e/experts/reasoning-budget.toml index 583d9adc..d76a9a5a 100644 --- a/e2e/experts/reasoning-budget.toml +++ b/e2e/experts/reasoning-budget.toml @@ -8,6 +8,7 @@ envPath = [".env", ".env.local"] # ============================================================================= [experts."e2e-reasoning-anthropic-minimal"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Anthropic reasoning with minimal budget" 
instruction = """ @@ -22,6 +23,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-anthropic-low"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Anthropic reasoning with low budget" instruction = """ @@ -36,6 +38,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-anthropic-medium"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Anthropic reasoning with medium budget" instruction = """ @@ -50,6 +53,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-anthropic-high"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Anthropic reasoning with high budget" instruction = """ @@ -68,6 +72,7 @@ pick = ["attemptCompletion"] # ============================================================================= [experts."e2e-reasoning-openai-minimal"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for OpenAI reasoning with minimal budget" instruction = """ @@ -82,6 +87,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-openai-low"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for OpenAI reasoning with low budget" instruction = """ @@ -96,6 +102,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-openai-medium"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for OpenAI reasoning with medium budget" instruction = """ @@ -110,6 +117,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-openai-high"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for OpenAI reasoning with high budget" instruction = """ @@ -128,6 +136,7 @@ pick = ["attemptCompletion"] # ============================================================================= 
[experts."e2e-reasoning-google-minimal"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Google reasoning with minimal budget" instruction = """ @@ -142,6 +151,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-google-low"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Google reasoning with low budget" instruction = """ @@ -156,6 +166,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-google-medium"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Google reasoning with medium budget" instruction = """ @@ -170,6 +181,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-reasoning-google-high"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for Google reasoning with high budget" instruction = """ diff --git a/e2e/experts/runtime-version-future.toml b/e2e/experts/runtime-version-future.toml index d782af64..027ced69 100644 --- a/e2e/experts/runtime-version-future.toml +++ b/e2e/experts/runtime-version-future.toml @@ -1,5 +1,3 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" @@ -7,6 +5,7 @@ envPath = [".env", ".env.local"] # Expert requiring future version (validation failure) [experts."e2e-runtime-future"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v99.0" instruction = "Call attemptCompletion." 
@@ -19,6 +18,7 @@ pick = ["attemptCompletion"] # Chain with nested future version: root(v1.0) -> nested(v99.0) [experts."e2e-runtime-chain-future"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ diff --git a/e2e/experts/runtime-version.toml b/e2e/experts/runtime-version.toml index 6fc86fe6..29213296 100644 --- a/e2e/experts/runtime-version.toml +++ b/e2e/experts/runtime-version.toml @@ -1,5 +1,3 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" @@ -7,6 +5,7 @@ envPath = [".env", ".env.local"] # Expert with v1.0 minRuntimeVersion (compatible with 0.x.y) [experts."e2e-runtime-v1"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v1.0" instruction = "Call attemptCompletion." @@ -19,6 +18,7 @@ pick = ["attemptCompletion"] # Expert without minRuntimeVersion (default) [experts."e2e-runtime-default"] +defaultModelTier = "low" version = "1.0.0" instruction = "Call attemptCompletion." @@ -30,6 +30,7 @@ pick = ["attemptCompletion"] # 3-level chain: root(v1.0) -> level1(v1.0) -> level2(v1.0) [experts."e2e-runtime-chain-ok"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ @@ -45,6 +46,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-runtime-chain-ok-l1"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ @@ -60,6 +62,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion"] [experts."e2e-runtime-chain-ok-l2"] +defaultModelTier = "low" version = "1.0.0" minRuntimeVersion = "v1.0" instruction = "Call attemptCompletion." 
diff --git a/e2e/experts/skills.toml b/e2e/experts/skills.toml index 18a00e6e..823f6e14 100644 --- a/e2e/experts/skills.toml +++ b/e2e/experts/skills.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-pick-tools"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert with picked tools only" instruction = """ @@ -23,6 +22,7 @@ packageName = "@perstack/base" pick = ["attemptCompletion", "todo"] [experts."e2e-omit-tools"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert with omitted tools" instruction = """ @@ -36,6 +36,7 @@ packageName = "@perstack/base" omit = ["exec"] [experts."e2e-multi-skill"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert with multiple skills" instruction = """ @@ -58,6 +59,7 @@ pick = ["attemptCompletion", "todo"] # Expert for testing dynamic skill add/remove [experts."e2e-dynamic-skills"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for dynamic skill management" instruction = """ @@ -77,6 +79,7 @@ pick = ["attemptCompletion", "addSkill", "removeSkill"] # Target expert for delegate testing [experts."e2e-delegate-target"] +defaultModelTier = "low" version = "1.0.0" description = "Simple target expert for delegate testing" instruction = "Call attemptCompletion." 
@@ -89,6 +92,7 @@ pick = ["attemptCompletion"] # Expert for testing dynamic delegate add/remove [experts."e2e-dynamic-delegates"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for dynamic delegate management" instruction = """ diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml index 0d550566..d26007f9 100644 --- a/e2e/experts/special-tools.toml +++ b/e2e/experts/special-tools.toml @@ -1,11 +1,10 @@ -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" envPath = [".env", ".env.local"] [experts."e2e-special-tools"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for special tool parallel execution" instruction = """ diff --git a/e2e/experts/versioned-base.toml b/e2e/experts/versioned-base.toml index 9ff4b01f..b40482c0 100644 --- a/e2e/experts/versioned-base.toml +++ b/e2e/experts/versioned-base.toml @@ -1,8 +1,6 @@ # E2E test configuration for versioned base skill with StdioTransport # Tests that pinning a version falls back to npx (StdioTransport) -model = "claude-haiku-4-5" - [provider] providerName = "anthropic" @@ -10,6 +8,7 @@ envPath = [".env", ".env.local"] # Versioned base - uses StdioTransport (npx fallback) [experts."e2e-versioned-base"] +defaultModelTier = "low" version = "1.0.0" description = "E2E test expert for versioned base with StdioTransport" instruction = """ diff --git a/e2e/lib/round-robin.ts b/e2e/lib/round-robin.ts index f46914fe..a1b1e09b 100644 --- a/e2e/lib/round-robin.ts +++ b/e2e/lib/round-robin.ts @@ -1,15 +1,6 @@ const DEFAULT_PROVIDER = "anthropic" -const DEFAULT_MODEL = "claude-haiku-4-5" export function injectProviderArgs(args: string[]): string[] { - const hasProvider = args.some((arg) => arg === "--provider") - const hasModel = args.some((arg) => arg === "--model") - const result = [...args] - if (!hasProvider) { - result.push("--provider", DEFAULT_PROVIDER) - } - if (!hasModel) { - result.push("--model", DEFAULT_MODEL) - } - return 
result + if (args.some((arg) => arg === "--provider")) return args + return [...args, "--provider", DEFAULT_PROVIDER] } diff --git a/e2e/lib/runner.ts b/e2e/lib/runner.ts index e13910c0..e9409b85 100644 --- a/e2e/lib/runner.ts +++ b/e2e/lib/runner.ts @@ -30,22 +30,15 @@ type RunOptions = { cwd?: string env?: Record provider?: string - model?: string } function buildFinalArgs(args: string[], options?: RunOptions): string[] { if (args[0] !== "run") return args - if (options?.provider && options?.model) { - const result = [...args] - if (!args.some((arg) => arg === "--provider")) { - result.push("--provider", options.provider) - } - if (!args.some((arg) => arg === "--model")) { - result.push("--model", options.model) - } - return result + const result = [...args] + if (options?.provider && !args.some((arg) => arg === "--provider")) { + result.push("--provider", options.provider) } - return injectProviderArgs(args) + return injectProviderArgs(result) } export async function runCliUntilToolCalled( diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts index 57987495..e1bcdb47 100644 --- a/e2e/perstack-cli/providers.test.ts +++ b/e2e/perstack-cli/providers.test.ts @@ -7,15 +7,15 @@ const CONFIG = "./e2e/experts/providers.toml" const LLM_TIMEOUT = 120000 const providers = [ - { provider: "openai", model: "gpt-5-nano", hasKey: hasOpenAIKey }, - { provider: "anthropic", model: "claude-haiku-4-5", hasKey: hasAnthropicKey }, - { provider: "google", model: "gemini-2.5-flash", hasKey: hasGoogleKey }, + { provider: "openai", hasKey: hasOpenAIKey }, + { provider: "anthropic", hasKey: hasAnthropicKey }, + { provider: "google", hasKey: hasGoogleKey }, ] describe.concurrent("LLM Providers", () => { it.each(providers)( "should complete with $provider provider", - async ({ provider, model, hasKey }) => { + async ({ provider, hasKey }) => { if (!hasKey()) { console.log(`Skipping ${provider} test: API key not available`) return @@ -23,7 +23,6 @@ 
describe.concurrent("LLM Providers", () => { const cmdResult = await runCli(["run", "--config", CONFIG, "e2e-providers", "Say hello"], { timeout: LLM_TIMEOUT, provider, - model, }) const result = withEventParsing(cmdResult) expect(result.exitCode).toBe(0) diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts index c29eac1e..5acfa3c5 100644 --- a/e2e/perstack-cli/reasoning-budget.test.ts +++ b/e2e/perstack-cli/reasoning-budget.test.ts @@ -7,7 +7,6 @@ const LLM_TIMEOUT = 180000 async function runReasoningTest( provider: "anthropic" | "openai" | "google", - model: string, ): Promise<{ reasoningTokens: number; thinking?: string; success: boolean }> { const expertKey = `e2e-reasoning-${provider}-medium` const cmdResult = await runCli( @@ -19,8 +18,6 @@ async function runReasoningTest( "Calculate", "--provider", provider, - "--model", - model, "--reasoning-budget", "medium", ], @@ -69,7 +66,7 @@ describe.concurrent("Reasoning Budget", () => { it( "should produce reasoning tokens with Anthropic extended thinking", async () => { - const result = await runReasoningTest("anthropic", "claude-haiku-4-5") + const result = await runReasoningTest("anthropic") expect(result.success).toBe(true) const hasThinking = result.reasoningTokens > 0 || (result.thinking?.length ?? 0) > 0 @@ -81,7 +78,7 @@ describe.concurrent("Reasoning Budget", () => { it( "should produce reasoning tokens with OpenAI reasoning effort", async () => { - const result = await runReasoningTest("openai", "gpt-5-nano") + const result = await runReasoningTest("openai") expect(result.success).toBe(true) const hasReasoning = result.reasoningTokens > 0 || (result.thinking?.length ?? 
0) > 0 @@ -93,7 +90,7 @@ describe.concurrent("Reasoning Budget", () => { it( "should produce reasoning tokens with Google flash thinking", async () => { - const result = await runReasoningTest("google", "gemini-2.5-flash") + const result = await runReasoningTest("google") expect(result.success).toBe(true) const hasThinking = result.reasoningTokens > 0 || (result.thinking?.length ?? 0) > 0 diff --git a/e2e/perstack-cli/streaming.test.ts b/e2e/perstack-cli/streaming.test.ts index 521d3978..60dc0831 100644 --- a/e2e/perstack-cli/streaming.test.ts +++ b/e2e/perstack-cli/streaming.test.ts @@ -22,8 +22,6 @@ function filterStreamingEvents(events: ParsedEvent[]): ParsedEvent[] { describe("Streaming Events", () => { describe("Event Sequence with Reasoning", () => { - const ANTHROPIC_MODEL = "claude-haiku-4-5" - it( "emits reasoning events in correct order (start → stream... → complete)", async () => { @@ -36,8 +34,6 @@ describe("Streaming Events", () => { "Calculate 2+2", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "medium", ], @@ -82,8 +78,6 @@ describe("Streaming Events", () => { "Say hello", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "minimal", ], @@ -128,8 +122,6 @@ describe("Streaming Events", () => { "Calculate 5*5", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "low", ], @@ -155,8 +147,6 @@ describe("Streaming Events", () => { }) describe("Without Reasoning", () => { - const ANTHROPIC_MODEL = "claude-haiku-4-5" - it( "skips reasoning events when budget is none", async () => { @@ -169,8 +159,6 @@ describe("Streaming Events", () => { "Hello", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "none", ], @@ -192,8 +180,6 @@ describe("Streaming Events", () => { }) describe("Streaming Delta Content", () => { - const ANTHROPIC_MODEL = "claude-haiku-4-5" - it( "streamReasoning events contain non-empty deltas", async () => { @@ -206,8 
+192,6 @@ describe("Streaming Events", () => { "Think about the number 42", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "medium", ], @@ -246,8 +230,6 @@ describe("Streaming Events", () => { "Write a short greeting", "--provider", "anthropic", - "--model", - ANTHROPIC_MODEL, "--reasoning-budget", "minimal", ],