Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/cli/config-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ export * from './model-selection';
export * from './opencode-models';
export * from './opencode-selection';
export * from './paths';
export * from './precedence-resolver';
export * from './providers';
export * from './scoring-v2';
export * from './system';
22 changes: 22 additions & 0 deletions src/cli/dynamic-model-selection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,27 @@ describe('dynamic-model-selection', () => {
expect(chains.orchestrator).toContain('chutes/kimi-k2.5');
expect(chains.explorer).toContain('opencode/gpt-5-nano');
expect(chains.fixer[chains.fixer.length - 1]).toBe('opencode/gpt-5-nano');
expect(plan?.provenance?.oracle?.winnerLayer).toBe(
'dynamic-recommendation',
);
expect(plan?.scoring?.engineVersionApplied).toBe('v1');
});

test('supports v2-shadow mode without changing applied engine', () => {
const plan = buildDynamicModelPlan(
[
m({ model: 'openai/gpt-5.3-codex', reasoning: true, toolcall: true }),
m({ model: 'chutes/kimi-k2.5', reasoning: true, toolcall: true }),
m({ model: 'opencode/gpt-5-nano', reasoning: true, toolcall: true }),
],
baseInstallConfig(),
undefined,
{ scoringEngineVersion: 'v2-shadow' },
);

expect(plan).not.toBeNull();
expect(plan?.scoring?.engineVersionApplied).toBe('v1');
expect(plan?.scoring?.shadowCompared).toBe(true);
expect(plan?.scoring?.diffs?.oracle).toBeDefined();
});
});
73 changes: 68 additions & 5 deletions src/cli/dynamic-model-selection.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import { resolveAgentWithPrecedence } from './precedence-resolver';
import { rankModelsV2 } from './scoring-v2';
import type {
DiscoveredModel,
DynamicModelPlan,
ExternalSignalMap,
InstallConfig,
ScoringEngineVersion,
} from './types';

const AGENTS = [
Expand Down Expand Up @@ -773,6 +776,9 @@ export function buildDynamicModelPlan(
catalog: DiscoveredModel[],
config: InstallConfig,
externalSignals?: ExternalSignalMap,
options?: {
scoringEngineVersion?: ScoringEngineVersion;
},
): DynamicModelPlan | null {
const catalogWithSelectedModels = [
config.selectedChutesPrimaryModel,
Expand Down Expand Up @@ -814,12 +820,50 @@ export function buildDynamicModelPlan(
}
}
const providerUsage = new Map<string, number>();
const engineVersion =
options?.scoringEngineVersion ?? config.scoringEngineVersion ?? 'v1';
const rankCache = new Map<AgentName, DiscoveredModel[]>();
const shadowDiffs: Record<
string,
{ v1TopModel?: string; v2TopModel?: string }
> = {};

const agents: Record<string, { model: string; variant?: string }> = {};
const chains: Record<string, string[]> = {};
const provenance: DynamicModelPlan['provenance'] = {};

// Returns the ranked candidate list for `agent`, memoized per agent in `rankCache`.
// The v1 ranking is always computed; the v2 ranking is only computed when the
// configured engine version is not 'v1'.
const getRankedModels = (agent: AgentName): DiscoveredModel[] => {
const cached = rankCache.get(agent);
if (cached) return cached;

const rankedV1 = rankModels(providerCandidates, agent, externalSignals);

// Plain v1: cache and return the v1 ranking without touching the v2 engine.
if (engineVersion === 'v1') {
rankCache.set(agent, rankedV1);
return rankedV1;
}

// rankModelsV2 yields wrapper entries; only their `.model` field is kept here.
const rankedV2 = rankModelsV2(
providerCandidates,
agent,
externalSignals,
).map((candidate) => candidate.model);

// Shadow mode: record the top pick of each engine for comparison, but keep
// returning (and caching) the v1 ranking so the applied result is unchanged.
if (engineVersion === 'v2-shadow') {
shadowDiffs[agent] = {
v1TopModel: rankedV1[0]?.model,
v2TopModel: rankedV2[0]?.model,
};
rankCache.set(agent, rankedV1);
return rankedV1;
}

// Any remaining engine version: the v2 ranking is the one cached and returned.
rankCache.set(agent, rankedV2);
return rankedV2;
};

for (const [agentIndex, agent] of PRIMARY_ASSIGNMENT_ORDER.entries()) {
const ranked = rankModels(providerCandidates, agent, externalSignals);
const ranked = getRankedModels(agent);
const primaryPool = hasPaidProviderEnabled
? ranked.filter((model) => !FREE_BIASED_PROVIDERS.has(model.providerID))
: ranked;
Expand Down Expand Up @@ -885,11 +929,21 @@ export function buildDynamicModelPlan(

const finalizedChain = finalizeChainWithTail(chain, deterministicFreeTail);

const providerPolicyChain = dedupe([selectedChutes, selectedOpencode]);
const systemDefaultModel = selectedOpencode ?? 'opencode/big-pickle';
const resolved = resolveAgentWithPrecedence({
agentName: agent,
dynamicRecommendation: finalizedChain,
providerFallbackPolicy: providerPolicyChain,
systemDefault: [systemDefaultModel],
});

agents[agent] = {
model: finalizedChain[0] ?? primary.model,
model: resolved.model,
variant: ROLE_VARIANT[agent],
};
chains[agent] = finalizedChain;
chains[agent] = resolved.chain;
provenance[agent] = resolved.provenance;
}

if (hasPaidProviderEnabled) {
Expand All @@ -908,7 +962,7 @@ export function buildDynamicModelPlan(
const currentModel = agents[agent]?.model;
if (!currentModel) continue;

const ranked = rankModels(providerCandidates, agent, externalSignals);
const ranked = getRankedModels(agent);
const candidate = ranked.find(
(model) => model.providerID === providerID,
);
Expand Down Expand Up @@ -960,5 +1014,14 @@ export function buildDynamicModelPlan(
return null;
}

return { agents, chains };
return {
agents,
chains,
provenance,
scoring: {
engineVersionApplied: engineVersion === 'v2' ? 'v2' : 'v1',
shadowCompared: engineVersion === 'v2-shadow',
diffs: engineVersion === 'v2-shadow' ? shadowDiffs : undefined,
},
};
}
37 changes: 37 additions & 0 deletions src/cli/precedence-resolver.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/// <reference types="bun-types" />

import { describe, expect, test } from 'bun:test';
import { resolveAgentWithPrecedence } from './precedence-resolver';

// Unit tests for resolveAgentWithPrecedence: winning layer, chain order, provenance.
describe('precedence-resolver', () => {
// With a manual user plan present (and no direct override), the manual plan wins;
// the dynamic, provider-fallback, and system-default models follow it in the chain.
test('resolves deterministic winner with provenance', () => {
const result = resolveAgentWithPrecedence({
agentName: 'oracle',
manualUserPlan: ['openai/gpt-5.3-codex', 'openai/gpt-5.1-codex-mini'],
dynamicRecommendation: ['anthropic/claude-opus-4-6'],
providerFallbackPolicy: ['chutes/kimi-k2.5'],
systemDefault: ['opencode/big-pickle'],
});

expect(result.model).toBe('openai/gpt-5.3-codex');
expect(result.provenance.winnerLayer).toBe('manual-user-plan');
expect(result.chain).toEqual([
'openai/gpt-5.3-codex',
'openai/gpt-5.1-codex-mini',
'anthropic/claude-opus-4-6',
'chutes/kimi-k2.5',
'opencode/big-pickle',
]);
});

// Only the mandatory systemDefault is supplied, so it is both the winner and the whole chain.
test('uses system default when no other layer is provided', () => {
const result = resolveAgentWithPrecedence({
agentName: 'explorer',
systemDefault: ['opencode/gpt-5-nano'],
});

expect(result.model).toBe('opencode/gpt-5-nano');
expect(result.provenance.winnerLayer).toBe('system-default');
expect(result.chain).toEqual(['opencode/gpt-5-nano']);
});
});
93 changes: 93 additions & 0 deletions src/cli/precedence-resolver.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import type { AgentResolutionProvenance, ResolutionLayerName } from './types';

/**
 * Candidate models for one agent, grouped by the resolution layer that supplies them.
 * The optional fields are listed in the same order `buildLayerOrder` consults them,
 * highest precedence first; `systemDefault` is the mandatory lowest layer.
 */
export interface AgentLayerInput {
/** Agent the candidates belong to; not read by the resolver functions in this file. */
agentName: string;
/** Single model for the 'opencode-direct-override' layer. */
openCodeDirectOverride?: string;
/** Ordered models for the 'manual-user-plan' layer. */
manualUserPlan?: string[];
/** Single model for the 'pinned-model' layer. */
pinnedModel?: string;
/** Ordered models for the 'dynamic-recommendation' layer. */
dynamicRecommendation?: string[];
/** Ordered models for the 'provider-fallback-policy' layer. */
providerFallbackPolicy?: string[];
/** Ordered models for the 'system-default' layer. */
systemDefault: string[];
}

/** Outcome of resolving one agent through the precedence layers. */
export interface ResolvedAgentLayerResult {
/** The model selected for the agent. */
model: string;
/** De-duplicated models from the winning layer and every lower-precedence layer, in order. */
chain: string[];
/** Records which layer won and which model it selected. */
provenance: AgentResolutionProvenance;
}

/** One precedence layer paired with the models it contributes (possibly none). */
type LayerCandidate = {
layer: ResolutionLayerName;
models: string[];
};

/**
 * Collapse a model list to its distinct, non-empty entries.
 *
 * Entries that are `undefined` or the empty string are discarded; for repeated
 * ids only the first occurrence keeps its position.
 *
 * @param models - Model ids, possibly containing gaps or duplicates.
 * @returns The distinct ids in first-seen order.
 */
function dedupe(models: Array<string | undefined>): string[] {
  // A Set iterates in insertion order and ignores repeat insertions, which is
  // exactly the "first occurrence wins" ordering required here.
  const distinct = new Set<string>();
  models.forEach((entry) => {
    if (entry) distinct.add(entry);
  });
  return [...distinct];
}

/**
 * Lay out every resolution layer for an agent, highest precedence first.
 *
 * All six layers are always present in the result; a layer the input does not
 * supply simply carries an empty model list.
 *
 * @param input - Per-layer candidate models for the agent.
 * @returns The layers in precedence order: direct override, manual plan,
 *   pinned model, dynamic recommendation, provider fallback, system default.
 */
function buildLayerOrder(input: AgentLayerInput): LayerCandidate[] {
  // Single-model layers are normalised to a zero- or one-element list.
  const singleton = (modelId?: string): string[] => (modelId ? [modelId] : []);

  const directOverride: LayerCandidate = {
    layer: 'opencode-direct-override',
    models: singleton(input.openCodeDirectOverride),
  };
  const manualPlan: LayerCandidate = {
    layer: 'manual-user-plan',
    models: input.manualUserPlan ?? [],
  };
  const pinned: LayerCandidate = {
    layer: 'pinned-model',
    models: singleton(input.pinnedModel),
  };
  const dynamic: LayerCandidate = {
    layer: 'dynamic-recommendation',
    models: input.dynamicRecommendation ?? [],
  };
  const providerFallback: LayerCandidate = {
    layer: 'provider-fallback-policy',
    models: input.providerFallbackPolicy ?? [],
  };
  const systemDefault: LayerCandidate = {
    layer: 'system-default',
    models: input.systemDefault,
  };

  return [
    directOverride,
    manualPlan,
    pinned,
    dynamic,
    providerFallback,
    systemDefault,
  ];
}

/**
 * Resolve the model and fallback chain for one agent by walking the
 * resolution layers from highest to lowest precedence.
 *
 * The winning layer is the first one that contributes at least one usable
 * (non-empty) model id. The chain is the de-duplicated list of models from the
 * winning layer and every lower layer, so `chain[0]` is always the selected
 * model and always originates from the reported `winnerLayer`.
 *
 * If no layer yields a usable model, the result falls back to
 * `'opencode/big-pickle'`, attributed to the system-default layer, and the
 * chain contains just that model.
 *
 * @param input - Per-layer candidate models for the agent.
 * @returns The selected model, its fallback chain, and provenance naming the
 *   winning layer and model.
 */
export function resolveAgentWithPrecedence(
  input: AgentLayerInput,
): ResolvedAgentLayerResult {
  const ordered = buildLayerOrder(input);

  // A layer only wins if it supplies a usable id. dedupe() discards empty
  // entries, so judging by array length alone could credit a layer whose
  // models are all thrown away, leaving the provenance pointing at the wrong layer.
  const firstWinningIndex = ordered.findIndex((layer) =>
    layer.models.some((candidate) => Boolean(candidate)),
  );
  const winnerIndex =
    firstWinningIndex >= 0 ? firstWinningIndex : ordered.length - 1;
  const winnerLayer = ordered[winnerIndex];

  // The system-default layer is always the last entry of `ordered`, so it is
  // already part of this slice and needs no separate append.
  const resolvedChain = dedupe(
    ordered.slice(winnerIndex).flatMap((layer) => layer.models),
  );

  // An empty chain means no layer had a usable id; use the hard-coded fallback
  // rather than passing through a blank systemDefault entry.
  const model = resolvedChain[0] ?? 'opencode/big-pickle';
  // Keep the invariant chain[0] === model even in the fallback case.
  const chain = resolvedChain.length > 0 ? resolvedChain : [model];

  return {
    model,
    chain,
    provenance: {
      winnerLayer: winnerLayer?.layer ?? 'system-default',
      winnerModel: model,
    },
  };
}
61 changes: 61 additions & 0 deletions src/cli/scoring-v2/engine.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/// <reference types="bun-types" />

import { describe, expect, test } from 'bun:test';
import type { DiscoveredModel, ExternalSignalMap } from '../types';
import { rankModelsV2, scoreCandidateV2 } from './engine';

/**
 * Build a DiscoveredModel test fixture from a partial description.
 *
 * The provider id is always taken from the text before the first '/' in the
 * model id. Omitted fields default to an active, reasoning- and
 * toolcall-capable model with a 200000-token context, a 32000-token output
 * limit, and no attachment support; the request-limit and cost fields are
 * passed through as given.
 *
 * @param input - Field overrides; `model` is required.
 * @returns A fully populated fixture.
 */
function model(
  input: Partial<DiscoveredModel> & { model: string },
): DiscoveredModel {
  const modelId = input.model;
  const providerPrefix = modelId.split('/')[0];

  const displayName = input.name ?? modelId;
  const lifecycle = input.status ?? 'active';
  const contextTokens = input.contextLimit ?? 200000;
  const outputTokens = input.outputLimit ?? 32000;
  const canReason = input.reasoning ?? true;
  const canCallTools = input.toolcall ?? true;
  const acceptsAttachments = input.attachment ?? false;

  return {
    providerID: providerPrefix ?? 'openai',
    model: modelId,
    name: displayName,
    status: lifecycle,
    contextLimit: contextTokens,
    outputLimit: outputTokens,
    reasoning: canReason,
    toolcall: canCallTools,
    attachment: acceptsAttachments,
    dailyRequestLimit: input.dailyRequestLimit,
    costInput: input.costInput,
    costOutput: input.costOutput,
  };
}

// Unit tests for the v2 scoring engine entry points scoreCandidateV2 and rankModelsV2.
describe('scoring-v2', () => {
// Scoring the same candidate twice with the same signals must give the same total;
// the test also expects a qualityScore of 70 to surface as a quality feature of 0.7.
test('returns explain breakdown with deterministic total', () => {
const candidate = model({ model: 'openai/gpt-5.3-codex' });
const signalMap: ExternalSignalMap = {
'openai/gpt-5.3-codex': {
source: 'artificial-analysis',
qualityScore: 70,
codingScore: 75,
latencySeconds: 1.2,
inputPricePer1M: 1,
outputPricePer1M: 3,
},
};

const first = scoreCandidateV2(candidate, 'oracle', signalMap);
const second = scoreCandidateV2(candidate, 'oracle', signalMap);

expect(first.totalScore).toBe(second.totalScore);
expect(first.scoreBreakdown.features.quality).toBe(0.7);
expect(first.scoreBreakdown.weighted.coding).toBeGreaterThan(0);
});

// Two fixtures that differ only in model/provider id, ranked without external signals:
// the expected order places 'openai' ahead of 'zai-coding-plan' regardless of input order.
test('uses stable tie-break when scores are equal', () => {
const ranked = rankModelsV2(
[
model({ model: 'zai-coding-plan/glm-4.7', reasoning: false }),
model({ model: 'openai/gpt-5.3-codex', reasoning: false }),
],
'explorer',
);

expect(ranked[0]?.model.providerID).toBe('openai');
expect(ranked[1]?.model.providerID).toBe('zai-coding-plan');
});
});
Loading