diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e60e7d2..ee25025 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,9 @@ jobs: pnpm --filter @maschina/notifications build pnpm --filter @maschina/validation build pnpm --filter @maschina/email build + pnpm --filter @maschina/webhooks build + pnpm --filter @maschina/search build + pnpm --filter @maschina/compliance build - run: pnpm typecheck @@ -135,6 +138,9 @@ jobs: pnpm --filter @maschina/notifications build pnpm --filter @maschina/validation build pnpm --filter @maschina/email build + pnpm --filter @maschina/webhooks build + pnpm --filter @maschina/search build + pnpm --filter @maschina/compliance build - name: Run migrations run: pnpm db:migrate diff --git a/packages/model/src/catalog.test.ts b/packages/model/src/catalog.test.ts index 25aa083..b9f8b43 100644 --- a/packages/model/src/catalog.test.ts +++ b/packages/model/src/catalog.test.ts @@ -4,30 +4,52 @@ import { getAllowedModels, getModel, getModelMultiplier, + inferProvider, resolveModel, validateModelAccess, } from "./catalog.js"; describe("getModel", () => { - it("returns the model def for a known ID", () => { - const m = getModel("claude-haiku-4-5-20251001"); + it("returns the model def for a known Claude ID", () => { + const m = getModel("claude-haiku-4-5"); expect(m).toBeDefined(); expect(m?.provider).toBe("anthropic"); expect(m?.multiplier).toBe(1); }); + it("returns the model def for a known OpenAI ID", () => { + const m = getModel("gpt-5"); + expect(m).toBeDefined(); + expect(m?.provider).toBe("openai"); + expect(m?.multiplier).toBe(8); + }); + it("returns undefined for an unknown ID", () => { expect(getModel("gpt-99")).toBeUndefined(); }); }); describe("getModelMultiplier", () => { - it("returns 1 for haiku", () => expect(getModelMultiplier("claude-haiku-4-5-20251001")).toBe(1)); + it("returns 1 for haiku", () => expect(getModelMultiplier("claude-haiku-4-5")).toBe(1)); it("returns 3 for sonnet", 
() => expect(getModelMultiplier("claude-sonnet-4-6")).toBe(3)); it("returns 15 for opus", () => expect(getModelMultiplier("claude-opus-4-6")).toBe(15)); it("returns 0 for ollama models", () => expect(getModelMultiplier("ollama/llama3.2")).toBe(0)); - it("returns 1 for unknown model (safe default)", () => - expect(getModelMultiplier("unknown")).toBe(1)); + it("returns 1 for gpt-5-mini", () => expect(getModelMultiplier("gpt-5-mini")).toBe(1)); + it("returns 8 for gpt-5", () => expect(getModelMultiplier("gpt-5")).toBe(8)); + it("returns 20 for o3", () => expect(getModelMultiplier("o3")).toBe(20)); + it("returns 2 for unknown model (passthrough rate)", () => + expect(getModelMultiplier("unknown-future-model")).toBe(2)); +}); + +describe("inferProvider", () => { + it("infers anthropic for claude- prefix", () => + expect(inferProvider("claude-sonnet-4-6")).toBe("anthropic")); + it("infers openai for gpt- prefix", () => expect(inferProvider("gpt-5")).toBe("openai")); + it("infers openai for o3 prefix", () => expect(inferProvider("o3-pro")).toBe("openai")); + it("infers openai for o4 prefix", () => expect(inferProvider("o4-mini")).toBe("openai")); + it("infers ollama for ollama/ prefix", () => + expect(inferProvider("ollama/llama3.2")).toBe("ollama")); + it("returns null for unknown prefix", () => expect(inferProvider("gemini-pro")).toBeNull()); }); describe("getAllowedModels", () => { @@ -36,42 +58,40 @@ describe("getAllowedModels", () => { expect(allowed.every((m) => m.isLocal)).toBe(true); }); - it("m1 tier can use haiku and ollama", () => { + it("m1 tier can use haiku, gpt-5-mini, o4-mini, and ollama", () => { const ids = getAllowedModels("m1").map((m) => m.id); - expect(ids).toContain("claude-haiku-4-5-20251001"); + expect(ids).toContain("claude-haiku-4-5"); + expect(ids).toContain("gpt-5-mini"); + expect(ids).toContain("o4-mini"); expect(ids).toContain("ollama/llama3.2"); expect(ids).not.toContain("claude-sonnet-4-6"); expect(ids).not.toContain("claude-opus-4-6"); 
}); - it("m5 tier can use haiku and sonnet but not opus", () => { + it("m5 tier can use sonnet and gpt-5 but not opus or o3", () => { const ids = getAllowedModels("m5").map((m) => m.id); - expect(ids).toContain("claude-haiku-4-5-20251001"); expect(ids).toContain("claude-sonnet-4-6"); + expect(ids).toContain("gpt-5"); expect(ids).not.toContain("claude-opus-4-6"); + expect(ids).not.toContain("o3"); }); - it("m10 tier can use all models", () => { + it("m10 tier can use all models including opus and o3", () => { const ids = getAllowedModels("m10").map((m) => m.id); - expect(ids).toContain("claude-haiku-4-5-20251001"); - expect(ids).toContain("claude-sonnet-4-6"); - expect(ids).toContain("claude-opus-4-6"); - }); - - it("internal tier can use all models", () => { - const ids = getAllowedModels("internal").map((m) => m.id); expect(ids).toContain("claude-opus-4-6"); + expect(ids).toContain("o3"); + expect(ids).toContain("o3-pro"); + expect(ids).toContain("gpt-5.4-pro"); }); }); describe("validateModelAccess", () => { it("allows access tier to use ollama", () => { - const result = validateModelAccess("access", "ollama/llama3.2"); - expect(result.allowed).toBe(true); + expect(validateModelAccess("access", "ollama/llama3.2").allowed).toBe(true); }); it("denies access tier from using haiku", () => { - const result = validateModelAccess("access", "claude-haiku-4-5-20251001"); + const result = validateModelAccess("access", "claude-haiku-4-5"); expect(result.allowed).toBe(false); expect(result.reason).toMatch(/m1/); }); @@ -89,12 +109,36 @@ describe("validateModelAccess", () => { }); it("allows m10 tier to use opus", () => { - const result = validateModelAccess("m10", "claude-opus-4-6"); + expect(validateModelAccess("m10", "claude-opus-4-6").allowed).toBe(true); + }); + + it("allows m1 tier to use gpt-5-mini", () => { + expect(validateModelAccess("m1", "gpt-5-mini").allowed).toBe(true); + }); + + it("allows m5 tier to use gpt-5", () => { + expect(validateModelAccess("m5", 
"gpt-5").allowed).toBe(true); + }); + + it("allows m1+ passthrough for unknown claude model", () => { + const result = validateModelAccess("m1", "claude-future-model-9"); expect(result.allowed).toBe(true); + expect(result.passthrough).toBe(true); + }); + + it("allows m1+ passthrough for unknown gpt model", () => { + const result = validateModelAccess("m1", "gpt-6"); + expect(result.allowed).toBe(true); + expect(result.passthrough).toBe(true); + }); + + it("denies access tier passthrough", () => { + const result = validateModelAccess("access", "gpt-6"); + expect(result.allowed).toBe(false); }); - it("denies unknown model with clear error", () => { - const result = validateModelAccess("enterprise", "gpt-99"); + it("denies unknown prefix with no inferred provider", () => { + const result = validateModelAccess("enterprise", "gemini-pro"); expect(result.allowed).toBe(false); expect(result.reason).toMatch(/Unknown model/); }); @@ -102,17 +146,16 @@ describe("validateModelAccess", () => { describe("resolveModel", () => { it("returns the requested model if allowed", () => { - expect(resolveModel("m5", "claude-haiku-4-5-20251001")).toBe("claude-haiku-4-5-20251001"); + expect(resolveModel("m5", "claude-haiku-4-5")).toBe("claude-haiku-4-5"); }); it("falls back to tier default if requested model is denied", () => { - // m1 requesting opus → should fall back to m1 default expect(resolveModel("m1", "claude-opus-4-6")).toBe(DEFAULT_MODEL.m1); }); it("returns tier default when no model is requested", () => { expect(resolveModel("access")).toBe("ollama/llama3.2"); - expect(resolveModel("m1")).toBe("claude-haiku-4-5-20251001"); + expect(resolveModel("m1")).toBe("claude-haiku-4-5"); expect(resolveModel("m5")).toBe("claude-sonnet-4-6"); expect(resolveModel("m10")).toBe("claude-opus-4-6"); }); diff --git a/packages/model/src/catalog.ts b/packages/model/src/catalog.ts index 8ec5271..b7fa72c 100644 --- a/packages/model/src/catalog.ts +++ b/packages/model/src/catalog.ts @@ -2,54 +2,214 @@ 
import type { PlanTier } from "@maschina/plans"; // ─── Model definitions ──────────────────────────────────────────────────────── // multiplier: tokens billed = actual_tokens * multiplier -// Ollama (local) = 0 — never deducted from quota -// Haiku = 1 — 1:1 deduction -// Sonnet = 3 — 3x deduction per token -// Opus = 15 — 15x deduction per token +// 0 = free (local Ollama — never deducted from quota) +// 1 = 1:1 (cheap/fast models) +// 3 = 3x (mid-tier) +// 8+ = expensive/frontier models // // minTier: minimum plan tier required to use this model via cloud execution. -// Local Ollama models have minTier "access" (always allowed). +// Passthrough models (unknown IDs) route by prefix with a flat 2x multiplier. export interface ModelDef { id: string; displayName: string; - provider: "anthropic" | "ollama"; + provider: "anthropic" | "openai" | "ollama"; /** Token billing multiplier. 0 = no deduction (local). */ multiplier: number; /** Minimum tier for cloud access. */ minTier: PlanTier; /** Whether this is a local Ollama model. */ isLocal: boolean; + /** Whether this model is deprecated (still works, but warn users). 
*/ + deprecated?: boolean; } export const MODEL_CATALOG: ModelDef[] = [ - // ─── Anthropic cloud models ───────────────────────────────────────────── + // ─── Anthropic — Claude 4.x (current) ────────────────────────────────── + { + id: "claude-haiku-4-5", + displayName: "Claude Haiku 4.5", + provider: "anthropic", + multiplier: 1, + minTier: "m1", + isLocal: false, + }, { id: "claude-haiku-4-5-20251001", - displayName: "Claude Haiku", + displayName: "Claude Haiku 4.5 (pinned)", provider: "anthropic", multiplier: 1, minTier: "m1", isLocal: false, }, + { + id: "claude-sonnet-4-5", + displayName: "Claude Sonnet 4.5", + provider: "anthropic", + multiplier: 3, + minTier: "m5", + isLocal: false, + }, { id: "claude-sonnet-4-6", - displayName: "Claude Sonnet", + displayName: "Claude Sonnet 4.6", provider: "anthropic", multiplier: 3, minTier: "m5", isLocal: false, }, + { + id: "claude-opus-4-5", + displayName: "Claude Opus 4.5", + provider: "anthropic", + multiplier: 15, + minTier: "m10", + isLocal: false, + }, { id: "claude-opus-4-6", - displayName: "Claude Opus", + displayName: "Claude Opus 4.6", provider: "anthropic", multiplier: 15, minTier: "m10", isLocal: false, }, - // ─── Local Ollama models (Access tier and up) ──────────────────────────── + // ─── Anthropic — Claude 4.x legacy (available, not recommended) ───────── + { + id: "claude-sonnet-4-20250514", + displayName: "Claude Sonnet 4 (legacy)", + provider: "anthropic", + multiplier: 3, + minTier: "m5", + isLocal: false, + deprecated: true, + }, + { + id: "claude-opus-4-20250514", + displayName: "Claude Opus 4 (legacy)", + provider: "anthropic", + multiplier: 15, + minTier: "m10", + isLocal: false, + deprecated: true, + }, + + // ─── OpenAI — GPT-5 series (current) ──────────────────────────────────── + { + id: "gpt-5-nano", + displayName: "GPT-5 Nano", + provider: "openai", + multiplier: 1, + minTier: "m1", + isLocal: false, + }, + { + id: "gpt-5-mini", + displayName: "GPT-5 Mini", + provider: "openai", + 
multiplier: 1, + minTier: "m1", + isLocal: false, + }, + { + id: "gpt-5", + displayName: "GPT-5", + provider: "openai", + multiplier: 8, + minTier: "m5", + isLocal: false, + }, + { + id: "gpt-5.4", + displayName: "GPT-5.4", + provider: "openai", + multiplier: 10, + minTier: "m5", + isLocal: false, + }, + { + id: "gpt-5.4-pro", + displayName: "GPT-5.4 Pro", + provider: "openai", + multiplier: 25, + minTier: "m10", + isLocal: false, + }, + + // ─── OpenAI — o-series reasoning models ───────────────────────────────── + { + id: "o4-mini", + displayName: "o4-mini", + provider: "openai", + multiplier: 2, + minTier: "m1", + isLocal: false, + }, + { + id: "o3-mini", + displayName: "o3-mini", + provider: "openai", + multiplier: 2, + minTier: "m1", + isLocal: false, + }, + { + id: "o3", + displayName: "o3", + provider: "openai", + multiplier: 20, + minTier: "m10", + isLocal: false, + }, + { + id: "o3-pro", + displayName: "o3 Pro", + provider: "openai", + multiplier: 25, + minTier: "m10", + isLocal: false, + }, + + // ─── OpenAI — GPT-4.1 series (legacy, still available) ────────────────── + { + id: "gpt-4.1-mini", + displayName: "GPT-4.1 Mini (legacy)", + provider: "openai", + multiplier: 1, + minTier: "m1", + isLocal: false, + deprecated: true, + }, + { + id: "gpt-4.1", + displayName: "GPT-4.1 (legacy)", + provider: "openai", + multiplier: 4, + minTier: "m5", + isLocal: false, + deprecated: true, + }, + { + id: "gpt-4o", + displayName: "GPT-4o (legacy)", + provider: "openai", + multiplier: 4, + minTier: "m5", + isLocal: false, + deprecated: true, + }, + { + id: "gpt-4o-mini", + displayName: "GPT-4o Mini (legacy)", + provider: "openai", + multiplier: 1, + minTier: "m1", + isLocal: false, + deprecated: true, + }, + + // ─── Local Ollama models (Access tier and up, always free) ────────────── { id: "ollama/llama3.2", displayName: "Llama 3.2 (local)", @@ -89,7 +249,7 @@ const TIER_RANK: Record = { /** Default model for a given plan tier. 
/**
 * Matches OpenAI o-series IDs: the family token (`o1`, `o3`, `o4`) must be the
 * whole ID or be followed by `-` (e.g. `o3`, `o3-pro`, `o4-mini`).
 * A bare two-character prefix test would also accept unrelated IDs such as
 * `o3000-custom`.
 */
const O_SERIES_ID = /^o[134](?:-|$)/;

/**
 * Infer the provider from a model ID prefix.
 * Used for passthrough routing of models not in the catalog.
 *
 * @param modelId - Model identifier as supplied by the caller.
 * @returns `"ollama"` for `ollama/*`, `"anthropic"` for `claude-*`,
 *   `"openai"` for `gpt-*` and o-series IDs, or `null` when no known
 *   provider prefix matches.
 */
export function inferProvider(modelId: string): "anthropic" | "openai" | "ollama" | null {
  if (modelId.startsWith("ollama/")) return "ollama";
  if (modelId.startsWith("claude-")) return "anthropic";
  if (modelId.startsWith("gpt-") || O_SERIES_ID.test(modelId)) return "openai";
  return null;
}
/** Trailing date stamp of a pinned snapshot ID: `-YYYYMMDD` or `-YYYY-MM-DD`. */
const SNAPSHOT_SUFFIX = /-(?:\d{8}|\d{4}-\d{2}-\d{2})$/;

/**
 * Validates whether a given tier may use a given model.
 *
 * - A catalog model is gated by its `minTier`.
 * - A dated snapshot of a catalog model (`<catalog-id>-YYYYMMDD` or
 *   `<catalog-id>-YYYY-MM-DD`) is gated by the base model's `minTier`, so a
 *   pinned ID cannot be used to sidestep the tier of an expensive model.
 * - Any other ID with a recognizable cloud-provider prefix is allowed as a
 *   passthrough at M1+ (with a flat 2x multiplier).
 * - Everything else is rejected as unknown.
 *
 * @param tier - Plan tier of the caller.
 * @param modelId - Requested model ID.
 * @returns `allowed` plus, when denied, a human-readable `reason`. `model` is
 *   the catalog entry when one applies; `passthrough` is set when the ID is
 *   not itself in the catalog but was accepted by provider prefix.
 */
export function validateModelAccess(tier: PlanTier, modelId: string): ModelAccessResult {
  const model = getModel(modelId);
  // For an ID that is not in the catalog, see whether it is a dated snapshot
  // of a catalog model; if so that base entry decides the minimum tier.
  const gate = model ?? getModel(modelId.replace(SNAPSHOT_SUFFIX, ""));

  if (gate && TIER_RANK[tier] < TIER_RANK[gate.minTier]) {
    return {
      allowed: false,
      reason: `Model ${gate.displayName} requires the ${gate.minTier} plan or higher.`,
      model: gate,
    };
  }
  if (model) {
    return { allowed: true, model };
  }

  // Not in catalog — try passthrough by prefix
  const provider = inferProvider(modelId);
  if (provider && provider !== "ollama") {
    // Passthrough requires at least M1
    if (TIER_RANK[tier] < TIER_RANK.m1) {
      return {
        allowed: false,
        reason: "Custom models require the M1 plan or higher.",
        model: undefined,
      };
    }
    return { allowed: true, model: undefined, passthrough: true };
  }

  return { allowed: false, reason: `Unknown model: ${modelId}`, model: undefined };
}
logger = logging.getLogger(__name__)

# Upper bound on Chat Completions calls made for a single run.
MAX_TURNS = 20


class OpenAIRunner:
    """Runs agents against the OpenAI Chat Completions API (gpt-*, o1, o3, o4 series).

    The runner sends the system prompt, prior history and the new user
    message, and accumulates token usage across every API call it makes.
    No tools are passed to the API, so ``tool_calls`` in the result is
    always empty.
    """

    def __init__(
        self,
        api_key: str,
        model: str,
        system_prompt: str,
        max_tokens: int = 4096,
        timeout_secs: int = 300,
    ) -> None:
        """Create a runner bound to one model.

        Args:
            api_key: OpenAI API key used to build the async client.
            model: Model ID passed through to the API unchanged.
            system_prompt: Content of the leading ``system`` message.
            max_tokens: Cap on generated tokens per API call.
            timeout_secs: Wall-clock limit for the whole run.
        """
        self.client = AsyncOpenAI(api_key=api_key)
        self.model = model
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.timeout_secs = timeout_secs

    async def run(self, inp: RunInput) -> RunResult:
        """Execute the run, enforcing ``timeout_secs``.

        Raises:
            RuntimeError: if the run does not finish within ``timeout_secs``.
        """
        try:
            return await asyncio.wait_for(
                self._run_loop(inp),
                timeout=float(self.timeout_secs),
            )
        # asyncio.TimeoutError only became an alias of the builtin TimeoutError
        # in Python 3.11; catching the asyncio name works on every version.
        except asyncio.TimeoutError as exc:
            raise RuntimeError(f"run {inp.run_id} timed out after {self.timeout_secs}s") from exc

    async def _run_loop(self, inp: RunInput) -> RunResult:
        """Call the API until a terminal finish reason or ``MAX_TURNS`` calls."""
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": self.system_prompt},
            *[{"role": m.role, "content": m.content} for m in inp.history],
            {"role": "user", "content": inp.message},
        ]

        total_input_tokens = 0
        total_output_tokens = 0
        final_text = ""
        turns = 0  # counted explicitly so it is defined even if MAX_TURNS is 0

        for _ in range(MAX_TURNS):
            turns += 1
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,  # type: ignore[arg-type]
                # `max_tokens` is deprecated and rejected by o-series models;
                # `max_completion_tokens` is the replacement parameter.
                max_completion_tokens=self.max_tokens,
            )

            choice = response.choices[0]
            usage = response.usage
            if usage:
                total_input_tokens += usage.prompt_tokens
                total_output_tokens += usage.completion_tokens

            content = choice.message.content or ""
            final_text = content

            if choice.finish_reason in ("stop", "length", None):
                break

            # NOTE(review): for any other finish reason the assistant text is
            # appended and the request is re-issued without new user input —
            # confirm this is intended, since every extra call is billed.
            messages.append({"role": "assistant", "content": content})

        logger.info(
            "openai run completed",
            extra={
                "run_id": inp.run_id,
                "model": self.model,
                "turns": turns,
                "input_tokens": total_input_tokens,
                "output_tokens": total_output_tokens,
            },
        )

        return RunResult(
            run_id=inp.run_id,
            output=final_text,
            tool_calls=[],
            input_tokens=total_input_tokens,
            output_tokens=total_output_tokens,
            turns=turns,
        )
def _get_multiplier(model: str) -> int:
    """Return the billing multiplier for ``model``.

    The longest prefix in ``_MULTIPLIERS`` that ``model`` starts with wins, so
    the result does not depend on the order in which entries are listed
    (e.g. ``gpt-5-mini`` is matched ahead of ``gpt-5`` wherever either sits in
    the table). When two entries have the same prefix the earlier one is kept.
    Models matching no prefix are billed at ``_PASSTHROUGH_MULTIPLIER``.
    """
    best_len = -1
    best_mult = _PASSTHROUGH_MULTIPLIER
    for prefix, mult in _MULTIPLIERS:
        # Strict ">" keeps the first entry on ties, matching first-match order.
        if len(prefix) > best_len and model.startswith(prefix):
            best_len = len(prefix)
            best_mult = mult
    return best_mult
max_tokens=min(req.max_tokens, settings.max_output_tokens), + timeout_secs=req.timeout_secs, + ) + else: - # Cloud Anthropic model — lazy-import to avoid requiring the key for local dev + # Anthropic (claude-*) or unknown prefix treated as Anthropic + if not settings.anthropic_api_key: + raise RuntimeError(f"ANTHROPIC_API_KEY is not set but model '{req.model}' requires it") + try: import anthropic - - client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) except ImportError as exc: raise RuntimeError("anthropic package not installed") from exc - if not settings.anthropic_api_key: - raise RuntimeError( - f"ANTHROPIC_API_KEY is not set but model '{req.model}' requires cloud execution" - ) - from maschina_runtime import AgentRunner + client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) runner = AgentRunner( client=client, system_prompt=req.system_prompt, @@ -131,8 +165,6 @@ async def execute(req: RunRequest) -> RunResponse: ) # ── Apply billing multiplier ──────────────────────────────────────────── - # Multiply raw token counts so the daemon's quota deduction reflects cost. - # Ollama multiplier = 0, so local runs never deduct from the cloud quota. 
multiplier = _get_multiplier(req.model) billed_input_tokens = result.input_tokens * multiplier billed_output_tokens = result.output_tokens * multiplier diff --git a/services/runtime/tests/test_runner_routing.py b/services/runtime/tests/test_runner_routing.py index c8088fa..52c3f9c 100644 --- a/services/runtime/tests/test_runner_routing.py +++ b/services/runtime/tests/test_runner_routing.py @@ -65,9 +65,18 @@ def test_ollama_is_0x(self): assert _get_multiplier("ollama/llama3.2") == 0 assert _get_multiplier("ollama/mistral") == 0 - def test_unknown_model_defaults_to_1x(self): - assert _get_multiplier("gpt-99") == 1 - assert _get_multiplier("") == 1 + def test_gpt5_mini_is_1x(self): + assert _get_multiplier("gpt-5-mini") == 1 + + def test_gpt5_is_8x(self): + assert _get_multiplier("gpt-5") == 8 + + def test_o3_is_20x(self): + assert _get_multiplier("o3") == 20 + + def test_unknown_model_defaults_to_2x_passthrough(self): + assert _get_multiplier("gpt-99") == 2 + assert _get_multiplier("") == 2 # ─── Routing helpers ──────────────────────────────────────────────────────────