diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index f8bb5dda..585c95b6 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -1,8 +1,6 @@ name: Check on: - push: - branches: [main] pull_request: branches: [main] diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index bba6ea32..df24aba3 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -1,8 +1,6 @@ name: E2E Tests on: - push: - branches: [main] pull_request: branches: [main] diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml index afa7ca0b..6c938cf1 100644 --- a/.github/workflows/evals.yml +++ b/.github/workflows/evals.yml @@ -12,9 +12,7 @@ on: description: 'Run scenarios in parallel' type: boolean default: false - # Run on PRs and main branch pushes - push: - branches: [main] + # Run on PRs pull_request: branches: [main] # Run weekly to keep cache warm and catch regressions diff --git a/DEBUG_BADGE_PLAN.md b/DEBUG_BADGE_PLAN.md deleted file mode 100644 index af257c7c..00000000 --- a/DEBUG_BADGE_PLAN.md +++ /dev/null @@ -1,443 +0,0 @@ -# Debug badge + per-session API request stats plan - -## Goal -Add a **Debug** badge to the Unified Input footer badge row, **immediately to the right of the existing Langfuse tracing badge**. The badge is used for Qbit developer-focused debug info. - -First debug feature: -- Per **tab/pane session** (`sessionId`) stats: - - Number of LLM API requests made **per provider** - - Time the **last request was sent** - - Time the **last request was received** - -Must count both: -- Main agent loop requests (`qbit-ai`) -- Sub-agent requests (`qbit-sub-agents`) - -UI behavior: -- Badge shown only in dev builds (`import.meta.env.DEV`) -- Popover polling happens **only while the popover is open** - ---- - -## High-level architecture (locked) - -- Store stats **per session** by attaching an `Arc` to the session’s `AgentBridge`. -- Share the stats type via **Layer 1** crate `qbit-core` so both `qbit-ai` and `qbit-sub-agents` can update it. -- Expose stats to the frontend via a **session-scoped** Tauri command: - - `get_api_request_stats(session_id: String) -> ApiRequestStatsSnapshot` - -Provider key: -- Use the existing provider name strings already carried through contexts (`ctx.provider_name`). - -Definition of timestamps: -- **sent**: immediately before calling `model.stream(request).await` -- **received**: when that call returns `Ok(stream)` (stream handle created successfully) - ---- - -## Backend: qbit-core (shared stats type) - -### Files -- **New:** `backend/crates/qbit-core/src/api_request_stats.rs` -- **Edit:** `backend/crates/qbit-core/src/lib.rs` - -### Implementation details -Create `ApiRequestStats` with: -- `tokio::sync::RwLock>` -- Public snapshot types (serde-serializable) returned to frontend - -Public types (exported from `qbit-core`): -- `ApiRequestStats` -- `ApiRequestStatsSnapshot` -- `ProviderRequestStatsSnapshot` - -Snapshot fields (snake_case): -- `requests: u64` -- `last_sent_at: Option` (unix millis) -- `last_received_at: Option` (unix millis) - -Methods: -- `pub fn new() -> Self` -- `pub async fn record_sent(&self, provider: &str)` -- `pub async fn record_received(&self, provider: &str)` -- `pub async fn snapshot(&self) -> ApiRequestStatsSnapshot` - -Time source: -- `SystemTime::now().duration_since(UNIX_EPOCH)` in millis (`u64`) - -### Export wiring -In `qbit-core/src/lib.rs`: -1. Add module declaration: - - `pub mod api_request_stats;` -2. 
Add re-export: - - `pub use api_request_stats::{ApiRequestStats, ApiRequestStatsSnapshot, ProviderRequestStatsSnapshot};` - -Downstream imports become: -- `use qbit_core::{ApiRequestStats, ApiRequestStatsSnapshot};` - ---- - -## Backend: qbit-ai (per-session storage + main loop instrumentation) - -### Files -- **Edit:** `backend/crates/qbit-ai/src/agent_bridge.rs` -- **Edit:** `backend/crates/qbit-ai/src/agentic_loop.rs` -- **Edit:** `backend/crates/qbit-ai/src/eval_support.rs` (3 `AgenticLoopContext { .. }` literals) -- **Edit:** `backend/crates/qbit-ai/src/test_utils.rs` (1 `AgenticLoopContext { .. }` literal + 6 `SubAgentExecutorContext { .. }` literals) - -### 1) AgentBridge owns per-session stats - -#### Add field -In `qbit-ai/src/agent_bridge.rs` `pub struct AgentBridge`: -- `pub(crate) api_request_stats: Arc,` - -Add import: -- `use qbit_core::{ApiRequestStats, ApiRequestStatsSnapshot};` - -#### Single init point (confirmed) -`AgentBridge` is constructed in exactly one place: -- `fn from_components_with_runtime(...) -> Self` (contains a `Self { ... }` struct literal) - -Add in that `Self { ... }` literal: -- `api_request_stats: Arc::new(ApiRequestStats::new()),` - -#### Add async accessor -In `impl AgentBridge`: -- `pub async fn get_api_request_stats_snapshot(&self) -> ApiRequestStatsSnapshot { self.api_request_stats.snapshot().await }` - -### 2) Extend AgenticLoopContext and wire it - -#### Add field -In `qbit-ai/src/agentic_loop.rs` `pub struct AgenticLoopContext<'a>`: -- `pub api_request_stats: &'a Arc,` - -Add import: -- `use qbit_core::ApiRequestStats;` - -#### Wire via build_loop_context -In `qbit-ai/src/agent_bridge.rs`, `fn build_loop_context` literal (anchor provided below), add: -- `api_request_stats: &self.api_request_stats,` - -**Anchor:** -```rust -fn build_loop_context<'a>( - &'a self, - loop_event_tx: &'a mpsc::UnboundedSender, -) -> AgenticLoopContext<'a> { - AgenticLoopContext { - event_tx: loop_event_tx, - tool_registry: &self.tool_registry, - ... - coordinator: self.coordinator.as_ref(), - } -} -``` - -### 3) Instrument main agent `.stream` boundary - -In `qbit-ai/src/agentic_loop.rs`, at the stream request code (anchor below): - -**Anchor:** -```rust -let stream_result = tokio::time::timeout( - stream_timeout, - async { model.stream(request).await }.instrument(llm_span.clone()), -) -.await; - -let mut stream = match stream_result { - Ok(Ok(s)) => { - tracing::info!("[OpenAI Debug] Stream created successfully, consuming chunks..."); - s - } - ... -}; -``` - -Add: -1) Immediately before the `timeout(...)` call: -- `ctx.api_request_stats.record_sent(ctx.provider_name).await;` - -2) Inside `Ok(Ok(s)) => { ... }` at the top: -- `ctx.api_request_stats.record_received(ctx.provider_name).await;` - -Do not record `received` on error branches. - -### 4) Compile-fix updates: AgenticLoopContext literals -Adding `api_request_stats` to `AgenticLoopContext` requires updating all struct literals: - -- `qbit-ai/src/agent_bridge.rs` (already covered) -- `qbit-ai/src/eval_support.rs` — **3** literals -- `qbit-ai/src/test_utils.rs` — **1** literal (`TestContext::as_agentic_context_with_client`) - -#### eval_support.rs (3 literals) -For each `let ctx = AgenticLoopContext { ... 
}`: -- Create local: `let api_request_stats = Arc::new(ApiRequestStats::new());` -- Add in ctx literal: `api_request_stats: &api_request_stats,` -- Import `ApiRequestStats` from `qbit_core` - -#### qbit-ai/src/test_utils.rs (AgenticLoopContext literal) -Update `TestContext` to own stats so the single helper can supply it: -- Add `pub api_request_stats: Arc,` to `TestContext` -- In `TestContextBuilder::build()` (the `TestContext { ... }` literal), add: - - `api_request_stats: Arc::new(ApiRequestStats::new()),` -- In `as_agentic_context_with_client` `AgenticLoopContext { ... }` literal add: - - `api_request_stats: &self.api_request_stats,` - ---- - -## Backend: qbit-sub-agents (sub-agent instrumentation) - -### Files -- **Edit:** `backend/crates/qbit-sub-agents/src/executor.rs` - -### 1) Extend SubAgentExecutorContext -In `qbit-sub-agents/src/executor.rs`: -- Import: - - `use qbit_core::ApiRequestStats;` - -In `pub struct SubAgentExecutorContext<'a>` add: -- `pub api_request_stats: Option<&'a Arc>,` - -### 2) Instrument sub-agent `.stream` boundary -At the exact stream code (anchor below): - -**Anchor:** -```rust -let mut stream = match model.stream(request).await { - Ok(s) => s, - Err(e) => { ... } -}; -``` - -Add: -1) Immediately before the match: -```rust -if let Some(stats) = ctx.api_request_stats { - stats.record_sent(ctx.provider_name).await; -} -``` - -2) Change `Ok(s) => s,` to: -```rust -Ok(s) => { - if let Some(stats) = ctx.api_request_stats { - stats.record_received(ctx.provider_name).await; - } - s -} -``` - -Provider key is `ctx.provider_name`, which qbit-ai already sets correctly for override models. - ---- - -## Backend: qbit-ai → qbit-sub-agents plumbing (3 production call sites) - -### Files -- **Edit:** `backend/crates/qbit-ai/src/agentic_loop.rs` -- **Edit:** `backend/crates/qbit-ai/src/test_utils.rs` (6 call sites) - -### 1) Update SubAgentExecutorContext literals in qbit-ai/src/agentic_loop.rs -There are **3** struct literals (override, override-fallback, and no-override). - -**Anchor snippet:** -```rust -let sub_ctx = SubAgentExecutorContext { - event_tx: ctx.event_tx, - tool_registry: ctx.tool_registry, - workspace: ctx.workspace, - provider_name: override_provider, - model_name: override_model, - session_id: ctx.session_id, - transcript_base_dir: ctx.transcript_base_dir, -}; -``` - -Add to each literal: -- `api_request_stats: Some(ctx.api_request_stats),` - -### 2) Update SubAgentExecutorContext literals in qbit-ai/src/test_utils.rs -There are **6** occurrences (grep confirmed). - -For each of these literals, add: -- `api_request_stats: None, // tests` - -This keeps test plumbing minimal. 
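For reference, a minimal sketch of the shared `qbit-core` stats type that the instrumentation above records into, consistent with the fields and methods listed in the qbit-core section (this is a hedged sketch, not the actual `api_request_stats.rs`: the snapshot struct doubles as the internal map entry here for brevity, and `requests` is assumed to count sends):

```rust
use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};

use serde::Serialize;
use tokio::sync::RwLock;

/// Unix millis, as specified in the plan (SystemTime -> UNIX_EPOCH delta).
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_millis() as u64)
        .unwrap_or(0)
}

#[derive(Clone, Default, Serialize)]
pub struct ProviderRequestStatsSnapshot {
    pub requests: u64,
    pub last_sent_at: Option<u64>,
    pub last_received_at: Option<u64>,
}

#[derive(Clone, Default, Serialize)]
pub struct ApiRequestStatsSnapshot {
    pub providers: HashMap<String, ProviderRequestStatsSnapshot>,
}

#[derive(Default)]
pub struct ApiRequestStats {
    // Keyed by provider name (ctx.provider_name); the real crate may keep a
    // separate internal entry type rather than reusing the snapshot struct.
    providers: RwLock<HashMap<String, ProviderRequestStatsSnapshot>>,
}

impl ApiRequestStats {
    pub fn new() -> Self {
        Self::default()
    }

    /// Called immediately before `model.stream(request)`; bumps the request count.
    pub async fn record_sent(&self, provider: &str) {
        let mut map = self.providers.write().await;
        let entry = map.entry(provider.to_string()).or_default();
        entry.requests += 1;
        entry.last_sent_at = Some(now_ms());
    }

    /// Called when the stream handle is created successfully.
    pub async fn record_received(&self, provider: &str) {
        let mut map = self.providers.write().await;
        let entry = map.entry(provider.to_string()).or_default();
        entry.last_received_at = Some(now_ms());
    }

    pub async fn snapshot(&self) -> ApiRequestStatsSnapshot {
        ApiRequestStatsSnapshot {
            providers: self.providers.read().await.clone(),
        }
    }
}
```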
- ---- - -## Backend: qbit (Tauri) command to fetch per-session stats - -### Files -- **New:** `backend/crates/qbit/src/ai/commands/debug.rs` -- **Edit:** `backend/crates/qbit/src/ai/commands/mod.rs` -- **Edit:** `backend/crates/qbit/src/ai/mod.rs` (explicit re-export list) -- **Edit:** `backend/crates/qbit/src/lib.rs` (explicit imports + generate_handler) - -### Command -Create `get_api_request_stats` command: - -Signature: -- `pub async fn get_api_request_stats(state: State<'_, AppState>, session_id: String) -> Result` - -Implementation: -- Use per-session bridge: - - `state.ai_state.get_session_bridge(&session_id).await` -- If missing, return: - - `super::ai_session_not_initialized_error(&session_id)` -- Else return: - - `bridge.get_api_request_stats_snapshot().await` - -### Wiring -1) `qbit/src/ai/commands/mod.rs`: -- `pub mod debug;` -- `pub use debug::*;` - -2) `qbit/src/ai/mod.rs`: -- Add `get_api_request_stats,` to the explicit `pub use commands::{ ... }` list. - -3) `qbit/src/lib.rs`: -- Add `get_api_request_stats,` to the explicit `use ai::{ ... }` list. -- Add `get_api_request_stats,` to `tauri::generate_handler![ ... ]`. - ---- - -## Frontend: API wrapper + Debug badge UI - -### Files -- **Edit:** `frontend/lib/ai.ts` -- **Edit:** `frontend/components/UnifiedInput/InputStatusRow.tsx` - -### 1) Add TS types + invoke wrapper -In `frontend/lib/ai.ts`: - -```ts -export interface ProviderRequestStats { - requests: number; - last_sent_at: number | null; - last_received_at: number | null; -} - -export interface ApiRequestStatsSnapshot { - providers: Record; -} - -export async function getApiRequestStats(sessionId: string): Promise { - return invoke("get_api_request_stats", { sessionId }); -} -``` - -### 2) Add Debug badge next to Langfuse badge -In `frontend/components/UnifiedInput/InputStatusRow.tsx`: - -#### Imports -- Add `Bug` to lucide import list: - - `import { Bot, Cpu, Gauge, Terminal, Bug } from "lucide-react";` -- Add to the existing `@/lib/ai` import block: - - `getApiRequestStats` - - `type ApiRequestStatsSnapshot` - -#### State -Add: -- `const [debugOpen, setDebugOpen] = useState(false);` -- `const debugPollRef = useRef | null>(null);` -- `const [apiRequestStats, setApiRequestStats] = useState(null);` -- `const [apiRequestStatsError, setApiRequestStatsError] = useState(null);` - -#### Refresh function -Create `refreshApiRequestStats` callback bound to `sessionId`. - -**Expected error handling (locked):** -Backend error string is produced by: -```rust -"AI agent not initialized for session '{}'. Call init_ai_session first." -``` - -Frontend should treat as expected if: -- `msg.includes("AI agent not initialized for session")` OR -- `msg.includes("Call init_ai_session first")` - -On expected: -- `setApiRequestStats(null);` -- `setApiRequestStatsError(null);` -- do not log - -On unexpected: -- `setApiRequestStatsError(msg);` - -#### Poll only while popover open -Use controlled popover state: -- `` - -Effect: -- If not `import.meta.env.DEV`, do nothing. -- When `debugOpen` becomes true: - - refresh once immediately - - set interval 1500ms -- When it becomes false or on unmount: - - clear interval - -#### Placement -Insert the Debug badge block **immediately after** the Langfuse badge block in the left badge row. - -Render gate: -- `import.meta.env.DEV && !isMockBrowserMode()` (recommended) - -#### Badge display -- Show label: `Debug` -- Optionally show total requests (sum across providers) if > 0. 
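The backend half of this contract — the `get_api_request_stats` command described in the Tauri section above, including the "not initialized" error string the frontend treats as an expected empty state — might look roughly like the following sketch. The `String` error type and an `Option`-returning `get_session_bridge` are assumptions; `AppState`, `ai_state`, and `ai_session_not_initialized_error` are the project items the plan already names.

```rust
use tauri::State;

#[tauri::command]
pub async fn get_api_request_stats(
    state: State<'_, AppState>,
    session_id: String,
) -> Result<ApiRequestStatsSnapshot, String> {
    // Per-session lookup; a missing session yields the error message the
    // frontend matches on ("AI agent not initialized for session ...").
    match state.ai_state.get_session_bridge(&session_id).await {
        Some(bridge) => Ok(bridge.get_api_request_stats_snapshot().await),
        None => Err(super::ai_session_not_initialized_error(&session_id)),
    }
}
```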
- -#### Popover content -Title: -- `Debug (This Tab)` - -Subtitle: -- `LLM API Requests (main + sub-agents)` - -Table columns: -- Provider | Req | Sent | Recv - -Sorting: -- requests desc, then provider asc. - -Time formatting: -- display relative (e.g. `12s`, `3m`, `1h`, `2d`), with tooltip `new Date(ms).toLocaleString()`. -- if delta negative, show `0s`. -- if timestamp null, show `—`. - ---- - -## Verification checklist - -### Build/test -- `just check` -- `just test-rust` - -### Manual -1. Open two tabs (two sessionIds). -2. Init AI in tab A only; run a prompt. -3. Open Debug popover in tab A: - - provider row exists - - requests increments - - last_sent_at and last_received_at appear -4. Tab B (no AI): open Debug popover: - - shows “No AI agent initialized…” (no red error, no log spam) -5. Trigger a prompt that invokes a sub-agent tool (e.g. `sub_agent_coder`): - - counts increase (same session) -6. If sub-agent has model override provider, confirm provider key differs and counts split. - ---- - -## Exhaustive compilation breakpoints (must update) - -### Adding `api_request_stats` to `AgenticLoopContext` requires updating these literals: -- `qbit-ai/src/agent_bridge.rs` (build_loop_context) -- `qbit-ai/src/eval_support.rs` (3 literals) -- `qbit-ai/src/test_utils.rs` (1 literal) - -### Adding `api_request_stats` to `SubAgentExecutorContext` requires updating these literals: -- `qbit-ai/src/agentic_loop.rs` (3 literals) -- `qbit-ai/src/test_utils.rs` (6 literals) - -No other occurrences were found by grep. diff --git a/DEBUG_BADGE_WORK_TRACKER.md b/DEBUG_BADGE_WORK_TRACKER.md deleted file mode 100644 index 5ec50807..00000000 --- a/DEBUG_BADGE_WORK_TRACKER.md +++ /dev/null @@ -1,172 +0,0 @@ -# Debug badge implementation work tracker - -Use this checklist to implement `DEBUG_BADGE_PLAN.md` with minimal back-and-forth. Keep boxes updated as you work. 
- ---- - -## 0) Prep / sanity - -- [x] Read `DEBUG_BADGE_PLAN.md` end-to-end -- [x] Create a feature branch (optional) (do not commit unless asked) - ---- - -## 1) qbit-core: shared API request stats type - -- [x] Create `backend/crates/qbit-core/src/api_request_stats.rs` - - [x] Implement `ApiRequestStats` (tokio::RwLock>) - - [x] Implement `ProviderRequestStatsSnapshot` (Serialize) - - [x] Implement `ApiRequestStatsSnapshot` (Serialize) - - [x] Implement `record_sent(provider)` - - [x] Implement `record_received(provider)` - - [x] Implement `snapshot()` - - [x] Implement `now_ms()` helper (SystemTime → unix millis) -- [x] Update `backend/crates/qbit-core/src/lib.rs` - - [x] Add `pub mod api_request_stats;` - - [x] Add `pub use api_request_stats::{ApiRequestStats, ApiRequestStatsSnapshot, ProviderRequestStatsSnapshot};` ---- - -## 2) qbit-ai: store stats per session on AgentBridge - -- [x] Update `backend/crates/qbit-ai/src/agent_bridge.rs` - - [x] Import `qbit_core::{ApiRequestStats, ApiRequestStatsSnapshot}` - - [x] Add field on `AgentBridge`: `api_request_stats: Arc` - - [x] Initialize in `from_components_with_runtime` struct literal: `Arc::new(ApiRequestStats::new())` - - [x] Add `pub async fn get_api_request_stats_snapshot(&self) -> ApiRequestStatsSnapshot` - ---- - -## 3) qbit-ai: main agent loop wiring + instrumentation - -- [x] Update `backend/crates/qbit-ai/src/agentic_loop.rs` - - [x] Import `qbit_core::ApiRequestStats` - - [x] Add field to `AgenticLoopContext<'a>`: `api_request_stats: &'a Arc` - - [x] Instrument main `.stream` boundary - - [x] Call `record_sent(ctx.provider_name)` immediately before the timeout-wrapped `model.stream(request)` - - [x] Call `record_received(ctx.provider_name)` inside `Ok(Ok(s))` (stream successfully created) -- [x] Update `backend/crates/qbit-ai/src/agent_bridge.rs` - - [x] In `build_loop_context`, set `api_request_stats: &self.api_request_stats` - ---- - -## 4) qbit-sub-agents: executor context + instrumentation - -- [x] Update `backend/crates/qbit-sub-agents/src/executor.rs` - - [x] Import `qbit_core::ApiRequestStats` - - [x] Extend `SubAgentExecutorContext<'a>` with: - - [x] `api_request_stats: Option<&'a Arc>` - - [x] Instrument sub-agent `.stream` boundary - - [x] Before `model.stream(request)`: if stats present, `record_sent(ctx.provider_name)` - - [x] In `Ok(s)` arm: if stats present, `record_received(ctx.provider_name)` - ---- - -## 5) qbit-ai: pass stats into sub-agent execution context - -- [x] Update `backend/crates/qbit-ai/src/agentic_loop.rs` (3 call sites) - - [x] In each `SubAgentExecutorContext { ... }` literal, set: - - [x] `api_request_stats: Some(ctx.api_request_stats)` - ---- - -## 6) qbit-ai: compilation fixups for eval + tests - -### 6.1 AgenticLoopContext literals - -- [x] Update `backend/crates/qbit-ai/src/eval_support.rs` (3 `AgenticLoopContext { ... 
}` literals) - - [x] Import `qbit_core::ApiRequestStats` - - [x] For each ctx literal: - - [x] Create local `let api_request_stats = Arc::new(ApiRequestStats::new());` - - [x] Add field `api_request_stats: &api_request_stats,` - -- [x] Update `backend/crates/qbit-ai/src/test_utils.rs` for `AgenticLoopContext` literal - - [x] Import `qbit_core::ApiRequestStats` - - [x] Add `api_request_stats: Arc` to `TestContext` - - [x] In `TestContextBuilder::build()`, initialize `api_request_stats: Arc::new(ApiRequestStats::new())` - - [x] In `TestContext::as_agentic_context_with_client` ctx literal, add `api_request_stats: &self.api_request_stats` - -### 6.2 SubAgentExecutorContext literals (tests) - -- [x] Update `backend/crates/qbit-ai/src/test_utils.rs` (6 `SubAgentExecutorContext { ... }` literals) - - [x] Add field `api_request_stats: None, // tests` - ---- - -## 7) qbit (Tauri backend): session-scoped stats command - -- [x] Create `backend/crates/qbit/src/ai/commands/debug.rs` - - [x] Implement `#[tauri::command] get_api_request_stats(state: State<'_, AppState>, session_id: String) -> Result` - - [x] Use `state.ai_state.get_session_bridge(&session_id)` - - [x] Error with `super::ai_session_not_initialized_error(&session_id)` - - [x] Return `bridge.get_api_request_stats_snapshot().await` - -- [x] Update `backend/crates/qbit/src/ai/commands/mod.rs` - - [x] Add `pub mod debug;` - - [x] Add `pub use debug::*;` - -- [x] Update `backend/crates/qbit/src/ai/mod.rs` - - [x] Add `get_api_request_stats` to `pub use commands::{ ... }` list - -- [x] Update `backend/crates/qbit/src/lib.rs` - - [x] Add `get_api_request_stats` to `use ai::{ ... }` list - - [x] Add `get_api_request_stats` to `tauri::generate_handler![ ... ]` - ---- - -## 8) Frontend: API wrapper - -- [x] Update `frontend/lib/ai.ts` - - [x] Add `ProviderRequestStats` type (snake_case fields) - - [x] Add `ApiRequestStatsSnapshot` type - - [x] Add `getApiRequestStats(sessionId: string)` wrapper calling `invoke("get_api_request_stats", { sessionId })` - ---- - -## 9) Frontend: Debug badge UI - -- [x] Update `frontend/components/UnifiedInput/InputStatusRow.tsx` - - [x] Import `Bug` from `lucide-react` - - [x] Import `getApiRequestStats` + `ApiRequestStatsSnapshot` from `@/lib/ai` - - [x] Add state: - - [x] `debugOpen`, `setDebugOpen` - - [x] `debugPollRef` - - [x] `apiRequestStats`, `apiRequestStatsError` - - [x] Implement `refreshApiRequestStats()` - - [x] Expected error handling: treat as normal if error contains: - - [x] `"AI agent not initialized for session"` OR - - [x] `"Call init_ai_session first"` - - [x] Do not log expected case - - [x] Controlled popover: - - [x] `` - - [x] Poll only while open (1500ms) - - [x] Render badge only when: - - [x] `import.meta.env.DEV` - - [x] and (recommended) not `isMockBrowserMode()` - - [x] Place badge immediately after Langfuse badge block - - [x] Popover contents: - - [x] Title: `Debug (This Tab)` - - [x] Subtitle: `LLM API Requests (main + sub-agents)` - - [x] Table: Provider | Req | Sent | Recv - - [x] Sort providers by requests desc - - [x] Relative time + absolute tooltip; null → `—` - ---- - -## 10) Verification - -- [ ] Run `just check` (fails: missing frontend deps/types in this environment) -- [ ] Run `just test-rust` (timed out after 120s) -- [ ] Manual UI verification - - [ ] Two tabs: stats are independent per tab - - [ ] Tab without AI initialized shows friendly empty state (no noisy error) - - [ ] Main agent request increments counts + updates sent/recv - - [ ] Sub-agent request increments 
counts + updates sent/recv - - [ ] Sub-agent override provider produces separate provider row - ---- - -## 11) Cleanup - -- [ ] Ensure no secrets are logged or stored -- [ ] Ensure no background polling when popover is closed -- [ ] Ensure code formatted (run `just fmt` if needed) diff --git a/provider-model-config-analysis.md b/provider-model-config-analysis.md deleted file mode 100644 index c987c20f..00000000 --- a/provider-model-config-analysis.md +++ /dev/null @@ -1,698 +0,0 @@ -# Provider & Model Configuration Analysis - -## Executive Summary - -The current provider and model configuration is spread across **6+ files** with significant duplication between frontend and backend. Adding a new provider requires changes in at least **5 locations**. This document outlines the current architecture and proposes a simplification plan. - ---- - -## Current Architecture - -### 1. Backend Configuration - -#### Provider Enum (`backend/crates/qbit-settings/src/schema.rs`) -```rust -pub enum AiProvider { - VertexAi, // default - Openrouter, - Anthropic, - Openai, - Ollama, - Gemini, - Groq, - Xai, - ZaiSdk, -} -``` - -Each provider has a dedicated settings struct (e.g., `VertexAiSettings`, `OpenAiSettings`) containing: -- Credentials (API key, service account path, etc.) -- Provider-specific options (base_url, web_search settings, etc.) -- `show_in_selector: bool` - controls visibility in UI - -**Key observation**: The backend does NOT maintain a list of available models - it treats models as opaque strings. - -#### LLM Client Enum (`backend/crates/qbit-llm-providers/src/lib.rs`) -```rust -pub enum LlmClient { - VertexAnthropic(rig_anthropic_vertex::CompletionModel), - RigOpenRouter(rig_openrouter::CompletionModel), - RigOpenAi(rig_openai::completion::CompletionModel), - RigOpenAiResponses(rig_openai::responses_api::ResponsesCompletionModel), - OpenAiReasoning(rig_openai_responses::CompletionModel), // Custom for o1/o3/gpt-5 - RigAnthropic(rig_anthropic::completion::CompletionModel), - RigOllama(rig_ollama::CompletionModel), - RigGemini(rig_gemini::completion::CompletionModel), - RigGroq(rig_groq::CompletionModel), - RigXai(rig_xai::completion::CompletionModel), - RigZaiSdk(rig_zai_sdk::CompletionModel), - Mock, -} -``` - -**Note**: OpenAI has 3 different client variants based on model type! - -#### Model Capabilities Detection (`backend/crates/qbit-llm-providers/src/model_capabilities.rs`) - -**This is where backend hardcodes model knowledge**: - -```rust -// Temperature support detection -pub fn model_supports_temperature(provider: &str, model: &str) -> bool { - match provider { - "openai" | "openai_responses" => { - // Codex, o-series, gpt-5 don't support temperature - if model.contains("codex") { return false; } - if model.starts_with("o1") || model.starts_with("o3") || model.starts_with("o4") { - return false; - } - if model.starts_with("gpt-5") { return false; } - true - } - _ => true, - } -} - -// Web search support -const OPENAI_WEB_SEARCH_MODELS: &[&str] = &[ - "gpt-4o", "gpt-4o-mini", "chatgpt-4o-latest", - "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", - "gpt-5", "gpt-5.1", "gpt-5.2", "gpt-5-mini", "gpt-5-nano", -]; - -// Vision support detection -pub fn detect_vision(provider: &str, model: &str) -> VisionCapabilities { ... } - -// Thinking/reasoning history detection -fn detect_thinking_history_support(provider: &str, model: &str) -> bool { ... 
} -``` - -#### ProviderConfig Enum (`backend/crates/qbit-llm-providers/src/lib.rs`) -```rust -#[derive(Deserialize)] -#[serde(tag = "provider", rename_all = "snake_case")] -pub enum ProviderConfig { - VertexAi { workspace, model, credentials_path?, project_id, location }, - Openrouter { workspace, model, api_key }, - Openai { workspace, model, api_key, base_url?, reasoning_effort?, enable_web_search, ... }, - Anthropic { workspace, model, api_key }, - Ollama { workspace, model, base_url? }, - Gemini { workspace, model, api_key }, - Groq { workspace, model, api_key }, - Xai { workspace, model, api_key }, - ZaiSdk { workspace, model, api_key, base_url?, source_channel? }, -} -``` - -Used for Tauri command deserialization from frontend. - -#### LLM Client Factory (`backend/crates/qbit-ai/src/llm_client.rs`) - -Two layers of client creation: - -1. **Top-level functions** (`create_openai_components()`, `create_vertex_components()`, etc.) - - Called during session initialization - - Create `AgentBridgeComponents` with full context (workspace, tool registry, etc.) - - Contains provider-specific logic (e.g., OpenAI reasoning model detection) - -2. **`LlmClientFactory`** - For sub-agent model overrides - ```rust - pub struct LlmClientFactory { - cache: RwLock>>, - settings_manager: Arc, - workspace: PathBuf, - } - - impl LlmClientFactory { - pub async fn get_or_create(&self, provider: &str, model: &str) -> Result>; - } - ``` - - Reads credentials from settings - - Caches clients by (provider, model) key - - Duplicates much of the logic from top-level functions - -#### Reasoning Model Detection (`rig-openai-responses` crate) -```rust -pub fn is_reasoning_model(model: &str) -> bool { - let model_lower = model.to_lowercase(); - model_lower.starts_with("o1") - || model_lower.starts_with("o3") - || model_lower.starts_with("o4") - || model_lower.starts_with("gpt-5") -} -``` - -This is checked in multiple places to route to the correct client variant. - ---- - -### 2. Frontend Configuration - -#### Model ID Constants (`frontend/lib/ai.ts`) -```typescript -export const VERTEX_AI_MODELS = { - CLAUDE_OPUS_4_5: "claude-opus-4-5@20251101", - CLAUDE_SONNET_4_5: "claude-sonnet-4-5@20250929", - CLAUDE_HAIKU_4_5: "claude-haiku-4-5@20251001", -} as const; - -export const OPENAI_MODELS = { - GPT_5_2: "gpt-5.2", - GPT_5_1: "gpt-5.1", - // ... 15+ more models -} as const; - -// Similar constants for: ANTHROPIC, OLLAMA, GEMINI, GROQ, XAI, ZAI_SDK -``` - -#### Model Groups (`frontend/lib/models.ts`) -Two parallel data structures for UI rendering: - -1. **`PROVIDER_GROUPS`** - Flat list for simple dropdowns -2. **`PROVIDER_GROUPS_NESTED`** - Hierarchical for sub-menus (OpenAI reasoning variants) - -Each group contains: -```typescript -interface ProviderGroup { - provider: AiProvider; - providerName: string; - icon: string; - models: ModelInfo[]; -} -``` - -#### Provider Metadata (`frontend/components/Settings/ProviderSettings.tsx`) -```typescript -const PROVIDERS: ProviderConfig[] = [ - { - id: "anthropic", - name: "Anthropic", - icon: "🔶", - description: "Direct Claude API access", - getConfigured: (s) => !!s.anthropic.api_key, - }, - // ... 8 more providers -]; -``` - -#### TypeScript Settings Types (`frontend/lib/settings.ts`) -Mirrors the Rust schema exactly - must be kept in sync manually. - ---- - -### 3. 
UI Components That Display Providers/Models - -| Component | Purpose | Data Source | -|-----------|---------|-------------| -| `ProviderSettings.tsx` | Provider config cards | Hardcoded `PROVIDERS` array | -| `ModelSelector.tsx` | Default model dropdown in settings | `PROVIDER_GROUPS_NESTED` | -| `InputStatusRow.tsx` | Footer model selector | `PROVIDER_GROUPS` + `PROVIDER_GROUPS_NESTED` | -| `AiSettings.tsx` | Synthesis backend config | **Hardcoded model lists (separate!)** | - ---- - -## Duplication Map - -| Data | Backend Location | Frontend Location(s) | -|------|------------------|---------------------| -| Provider enum/type | `schema.rs:14-28` | `settings.ts:88-97` | -| Provider names/icons | N/A | `ProviderSettings.tsx:41-105`, `models.ts` (in PROVIDER_GROUPS) | -| Provider visibility check | `schema.rs` (`show_in_selector`) | `InputStatusRow.tsx:190-198, 251-259` (duplicated logic) | -| Model ID strings | N/A | `ai.ts:686-788` | -| Model display names | N/A | `models.ts:60-337, 343-702` | -| Credentials check | N/A | `ProviderSettings.tsx:47-104` (`getConfigured` functions) | -| Synthesis models | N/A | `AiSettings.tsx:239-294` (hardcoded, not from models.ts!) | - ---- - -## Backend Duplication & Issues - -### 1. Client Creation Logic Duplicated -The same client creation logic exists in **two places**: -- `create_*_components()` functions (lines 137-557 of `llm_client.rs`) -- `LlmClientFactory::create_client()` (lines 620-794 of `llm_client.rs`) - -When adding a provider or fixing a bug, both must be updated. - -### 2. Model Detection Logic Scattered -Model-specific behavior is detected in multiple places: - -| Detection | Location | Purpose | -|-----------|----------|---------| -| `is_reasoning_model()` | `rig-openai-responses/src/lib.rs` | Route to correct OpenAI client | -| `model_supports_temperature()` | `model_capabilities.rs` | Skip temperature param | -| `openai_supports_web_search()` | `model_capabilities.rs` | Enable web search tool | -| `detect_thinking_history_support()` | `model_capabilities.rs` | Track reasoning in history | -| `VisionCapabilities::detect()` | `model_capabilities.rs` | Image upload support | - -All use string prefix matching on model names. - -### 3. No Central Model Registry -The backend has **no list of valid models**. It: -- Accepts any model string from frontend -- Detects capabilities via string matching -- Fails only at API call time if model is invalid - -This means: -- No validation that a model exists for a provider -- Frontend is the only source of truth for available models -- Backend capability detection can drift from actual API behavior - -### 4. Provider-Specific Hardcoding -Each provider has unique initialization logic: -- Vertex AI: Service account or ADC, extended thinking, web search -- OpenAI: 3 different client types based on model -- Ollama: No API key, custom base URL (ignored currently) -- Z.AI: Source channel, custom base URL - -No abstraction - pure pattern matching on provider enum. - ---- - -## Pain Points - -### 1. Adding a New Provider Requires Changes In: -1. `backend/crates/qbit-settings/src/schema.rs` - Add enum variant + settings struct -2. `backend/crates/qbit-ai/src/llm_client.rs` - Add factory function -3. `frontend/lib/settings.ts` - Add TypeScript type + settings interface -4. `frontend/lib/ai.ts` - Add model constants -5. `frontend/lib/models.ts` - Add to PROVIDER_GROUPS + PROVIDER_GROUPS_NESTED -6. `frontend/components/Settings/ProviderSettings.tsx` - Add to PROVIDERS array - -### 2. 
Adding a New Model Requires Changes In: -1. `frontend/lib/ai.ts` - Add constant -2. `frontend/lib/models.ts` - Add to both PROVIDER_GROUPS and PROVIDER_GROUPS_NESTED -3. Potentially `AiSettings.tsx` if it's for synthesis - -### 3. Inconsistencies Found -- **Synthesis models are hardcoded separately** in `AiSettings.tsx` (lines 239-294) instead of sourcing from the central `models.ts` -- **Provider visibility logic duplicated** in `InputStatusRow.tsx` (appears twice: lines 190-198 and 251-259) -- **OpenRouter models hardcoded inline** in `models.ts` instead of having a constant like other providers - -### 4. Type Safety Gaps -- Model IDs are plain strings - no compile-time check that a model belongs to a provider -- Backend accepts any model string - no validation - ---- - -## Proposed Simplification Plan - -### Phase 1: Backend Model Registry - -**Goal**: Single source of truth for model metadata, eliminate string matching - -```rust -// New: backend/crates/qbit-models/src/lib.rs - -#[derive(Clone, Serialize)] -pub struct ModelDefinition { - pub id: &'static str, - pub display_name: &'static str, - pub provider: AiProvider, - pub capabilities: ModelCapabilities, -} - -#[derive(Clone, Default, Serialize)] -pub struct ModelCapabilities { - pub supports_temperature: bool, - pub supports_thinking_history: bool, - pub supports_vision: bool, - pub supports_web_search: bool, - pub is_reasoning_model: bool, // Uses OpenAI reasoning client - pub context_window: u32, - pub max_output_tokens: u32, -} - -pub static MODEL_REGISTRY: LazyLock> = LazyLock::new(|| vec![ - ModelDefinition { - id: "claude-opus-4-5@20251101", - display_name: "Claude Opus 4.5", - provider: AiProvider::VertexAi, - capabilities: ModelCapabilities { - supports_temperature: true, - supports_thinking_history: true, - supports_vision: true, - supports_web_search: true, - context_window: 200_000, - ..Default::default() - }, - }, - ModelDefinition { - id: "gpt-5.2", - display_name: "GPT 5.2", - provider: AiProvider::Openai, - capabilities: ModelCapabilities { - supports_temperature: false, // Reasoning model - supports_thinking_history: true, - supports_vision: true, - supports_web_search: true, - is_reasoning_model: true, - context_window: 128_000, - ..Default::default() - }, - }, - // ... 
-]); - -// Replace all string matching with registry lookup -pub fn get_model(id: &str) -> Option<&'static ModelDefinition> { - MODEL_REGISTRY.iter().find(|m| m.id == id) -} - -pub fn get_models_for_provider(provider: AiProvider) -> Vec<&'static ModelDefinition> { - MODEL_REGISTRY.iter().filter(|m| m.provider == provider).collect() -} -``` - -**Benefits**: -- Eliminates `model_capabilities.rs` string matching -- Single place to update when models change -- Enables server-side validation -- Exposes to frontend via Tauri command - -**Tauri command**: -```rust -#[tauri::command] -fn get_available_models(provider: Option) -> Vec { - match provider { - Some(p) => get_models_for_provider(p), - None => MODEL_REGISTRY.clone(), - } -} -``` - -**Frontend impact**: Can fetch models from backend instead of hardcoding - -### Phase 2: Unify Backend Client Creation - -**Goal**: Eliminate duplication between `create_*_components()` and `LlmClientFactory` - -```rust -// Trait-based provider abstraction -pub trait LlmProvider: Send + Sync { - fn provider_type(&self) -> AiProvider; - fn create_client(&self, model: &str) -> Result; - fn validate_credentials(&self) -> Result<()>; -} - -// Implementations -pub struct OpenAiProvider { - api_key: String, - enable_web_search: bool, - web_search_context_size: String, -} - -impl LlmProvider for OpenAiProvider { - fn create_client(&self, model: &str) -> Result { - let model_def = get_model(model) - .ok_or_else(|| anyhow!("Unknown model: {}", model))?; - - if model_def.capabilities.is_reasoning_model { - // Use rig-openai-responses - let client = rig_openai_responses::Client::new(&self.api_key); - Ok(LlmClient::OpenAiReasoning(client.completion_model(model))) - } else { - // Use rig-core responses API - let client = rig_openai::Client::new(&self.api_key)?; - Ok(LlmClient::RigOpenAiResponses(client.completion_model(model))) - } - } -} - -// Registry of providers (built from settings) -pub struct ProviderRegistry { - providers: HashMap>, -} - -impl ProviderRegistry { - pub fn from_settings(settings: &QbitSettings) -> Self { ... } - - pub fn get(&self, provider: AiProvider) -> Option<&dyn LlmProvider> { - self.providers.get(&provider).map(|p| p.as_ref()) - } -} -``` - -**Benefits**: -- Single client creation path -- Provider behavior encapsulated in trait impl -- Easier to add new providers -- `LlmClientFactory` becomes thin wrapper around `ProviderRegistry` - -### Phase 3: Generate TypeScript Types from Rust - -**Goal**: Eliminate manual sync between Rust and TypeScript - -Options: -1. **ts-rs** crate - Generate TypeScript interfaces from Rust structs -2. **typeshare** crate - Generates types for multiple languages -3. **Manual codegen script** - Parse Rust and emit TypeScript - -Recommended: `ts-rs` with a build step - -```rust -// In schema.rs -use ts_rs::TS; - -#[derive(TS)] -#[ts(export)] -pub enum AiProvider { ... } -``` - -### Phase 4: Frontend Consolidation - -**Goal**: Single source of truth for all model-related data in `lib/models.ts` - -Now that backend provides the model registry, frontend can fetch dynamically. 
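On the backend side, the same registry is what collapses the Phase 1 capability shims into lookups. A minimal sketch of how a check such as `model_supports_temperature` could shrink (this assumes model IDs are unique across providers so the provider argument can be dropped, and keeps today's permissive default for unknown models):

```rust
// Sketch only: string prefix matching in model_capabilities.rs becomes a registry query.
pub fn model_supports_temperature(model_id: &str) -> bool {
    get_model(model_id)
        .map(|m| m.capabilities.supports_temperature)
        // Unknown model: fall back to the current permissive default.
        .unwrap_or(true)
}
```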
- -#### 4.1 Fetch models from backend -```typescript -// frontend/lib/models.ts -import { invoke } from "@tauri-apps/api/core"; - -export async function getAvailableModels(provider?: AiProvider): Promise { - return invoke("get_available_models", { provider }); -} -``` - -#### 4.2 Derive PROVIDER_GROUPS from backend data -```typescript -// Single definition per provider (UI metadata only) -const PROVIDER_UI_CONFIG = { - anthropic: { - name: "Anthropic", - icon: "🔶", - description: "Direct Claude API access", - checkConfigured: (s: AiSettings) => !!s.anthropic.api_key, - }, - // ... -} as const; - -// Models come from backend -export async function getProviderGroups(): Promise { - const models = await getAvailableModels(); - // Group by provider and merge with UI config -} -``` - -#### 4.3 Update synthesis models to use central source -```typescript -// In AiSettings.tsx -import { getProviderGroup } from "@/lib/models"; - -// Instead of hardcoded: -const vertexModels = await getProviderGroup("vertex_ai"); -``` - -### Phase 5: Provider Plugin Architecture (Future) - -**Note**: Only pursue this if adding many new providers becomes a common need. - -**Goal**: Make adding providers fully modular - -``` -backend/crates/qbit-providers/ - src/ - traits.rs # ProviderTrait definition - anthropic.rs # impl ProviderTrait for Anthropic - openai.rs # impl ProviderTrait for OpenAI - registry.rs # Provider discovery -``` - -Each provider module exports: -- Settings struct -- Model list -- LLM client factory -- UI metadata (name, icon, description) - ---- - -## Implementation Priority - -| Phase | Effort | Impact | Description | -|-------|--------|--------|-------------| -| **1 - Backend model registry** | Medium | High | Create `qbit-models` crate with `MODEL_REGISTRY`. Single source of truth for all model metadata. Eliminates string matching in `model_capabilities.rs`. | -| **2 - Unify client creation** | Medium | Medium | Consolidate duplicated logic in `create_*_components()` and `LlmClientFactory`. Use trait-based abstraction. | -| **3 - Type generation** | Low-Medium | Medium | Use `ts-rs` to generate TypeScript from Rust. Frontend fetches model list from backend via Tauri command. | -| **4 - Frontend consolidation** | Medium | High | Simplify frontend now that data comes from backend. Remove hardcoded model lists. | -| **5 - Plugin architecture** | High | High | Future work. Only if adding many providers. Full modular provider system. | - -### Rationale - -Starting with the backend (Phase 1) is critical because: -1. It establishes the **single source of truth** for model definitions -2. Frontend can then **fetch models dynamically** instead of hardcoding -3. Backend **validation** becomes possible (reject unknown models) -4. **Capability detection** moves from string matching to registry lookup -5. **Type generation** (Phase 3) can export the registry to TypeScript - ---- - -## Quick Wins (Can Do Immediately) - -### Frontend: -1. **Fix synthesis model lists** - Source from `models.ts` instead of hardcoding -2. **Extract visibility check logic** - Create `isProviderVisible(provider, settings)` helper -3. **Add OpenRouter constants** - Create `OPENROUTER_MODELS` like other providers -4. **DRY the provider metadata** - Single `PROVIDERS` definition used by both Settings and model selector - -### Backend: -1. **Extract `is_reasoning_model()` to qbit-llm-providers** - Currently in rig-openai-responses, duplicated in model_capabilities.rs -2. 
**Add model validation** - Warn if model string doesn't match known patterns -3. **Consolidate capability detection** - Single `ModelCapabilities::detect(provider, model)` entry point -4. **Add tests for capability detection** - Already have some, but coverage is incomplete for edge cases - ---- - -## Files to Modify - -### Phase 1: Backend Model Registry -**New crate**: `backend/crates/qbit-models/` -- `src/lib.rs` - `ModelDefinition`, `ModelCapabilities`, `MODEL_REGISTRY` -- `src/registry.rs` - Lookup functions - -**Modify**: -- `backend/crates/qbit-llm-providers/src/model_capabilities.rs` - Replace with registry lookups -- `backend/crates/qbit-ai/src/llm_client.rs` - Use registry for capability checks -- `backend/crates/qbit/src/ai/commands/*.rs` - Add `get_available_models` command -- `backend/Cargo.toml` - Add qbit-models to workspace - -### Phase 2: Unify Client Creation -**New**: `backend/crates/qbit-llm-providers/src/provider_trait.rs` -- `LlmProvider` trait -- Per-provider implementations - -**Modify**: -- `backend/crates/qbit-ai/src/llm_client.rs` - - Remove duplicated logic in `create_*_components()` and `LlmClientFactory` - - Both use `ProviderRegistry` -- `backend/crates/qbit-llm-providers/src/lib.rs` - Export provider trait - -### Phase 3: Type Generation -- `backend/crates/qbit-settings/src/schema.rs` - Add ts-rs derives -- `backend/crates/qbit-models/src/lib.rs` - Add ts-rs derives -- `backend/crates/qbit-settings/Cargo.toml` - Add ts-rs dependency -- `backend/crates/qbit-models/Cargo.toml` - Add ts-rs dependency -- New: `scripts/generate-types.sh` - Build script to generate TypeScript - -### Phase 4: Frontend Consolidation -- `frontend/lib/models.ts` - Fetch from backend, remove hardcoded lists -- `frontend/lib/ai.ts` - Remove model constants -- `frontend/lib/settings.ts` - Import generated types -- `frontend/components/Settings/ProviderSettings.tsx` - Use shared provider config -- `frontend/components/Settings/AiSettings.tsx` - Use models from backend -- `frontend/components/UnifiedInput/InputStatusRow.tsx` - Extract visibility logic - ---- - -## Appendix: Current File Locations - -``` -Backend (Provider/Model Related): -├── backend/crates/qbit-settings/src/ -│ └── schema.rs # AiProvider enum, per-provider settings structs -│ -├── backend/crates/qbit-llm-providers/src/ -│ ├── lib.rs # LlmClient enum (11 variants), ProviderConfig enum -│ ├── model_capabilities.rs # ModelCapabilities, VisionCapabilities, string matching -│ └── openai_config.rs # OpenAI web search config -│ -├── backend/crates/qbit-ai/src/ -│ └── llm_client.rs # create_*_components() (10 functions), LlmClientFactory -│ -├── backend/crates/rig-openai-responses/src/ -│ └── lib.rs # is_reasoning_model(), custom OpenAI reasoning client -│ -└── backend/crates/qbit/src/ - └── ai/commands/*.rs # Tauri commands for AI initialization - -Frontend: -├── frontend/lib/ -│ ├── ai.ts # Model ID constants (8 objects), init functions -│ ├── models.ts # PROVIDER_GROUPS, PROVIDER_GROUPS_NESTED, helpers -│ └── settings.ts # TypeScript settings types (mirrors Rust schema.rs) -│ -├── frontend/components/Settings/ -│ ├── ProviderSettings.tsx # Provider config UI, PROVIDERS array (duplicated) -│ ├── ModelSelector.tsx # Model dropdown component -│ └── AiSettings.tsx # Synthesis config (hardcoded models!) 
-│ -└── frontend/components/UnifiedInput/ - └── InputStatusRow.tsx # Footer model selector - -User Configuration Files: -├── ~/.qbit/settings.toml # Global provider/model settings -└── /.qbit/project.toml # Per-project provider/model override -``` - ---- - -## Summary: Adding a New Provider Today - -To add a new provider (e.g., "Mistral"), you must modify: - -### Backend (5 files): -1. `qbit-settings/src/schema.rs`: - - Add `Mistral` variant to `AiProvider` enum - - Add `MistralSettings` struct - - Add `mistral` field to `AiSettings` - - Update `Default` impl - -2. `qbit-llm-providers/src/lib.rs`: - - Add `RigMistral(...)` variant to `LlmClient` enum - - Add `MistralClientConfig` struct - - Add `Mistral { ... }` variant to `ProviderConfig` enum - - Update all `match` statements - -3. `qbit-llm-providers/src/model_capabilities.rs`: - - Add `"mistral"` cases to capability detection functions - -4. `qbit-ai/src/llm_client.rs`: - - Add `create_mistral_components()` function - - Add `AiProvider::Mistral` case to `LlmClientFactory::create_client()` - -5. `Cargo.toml` (if using a new rig provider crate): - - Add dependency - -### Frontend (5 files): -1. `lib/settings.ts`: - - Add `"mistral"` to `AiProvider` type union - - Add `MistralSettings` interface - - Add `mistral` field to `AiSettings` interface - - Update `DEFAULT_SETTINGS` - -2. `lib/ai.ts`: - - Add `MISTRAL_MODELS` constant - -3. `lib/models.ts`: - - Add Mistral to `PROVIDER_GROUPS` - - Add Mistral to `PROVIDER_GROUPS_NESTED` - -4. `components/Settings/ProviderSettings.tsx`: - - Add Mistral to `PROVIDERS` array - - Add Mistral-specific form fields - -5. `components/UnifiedInput/InputStatusRow.tsx`: - - Add `mistral: settings.ai.mistral.show_in_selector` (appears twice!) - -**Total: 10 files, ~20 separate changes** diff --git a/scripts/verify-tokens.py b/scripts/verify-tokens.py deleted file mode 100755 index 314d2ce1..00000000 --- a/scripts/verify-tokens.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -""" -Verify token counts from Qbit SSE log file. - -Usage: - 1. Set QBIT_SSE_LOG=/tmp/qbit-sse.jsonl before starting Qbit - 2. Have a conversation with the AI - 3. 
Run: python3 scripts/verify-tokens.py /tmp/qbit-sse.jsonl - -The script will parse the log and show: -- Token counts from each message_start (input_tokens) -- Token counts from each message_delta (output_tokens) -- Total accumulated tokens per conversation turn -""" - -import json -import sys -from pathlib import Path -from datetime import datetime - - -def parse_log(log_path: str): - """Parse JSONL log file and extract token information.""" - - turns = [] - current_turn = None - - with open(log_path, 'r') as f: - for line_num, line in enumerate(f, 1): - line = line.strip() - if not line: - continue - - try: - entry = json.loads(line) - except json.JSONDecodeError as e: - print(f"Warning: Line {line_num} is not valid JSON: {e}") - continue - - event_type = entry.get('event', '') - data = entry.get('data', {}) - ts = entry.get('ts', '') - - if event_type == 'message_start': - # Start a new turn - usage = data.get('message', {}).get('usage', {}) - current_turn = { - 'start_ts': ts, - 'input_tokens': usage.get('input_tokens', 0), - 'output_tokens': 0, - 'model': data.get('message', {}).get('model', 'unknown'), - } - - elif event_type == 'message_delta': - # Complete the turn - usage = data.get('usage', {}) - if current_turn: - current_turn['output_tokens'] = usage.get('output_tokens', 0) - current_turn['end_ts'] = ts - turns.append(current_turn) - current_turn = None - - return turns - - -def main(): - if len(sys.argv) < 2: - print(__doc__) - print("\nError: Please provide the log file path") - print("Example: python3 scripts/verify-tokens.py /tmp/qbit-sse.jsonl") - sys.exit(1) - - log_path = sys.argv[1] - - if not Path(log_path).exists(): - print(f"Error: Log file not found: {log_path}") - sys.exit(1) - - turns = parse_log(log_path) - - if not turns: - print("No complete turns found in log file.") - print("Make sure QBIT_SSE_LOG was set before starting the conversation.") - sys.exit(0) - - # Display results - print("=" * 70) - print("TOKEN VERIFICATION REPORT") - print("=" * 70) - print() - - total_input = 0 - total_output = 0 - - for i, turn in enumerate(turns, 1): - input_tokens = turn['input_tokens'] - output_tokens = turn['output_tokens'] - total = input_tokens + output_tokens - - total_input += input_tokens - total_output += output_tokens - - print(f"Turn {i}: {turn['model']}") - print(f" Input tokens: {input_tokens:>10,}") - print(f" Output tokens: {output_tokens:>10,}") - print(f" Turn total: {total:>10,}") - print() - - print("-" * 70) - print(f"GRAND TOTAL ({len(turns)} turns)") - print(f" Input tokens: {total_input:>10,}") - print(f" Output tokens: {total_output:>10,}") - print(f" Total: {total_input + total_output:>10,}") - print("-" * 70) - print() - print("Compare these values with what Qbit shows in the status bar.") - print("The status bar should show: ↓{input} ↑{output}") - - -if __name__ == '__main__': - main()