diff --git a/.env.example b/.env.example index bf1e6c0..563ec14 100644 --- a/.env.example +++ b/.env.example @@ -42,6 +42,17 @@ ROUTER_LLM_TIMEOUT_MS=2000 # Circuit-breaker cooldown after 5 consecutive failures (default: 30000ms) ROUTER_LLM_CIRCUIT_BREAK_MS=30000 +# --- LLM Filter Extraction (feature-flagged, default off) --- +# When enabled, the API attempts to infer structured FilterDSL conditions from +# natural-language queries when no explicit filter is provided and routing is +# ambiguous (method=default or rule_fallback). All LLM output is validated +# through translateFilter before use; invalid output falls back silently. +ROUTER_FILTER_LLM_ENABLED=false +# Model for filter extraction (default: llama3 for ollama, gpt-4o-mini for openai) +# ROUTER_FILTER_LLM_MODEL=llama3 +# Timeout in milliseconds for filter extraction LLM call (default: 1500) +ROUTER_FILTER_LLM_TIMEOUT_MS=1500 + # --- Enrichment (optional) --- ENRICHMENT_ENABLED=false diff --git a/api/src/services/query-filter-parser.test.ts b/api/src/services/query-filter-parser.test.ts new file mode 100644 index 0000000..e6caac6 --- /dev/null +++ b/api/src/services/query-filter-parser.test.ts @@ -0,0 +1,511 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { + extractStructuredFilter, + _resetFilterParserCircuitBreaker, + _filterParserCircuitBreaker, +} from "./query-filter-parser.js"; +import type { FilterParserRequest } from "./query-filter-parser.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeRequest(query: string): FilterParserRequest { + return { query, strategy: "semantic" }; +} + +beforeEach(() => { + _resetFilterParserCircuitBreaker(); + vi.unstubAllEnvs(); + vi.clearAllMocks(); +}); + +afterEach(() => { + vi.unstubAllEnvs(); +}); + +// --------------------------------------------------------------------------- +// 
Feature flag +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — feature flag", () => { + it("returns null when ROUTER_FILTER_LLM_ENABLED is not set (default off)", async () => { + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + const result = await extractStructuredFilter(makeRequest("all typescript files from 2023")); + expect(result).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("returns null when ROUTER_FILTER_LLM_ENABLED=false", async () => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "false"); + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("calls LLM when ROUTER_FILTER_LLM_ENABLED=true", async () => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: '{"conditions":[{"field":"lang","op":"eq","value":"ts"}],"combine":"and"}' }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(fetchMock).toHaveBeenCalled(); + expect(result).not.toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// Empty query guard +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — empty query guard", () => { + it("returns null for empty query", async () => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + const result = await extractStructuredFilter(makeRequest("")); + expect(result).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + 
it("returns null for whitespace-only query", async () => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + const result = await extractStructuredFilter(makeRequest(" ")); + expect(result).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); +}); + +// --------------------------------------------------------------------------- +// Successful filter extraction (Ollama) +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — Ollama success cases", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + vi.stubEnv("EMBED_PROVIDER", "ollama"); + }); + + it("returns FilterDSL for valid single-condition response", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: '{"conditions":[{"field":"lang","op":"eq","value":"ts"}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).not.toBeNull(); + expect(result!.conditions).toHaveLength(1); + expect(result!.conditions[0].field).toBe("lang"); + expect(result!.conditions[0].op).toBe("eq"); + expect(result!.conditions[0].value).toBe("ts"); + }); + + it("handles temporal range query — between operator", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: JSON.stringify({ + conditions: [ + { field: "ingestedAt", op: "between", range: { low: "2023-01-01", high: "2023-12-31" } }, + ], + combine: "and", + }), + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all invoices from 2023")); + expect(result).not.toBeNull(); + expect(result!.conditions[0].field).toBe("ingestedAt"); + 
expect(result!.conditions[0].op).toBe("between"); + expect(result!.conditions[0].range).toEqual({ low: "2023-01-01", high: "2023-12-31" }); + }); + + it("handles multi-constraint query — lang + ingestedAt", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: JSON.stringify({ + conditions: [ + { field: "lang", op: "eq", value: "ts" }, + { field: "ingestedAt", op: "between", range: { low: "2023-01-01", high: "2024-12-31" } }, + ], + combine: "and", + }), + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter( + makeRequest("all openai invoices from 2023 and 2024"), + ); + expect(result).not.toBeNull(); + expect(result!.conditions).toHaveLength(2); + }); + + it("returns null when LLM responds with literal null", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: "null" }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("how does auth work")); + expect(result).toBeNull(); + }); + + it("returns null when LLM responds with empty conditions array", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: '{"conditions":[],"combine":"and"}' }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("general question")); + expect(result).toBeNull(); + }); + + it("sends prompt to Ollama /api/generate endpoint", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: '{"conditions":[{"field":"lang","op":"eq","value":"ts"}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + await extractStructuredFilter(makeRequest("typescript files")); + expect(fetchMock).toHaveBeenCalledWith( + 
"http://localhost:11434/api/generate", + expect.objectContaining({ method: "POST" }), + ); + }); +}); + +// --------------------------------------------------------------------------- +// Successful filter extraction (OpenAI) +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — OpenAI success cases", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("EMBED_PROVIDER", "openai"); + vi.stubEnv("OPENAI_API_KEY", "test-key"); + vi.stubEnv("OPENAI_BASE_URL", "https://api.openai.com/v1"); + }); + + it("returns FilterDSL using OpenAI chat completions", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + choices: [ + { + message: { + content: '{"conditions":[{"field":"lang","op":"eq","value":"py"}],"combine":"and"}', + }, + }, + ], + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all python files")); + expect(result).not.toBeNull(); + expect(result!.conditions[0].field).toBe("lang"); + expect(result!.conditions[0].value).toBe("py"); + }); + + it("calls OpenAI /chat/completions endpoint with json_schema response_format", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + choices: [ + { + message: { + content: '{"conditions":[{"field":"lang","op":"eq","value":"py"}],"combine":"and"}', + }, + }, + ], + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + await extractStructuredFilter(makeRequest("python files")); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.openai.com/v1/chat/completions", + expect.objectContaining({ method: "POST" }), + ); + + const [, requestInit] = fetchMock.mock.calls[0] as [string, RequestInit]; + const body = JSON.parse(requestInit.body as string) as { + response_format: { type: string; json_schema: { name: string; strict: boolean } }; + 
messages: Array<{ role: string }>; + }; + + // Must use structured output format + expect(body.response_format.type).toBe("json_schema"); + expect(body.response_format.json_schema.name).toBe("filter_dsl"); + expect(body.response_format.json_schema.strict).toBe(true); + + // Must use system + user message pattern + expect(body.messages[0].role).toBe("system"); + expect(body.messages[1].role).toBe("user"); + }); + + it("treats empty conditions array from OpenAI as no-filter result", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + choices: [ + { + message: { + content: '{"conditions":[],"combine":"and"}', + }, + }, + ], + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("how does authentication work")); + expect(result).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// Invalid / malformed output +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — invalid output handling", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + }); + + it("returns null for unparseable JSON response", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: "not json at all" }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).toBeNull(); + }); + + it("returns null when response has unknown field", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: '{"conditions":[{"field":"bogusField","op":"eq","value":"x"}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await 
extractStructuredFilter(makeRequest("some query")); + expect(result).toBeNull(); + }); + + it("returns null when response has disallowed operator for field", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + // docType only supports text ops (eq/ne/in/notIn), not gte + response: '{"conditions":[{"field":"docType","op":"gte","value":"code"}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("some query")); + expect(result).toBeNull(); + }); + + it("returns null when response is missing conditions array", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: '{"combine":"and"}' }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("some query")); + expect(result).toBeNull(); + }); + + it("returns null when response has JSON with extra LLM text before the object", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: + 'Here is the FilterDSL: {"conditions":[{"field":"lang","op":"eq","value":"ts"}],"combine":"and"} Done.', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("typescript files")); + expect(result).not.toBeNull(); + expect(result!.conditions[0].field).toBe("lang"); + }); +}); + +// --------------------------------------------------------------------------- +// Timeout and network errors +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — timeout / network errors", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + vi.stubEnv("ROUTER_FILTER_LLM_TIMEOUT_MS", "1"); + }); + + it("returns null when LLM 
times out", async () => { + const fetchMock = vi.fn().mockImplementation( + () => + new Promise((_, reject) => + setTimeout(() => reject(new DOMException("Aborted", "AbortError")), 50), + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).toBeNull(); + }); + + it("returns null when LLM request fails with network error", async () => { + const fetchMock = vi.fn().mockRejectedValue(new Error("Network unreachable")); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).toBeNull(); + }); + + it("returns null when LLM returns HTTP error", async () => { + vi.stubEnv("ROUTER_FILTER_LLM_TIMEOUT_MS", "5000"); + const fetchMock = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + json: async () => ({}), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("all typescript files")); + expect(result).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// Circuit breaker +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — circuit breaker", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + vi.stubEnv("ROUTER_LLM_CIRCUIT_BREAK_MS", "60000"); + }); + + it("opens circuit after 5 consecutive failures and blocks further calls", async () => { + const fetchMock = vi.fn().mockRejectedValue(new Error("Connection refused")); + vi.stubGlobal("fetch", fetchMock); + + // Trigger 5 failures to open the breaker + for (let i = 0; i < 5; i++) { + await extractStructuredFilter(makeRequest("some query")); + } + + expect(_filterParserCircuitBreaker.state).toBe("open"); + const callCountBeforeOpen = fetchMock.mock.calls.length; + + // 
Further calls should be blocked (no fetch) + const result = await extractStructuredFilter(makeRequest("another query")); + expect(result).toBeNull(); + expect(fetchMock.mock.calls.length).toBe(callCountBeforeOpen); + }); + + it("resets circuit breaker on no_filter response (empty conditions) — prevents failure accumulation", async () => { + // Trigger 3 failures + const failingFetch = vi.fn().mockRejectedValue(new Error("fail")); + vi.stubGlobal("fetch", failingFetch); + for (let i = 0; i < 3; i++) { + await extractStructuredFilter(makeRequest("query")); + } + expect(_filterParserCircuitBreaker.failures).toBe(3); + + // A clean no-filter response should reset the failure count + const noFilterFetch = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ response: '{"conditions":[],"combine":"and"}' }), + } as unknown as Response); + vi.stubGlobal("fetch", noFilterFetch); + + await extractStructuredFilter(makeRequest("how does auth work")); + expect(_filterParserCircuitBreaker.failures).toBe(0); + expect(_filterParserCircuitBreaker.state).toBe("closed"); + }); + + it("resets circuit breaker on success", async () => { + // Open the breaker first + const failingFetch = vi.fn().mockRejectedValue(new Error("fail")); + vi.stubGlobal("fetch", failingFetch); + for (let i = 0; i < 5; i++) { + await extractStructuredFilter(makeRequest("query")); + } + expect(_filterParserCircuitBreaker.state).toBe("open"); + + // Simulate half-open by resetting the timer + _filterParserCircuitBreaker.state = "half-open"; + + // Successful response resets + const successFetch = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: '{"conditions":[{"field":"lang","op":"eq","value":"ts"}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", successFetch); + + await extractStructuredFilter(makeRequest("typescript files")); + expect(_filterParserCircuitBreaker.state).toBe("closed"); + expect(_filterParserCircuitBreaker.failures).toBe(0); + 
}); +}); + +// --------------------------------------------------------------------------- +// Integration: query pipeline uses inferred filter +// --------------------------------------------------------------------------- + +describe("extractStructuredFilter — in operator", () => { + beforeEach(() => { + vi.stubEnv("ROUTER_FILTER_LLM_ENABLED", "true"); + vi.stubEnv("OLLAMA_URL", "http://localhost:11434"); + }); + + it("handles in operator with values array", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + response: '{"conditions":[{"field":"lang","op":"in","values":["ts","js"]}],"combine":"and"}', + }), + } as unknown as Response); + vi.stubGlobal("fetch", fetchMock); + + const result = await extractStructuredFilter(makeRequest("typescript or javascript files")); + expect(result).not.toBeNull(); + expect(result!.conditions[0].op).toBe("in"); + expect(result!.conditions[0].values).toEqual(["ts", "js"]); + }); +}); diff --git a/api/src/services/query-filter-parser.ts b/api/src/services/query-filter-parser.ts new file mode 100644 index 0000000..7b098a0 --- /dev/null +++ b/api/src/services/query-filter-parser.ts @@ -0,0 +1,504 @@ +/** + * LLM-based structured filter extraction for natural language queries. + * + * Feature-flagged fallback that extracts FilterDSL conditions from free-form + * query text when deterministic routing rules are ambiguous. + * + * All LLM output is treated as untrusted input and validated through + * translateFilter before use. Never executes raw LLM-supplied SQL. 
+ */
+
+import type { FilterDSL } from "../pg-helpers.js";
+import { translateFilter, FilterValidationError } from "../pg-helpers.js";
+import type { QueryStrategy } from "./query-router.js";
+
+export interface FilterParserRequest {
+  query: string;
+  strategy: QueryStrategy;
+  existingFilter?: FilterDSL | Record<string, unknown>;
+}
+
+// ---------------------------------------------------------------------------
+// Configuration
+// ---------------------------------------------------------------------------
+
+function getFilterLlmEnabled(): boolean {
+  return process.env.ROUTER_FILTER_LLM_ENABLED === "true";
+}
+
+/** Returns true when the ROUTER_FILTER_LLM_ENABLED feature flag is active. */
+export function isFilterLlmEnabled(): boolean {
+  return getFilterLlmEnabled();
+}
+
+function getFilterLlmModel(): string {
+  if (process.env.ROUTER_FILTER_LLM_MODEL) {
+    return process.env.ROUTER_FILTER_LLM_MODEL;
+  }
+  return getEmbedProvider() === "openai" ? "gpt-4o-mini" : "llama3";
+}
+
+function getFilterLlmTimeoutMs(): number {
+  const val = process.env.ROUTER_FILTER_LLM_TIMEOUT_MS;
+  if (val) {
+    const parsed = Number.parseInt(val, 10);
+    if (Number.isFinite(parsed) && parsed > 0) return parsed;
+  }
+  return 1500;
+}
+
+function getFilterLlmCircuitBreakMs(): number {
+  const val = process.env.ROUTER_LLM_CIRCUIT_BREAK_MS;
+  if (val) {
+    const parsed = Number.parseInt(val, 10);
+    if (Number.isFinite(parsed) && parsed > 0) return parsed;
+  }
+  return 30_000;
+}
+
+function getEmbedProvider(): string {
+  return process.env.EMBED_PROVIDER || "ollama";
+}
+
+function getOllamaUrl(): string {
+  return process.env.OLLAMA_URL || "http://localhost:11434";
+}
+
+function getOpenAiBaseUrl(): string {
+  return (process.env.OPENAI_BASE_URL || "https://api.openai.com/v1").replace(
+    /\/+$/,
+    "",
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Circuit breaker (dedicated for filter parser, same defaults as router)
+//
--------------------------------------------------------------------------- + +type CircuitState = "closed" | "open" | "half-open"; + +interface CircuitBreaker { + state: CircuitState; + failures: number; + openedAt: number | null; +} + +const FAILURE_THRESHOLD = 5; + +export const _filterParserCircuitBreaker: CircuitBreaker = { + state: "closed", + failures: 0, + openedAt: null, +}; + +export function _resetFilterParserCircuitBreaker(): void { + _filterParserCircuitBreaker.state = "closed"; + _filterParserCircuitBreaker.failures = 0; + _filterParserCircuitBreaker.openedAt = null; +} + +function isCircuitOpen(): boolean { + if (_filterParserCircuitBreaker.state === "closed") return false; + if (_filterParserCircuitBreaker.state === "open") { + const cooldown = getFilterLlmCircuitBreakMs(); + if ( + _filterParserCircuitBreaker.openedAt !== null && + Date.now() - _filterParserCircuitBreaker.openedAt >= cooldown + ) { + _filterParserCircuitBreaker.state = "half-open"; + return false; + } + return true; + } + // half-open → allow probe + return false; +} + +function recordParserFailure(): void { + if (_filterParserCircuitBreaker.state === "half-open") { + _filterParserCircuitBreaker.state = "open"; + _filterParserCircuitBreaker.openedAt = Date.now(); + _filterParserCircuitBreaker.failures = FAILURE_THRESHOLD; + return; + } + _filterParserCircuitBreaker.failures += 1; + if (_filterParserCircuitBreaker.failures >= FAILURE_THRESHOLD) { + _filterParserCircuitBreaker.state = "open"; + _filterParserCircuitBreaker.openedAt = Date.now(); + } +} + +function recordParserSuccess(): void { + _filterParserCircuitBreaker.state = "closed"; + _filterParserCircuitBreaker.failures = 0; + _filterParserCircuitBreaker.openedAt = null; +} + +// --------------------------------------------------------------------------- +// Prompt +// --------------------------------------------------------------------------- + +const FILTER_EXTRACTION_PROMPT = `Extract structured filter conditions from the 
following search query.
+Return ONLY valid JSON matching the FilterDSL schema. If no filters can be extracted, return the literal null.
+
+Available fields and allowed operators:
+- docType: document type string (ops: eq, ne, in, notIn)
+- repoId: repository identifier string (ops: eq, ne, in, notIn)
+- lang: programming language code — use short codes only: "ts" (TypeScript), "js" (JavaScript), "py" (Python), "go" (Go), "rs" (Rust), "java" (Java), "rb" (Ruby), "cpp" (C++) (ops: eq, ne, in, notIn)
+- path: file path prefix string (ops: eq, ne, in, notIn)
+- mimeType: MIME type string (ops: eq, ne, in, notIn)
+- ingestedAt: ingestion timestamp ISO 8601 date string, e.g. "2023-01-01" (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull)
+- createdAt: creation timestamp ISO 8601 date string (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull)
+- updatedAt: last update timestamp ISO 8601 date string (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull)
+
+FilterDSL schema:
+{
+  "conditions": [
+    { "field": "<field>", "op": "<op>", "value": "<value>" }
+    | { "field": "<field>", "op": "in"|"notIn", "values": ["<v1>", "<v2>"] }
+    | { "field": "<field>", "op": "between"|"notBetween", "range": { "low": "<low>", "high": "<high>" } }
+  ],
+  "combine": "and" | "or"
+}
+
+Examples:
+- "all typescript files from 2023" → {"conditions":[{"field":"lang","op":"eq","value":"ts"},{"field":"ingestedAt","op":"between","range":{"low":"2023-01-01","high":"2023-12-31"}}],"combine":"and"}
+- "python or javascript code" → {"conditions":[{"field":"lang","op":"in","values":["py","js"]}],"combine":"and"}
+- "openai invoices from 2023 and 2024" → {"conditions":[{"field":"ingestedAt","op":"between","range":{"low":"2023-01-01","high":"2024-12-31"}}],"combine":"and"}
+- "documents ingested after 2024-06-01" → {"conditions":[{"field":"ingestedAt","op":"gte","value":"2024-06-01"}],"combine":"and"}
+- "how does authentication work" → null
+
+Respond ONLY with valid
JSON (the FilterDSL object) or the literal null. +Query: "`; + +/** + * OpenAI-specific system prompt. When using structured output (json_schema + * response_format), the model cannot return the literal `null` token — instead + * it must return a valid object. An empty `conditions` array signals "no + * applicable filters" and is handled identically to a null response. + */ +const OPENAI_SYSTEM_PROMPT = `You are a structured filter extractor. Given a natural-language search query, output a FilterDSL JSON object that captures any explicit attribute or temporal constraints. + +Available fields and allowed operators: +- docType: document type string (ops: eq, ne, in, notIn) +- repoId: repository identifier string (ops: eq, ne, in, notIn) +- lang: programming language code — use short codes only: "ts" (TypeScript), "js" (JavaScript), "py" (Python), "go" (Go), "rs" (Rust), "java" (Java), "rb" (Ruby), "cpp" (C++) (ops: eq, ne, in, notIn) +- path: file path prefix string (ops: eq, ne, in, notIn) +- mimeType: MIME type string (ops: eq, ne, in, notIn) +- ingestedAt: ingestion timestamp ISO 8601 date string, e.g. "2023-01-01" (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull) +- createdAt: creation timestamp ISO 8601 date string (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull) +- updatedAt: last update timestamp ISO 8601 date string (ops: eq, ne, gt, gte, lt, lte, between, notBetween, in, notIn, isNull, isNotNull) + +Rules: +- Use an empty conditions array when no filter constraints are present in the query. +- Always output "combine": "and" or "combine": "or" (use "and" when unsure). +- Use scalar conditions for single-value comparisons, range conditions for temporal spans, and list conditions for sets. 
+ +Examples: +- "all typescript files from 2023" → {"conditions":[{"field":"lang","op":"eq","value":"ts"},{"field":"ingestedAt","op":"between","range":{"low":"2023-01-01","high":"2023-12-31"}}],"combine":"and"} +- "python or javascript code" → {"conditions":[{"field":"lang","op":"in","values":["py","js"]}],"combine":"and"} +- "openai invoices from 2023 and 2024" → {"conditions":[{"field":"ingestedAt","op":"between","range":{"low":"2023-01-01","high":"2024-12-31"}}],"combine":"and"} +- "how does authentication work" → {"conditions":[],"combine":"and"}`; + +const ALLOWED_FIELDS = [ + "docType", + "repoId", + "lang", + "path", + "mimeType", + "ingestedAt", + "createdAt", + "updatedAt", +] as const; + +/** + * JSON Schema for FilterDSL used with OpenAI structured output. + * Conditions are a discriminated union keyed on `op`. + */ +const FILTER_DSL_JSON_SCHEMA = { + type: "object", + required: ["conditions", "combine"], + additionalProperties: false, + properties: { + conditions: { + type: "array", + items: { + anyOf: [ + { + type: "object", + required: ["field", "op", "value"], + additionalProperties: false, + properties: { + field: { type: "string", enum: ALLOWED_FIELDS }, + op: { + type: "string", + enum: ["eq", "ne", "gt", "gte", "lt", "lte", "isNull", "isNotNull"], + }, + value: { type: "string" }, + }, + }, + { + type: "object", + required: ["field", "op", "values"], + additionalProperties: false, + properties: { + field: { type: "string", enum: ALLOWED_FIELDS }, + op: { type: "string", enum: ["in", "notIn"] }, + values: { type: "array", items: { type: "string" } }, + }, + }, + { + type: "object", + required: ["field", "op", "range"], + additionalProperties: false, + properties: { + field: { type: "string", enum: ALLOWED_FIELDS }, + op: { type: "string", enum: ["between", "notBetween"] }, + range: { + type: "object", + required: ["low", "high"], + additionalProperties: false, + properties: { + low: { type: "string" }, + high: { type: "string" }, + }, + }, + }, + 
},
+        ],
+      },
+    },
+    combine: { type: "string", enum: ["and", "or"] },
+  },
+} as const;
+
+// ---------------------------------------------------------------------------
+// LLM call and response parsing
+// ---------------------------------------------------------------------------
+
+/**
+ * Parses and validates the raw LLM text response.
+ * Returns FilterDSL on success, null if no filters apply, throws on invalid output.
+ */
+function parseAndValidateFilterResponse(text: string): FilterDSL | null {
+  const trimmed = text.trim();
+  if (trimmed === "null" || trimmed === "") return null;
+
+  const jsonText = extractFirstJsonObject(trimmed);
+  if (!jsonText) {
+    throw new Error("Filter parser returned non-JSON response");
+  }
+
+  const parsed = JSON.parse(jsonText) as unknown;
+  if (typeof parsed !== "object" || parsed === null) {
+    throw new Error("Filter parser returned non-object JSON");
+  }
+
+  const obj = parsed as Record<string, unknown>;
+  if (!Array.isArray(obj["conditions"])) {
+    throw new Error("Filter parser output missing conditions array");
+  }
+
+  const candidate = obj as unknown as FilterDSL;
+
+  // Validate through translateFilter — throws FilterValidationError on unknown field/operator
+  try {
+    translateFilter(candidate, 0);
+  } catch (err) {
+    if (err instanceof FilterValidationError) {
+      throw new Error(`Filter parser output failed validation: ${err.message}`);
+    }
+    throw err;
+  }
+
+  return candidate;
+}
+
+function extractFirstJsonObject(text: string): string | null {
+  const start = text.indexOf("{");
+  if (start === -1) return null;
+
+  let depth = 0;
+  let inString = false;
+  let escapeNext = false;
+
+  for (let index = start; index < text.length; index++) {
+    const char = text[index];
+
+    if (escapeNext) {
+      escapeNext = false;
+      continue;
+    }
+
+    if (char === "\\") {
+      escapeNext = true;
+      continue;
+    }
+
+    if (char === '"') {
+      inString = !inString;
+      continue;
+    }
+
+    if (inString) continue;
+
+    if (char === "{") {
+      depth += 1;
+    } else if (char ===
"}") {
+      depth -= 1;
+      if (depth === 0) {
+        return text.slice(start, index + 1);
+      }
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Calls the LLM to extract filter conditions.
+ * Returns FilterDSL on success, null if LLM indicates no applicable filters.
+ * Throws on network/HTTP/parse errors.
+ */
+async function callLlm(query: string): Promise<FilterDSL | null> {
+  const timeoutMs = getFilterLlmTimeoutMs();
+  const controller = new AbortController();
+  const timerId = setTimeout(() => controller.abort(), timeoutMs);
+
+  // JSON.stringify produces a properly escaped JSON string including the quotes;
+  // strip the outer quotes since the prompt template already provides them.
+  const escapedQuery = JSON.stringify(query).slice(1, -1);
+  const prompt = `${FILTER_EXTRACTION_PROMPT}${escapedQuery}"`;
+
+  try {
+    const provider = getEmbedProvider();
+
+    if (provider === "openai") {
+      const apiKey = process.env.OPENAI_API_KEY;
+      if (!apiKey) throw new Error("OPENAI_API_KEY required for LLM filter extraction");
+      const model = getFilterLlmModel();
+      const response = await fetch(`${getOpenAiBaseUrl()}/chat/completions`, {
+        method: "POST",
+        headers: {
+          "content-type": "application/json",
+          authorization: `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify({
+          model,
+          messages: [
+            { role: "system", content: OPENAI_SYSTEM_PROMPT },
+            { role: "user", content: `Query: "${escapedQuery}"` },
+          ],
+          temperature: 0,
+          response_format: {
+            type: "json_schema",
+            json_schema: {
+              name: "filter_dsl",
+              strict: true,
+              schema: FILTER_DSL_JSON_SCHEMA,
+            },
+          },
+        }),
+        signal: controller.signal,
+      });
+      clearTimeout(timerId);
+      if (!response.ok) {
+        throw new Error(`OpenAI chat completions failed: ${response.status}`);
+      }
+      const json = (await response.json()) as {
+        choices?: Array<{ message?: { content?: string } }>;
+      };
+      const content = json.choices?.[0]?.message?.content ??
""; + return parseAndValidateFilterResponse(content); + } else { + // Default: Ollama /api/generate + const model = getFilterLlmModel(); + const response = await fetch(`${getOllamaUrl()}/api/generate`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ model, prompt, stream: false }), + signal: controller.signal, + }); + clearTimeout(timerId); + if (!response.ok) { + throw new Error(`Ollama generate failed: ${response.status}`); + } + const json = (await response.json()) as { response?: string }; + return parseAndValidateFilterResponse(json.response ?? ""); + } + } catch (err) { + clearTimeout(timerId); + throw err; + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Attempts to extract a structured FilterDSL from a natural language query + * using an LLM. Returns null if: + * - Feature flag `ROUTER_FILTER_LLM_ENABLED` is not `"true"` + * - Circuit breaker is open (too many recent failures) + * - Query is empty + * - LLM indicates no applicable filters + * - LLM output fails schema or field/operator validation + * + * Never throws — all errors produce a null return and are logged. + * All LLM output is validated through translateFilter before returning. + */ +export async function extractStructuredFilter( + request: FilterParserRequest, +): Promise { + if (!getFilterLlmEnabled()) return null; + if (isCircuitOpen()) return null; + if (!request.query || request.query.trim().length === 0) return null; + + const parseStart = Date.now(); + + try { + const result = await callLlm(request.query); + const latencyMs = Date.now() - parseStart; + + if (result === null || result.conditions.length === 0) { + // LLM responded cleanly but found no filters — still a successful call; + // reset breaker so prior failures don't accumulate across clean responses. 
+ recordParserSuccess(); + console.log( + JSON.stringify({ + event: "filter_parser", + status: "no_filter", + strategy: request.strategy, + latencyMs, + }), + ); + return null; + } + + recordParserSuccess(); + console.log( + JSON.stringify({ + event: "filter_parser", + status: "success", + strategy: request.strategy, + conditionCount: result.conditions.length, + latencyMs, + }), + ); + return result; + } catch (err) { + const latencyMs = Date.now() - parseStart; + recordParserFailure(); + console.log( + JSON.stringify({ + event: "filter_parser", + status: "error", + strategy: request.strategy, + errorType: err instanceof Error ? err.name : "unknown", + latencyMs, + }), + ); + return null; + } +} diff --git a/api/src/services/query-router.ts b/api/src/services/query-router.ts index 9f54412..cb2b771 100644 --- a/api/src/services/query-router.ts +++ b/api/src/services/query-router.ts @@ -22,6 +22,8 @@ export interface RoutingResult { confidence: number; rule?: string; durationMs: number; + /** Set to true when LLM filter extraction inferred and applied a FilterDSL. */ + inferredFilter?: boolean; } export interface RouterRequest { diff --git a/api/src/services/query.test.ts b/api/src/services/query.test.ts index 77baca8..3575cf1 100644 --- a/api/src/services/query.test.ts +++ b/api/src/services/query.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, vi, beforeEach } from "vitest"; +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { query, countQueryTerms, getAutoMinScore } from "./query.js"; import type { QueryRequest } from "./query.js"; @@ -53,6 +53,14 @@ vi.mock("./query-router.js", () => ({ })), })); +// Mock query-filter-parser so existing tests are not affected by filter +// extraction. Integration tests set ROUTER_FILTER_LLM_ENABLED=true in beforeEach +// to enable extractor invocation. 
+vi.mock("./query-filter-parser.js", () => ({
+  extractStructuredFilter: vi.fn(async () => null),
+  isFilterLlmEnabled: vi.fn(() => process.env.ROUTER_FILTER_LLM_ENABLED === "true"),
+}));
+
 describe("countQueryTerms", () => {
   it("counts single term", () => {
     expect(countQueryTerms("hello")).toBe(1);
@@ -378,6 +386,141 @@ describe("query service", () => {
   });
 });
 
+// ---------------------------------------------------------------------------
+// LLM filter extraction integration
+// ---------------------------------------------------------------------------
+
+describe("query service — LLM filter extraction integration", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    process.env.ROUTER_FILTER_LLM_ENABLED = "true";
+  });
+
+  afterEach(() => {
+    delete process.env.ROUTER_FILTER_LLM_ENABLED;
+  });
+
+  it("does NOT call filter extractor when explicit filter is provided", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+
+    const result = await query({
+      query: "typescript files",
+      filter: { docType: "code" },
+    });
+
+    expect(extractStructuredFilter).not.toHaveBeenCalled();
+    expect(result.routing.inferredFilter).toBeUndefined();
+  });
+
+  it("calls filter extractor when no filter and routing is ambiguous (default)", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+    await query({ query: "all invoices from 2023" });
+
+    expect(extractStructuredFilter).toHaveBeenCalledWith(
+      expect.objectContaining({ query: "all invoices from 2023" }),
+    );
+  });
+
+  it("sets routing.inferredFilter=true and applies inferred filter to SQL query", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
+      conditions: [{ field: "lang", op: "eq", value: "ts" }],
+      combine: "and",
+    });
+
+    const queryMock = vi.fn(async () => ({ rows: [] }));
+    const { getPool } = await import("../db.js");
+    (getPool as any).mockReturnValueOnce({ query: queryMock });
+
+    const result = await query({ query: "all typescript files from 2023" });
+
+    expect(result.routing.inferredFilter).toBe(true);
+    const firstCall = queryMock.mock.calls[0] as unknown as unknown[];
+    const sql = String(firstCall[0] ?? "");
+    // inferred lang filter should appear in the SQL
+    expect(sql).toContain("c.lang = $5");
+  });
+
+  it("does NOT set inferredFilter when extractor returns null", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+    const result = await query({ query: "how does authentication work" });
+
+    expect(result.routing.inferredFilter).toBeUndefined();
+  });
+
+  it("preserves existing behavior when extractor returns null (no filter applied)", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce(null);
+
+    const queryMock = vi.fn(async () => ({ rows: [] }));
+    const { getPool } = await import("../db.js");
+    (getPool as any).mockReturnValueOnce({ query: queryMock });
+
+    const result = await query({ query: "general semantic search" });
+
+    expect(result.ok).toBe(true);
+    expect(result.routing.inferredFilter).toBeUndefined();
+    const firstCall = queryMock.mock.calls[0] as unknown as unknown[];
+    const sql = String(firstCall[0] ?? "");
+    // No extra filter conditions in the WHERE clause
+    expect(sql).not.toContain("$5");
+  });
+
+  // Explicit coverage for issue #130 required example queries
+  it("applies inferred temporal range filter for 'all openai invoices from 2023 and 2024'", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
+      conditions: [
+        {
+          field: "ingestedAt",
+          op: "between",
+          range: { low: "2023-01-01", high: "2024-12-31" },
+        },
+      ],
+      combine: "and",
+    });
+
+    const queryMock = vi.fn(async () => ({ rows: [] }));
+    const { getPool } = await import("../db.js");
+    (getPool as any).mockReturnValueOnce({ query: queryMock });
+
+    const result = await query({ query: "all openai invoices from 2023 and 2024" });
+
+    expect(result.routing.inferredFilter).toBe(true);
+    const firstCall = queryMock.mock.calls[0] as unknown as unknown[];
+    const sql = String(firstCall[0] ?? "");
+    // between on ingestedAt expands to >= low AND <= high
+    expect(sql).toContain("d.ingested_at >= $5 AND d.ingested_at <= $6");
+  });
+
+  it("applies multi-constraint inferred filter for 'python files ingested in 2024'", async () => {
+    const { extractStructuredFilter } = await import("./query-filter-parser.js");
+    (extractStructuredFilter as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
+      conditions: [
+        { field: "lang", op: "eq", value: "py" },
+        { field: "ingestedAt", op: "gte", value: "2024-01-01" },
+      ],
+      combine: "and",
+    });
+
+    const queryMock = vi.fn(async () => ({ rows: [] }));
+    const { getPool } = await import("../db.js");
+    (getPool as any).mockReturnValueOnce({ query: queryMock });
+
+    const result = await query({ query: "python files ingested in 2024" });
+
+    expect(result.routing.inferredFilter).toBe(true);
+    const firstCall = queryMock.mock.calls[0] as unknown as unknown[];
+    const sql = String(firstCall[0] ?? "");
+    expect(sql).toContain("c.lang = $5");
+    expect(sql).toContain("d.ingested_at >= $6");
+  });
+});
+
 describe("query service — hybrid dispatch", () => {
   beforeEach(() => {
     vi.clearAllMocks();
diff --git a/api/src/services/query.ts b/api/src/services/query.ts
index 545175d..79511d2 100644
--- a/api/src/services/query.ts
+++ b/api/src/services/query.ts
@@ -9,6 +9,7 @@ import { classifyQuery } from "./query-router.js";
 import type { RoutingResult, QueryStrategy } from "./query-router.js";
 import { queryMetadata } from "./query-metadata.js";
 import { hybridMetadataFlow, hybridGraphFlow } from "./hybrid-strategy.js";
+import { extractStructuredFilter, isFilterLlmEnabled } from "./query-filter-parser.js";
 
 export type { GraphParams, RoutingResult, QueryStrategy };
 
@@ -76,10 +77,33 @@ export async function query(
     strategy: request.strategy,
   });
 
+  // LLM filter extraction: attempt to infer structured FilterDSL from natural
+  // language when no explicit filter was provided and routing is ambiguous.
+  // Only runs when ROUTER_FILTER_LLM_ENABLED=true (default: false).
+  const hasExplicitFilter =
+    request.filter !== undefined && request.filter !== null;
+  const isAmbiguousRouting =
+    routing.method === "default" || routing.method === "rule_fallback";
+  const hasQuery = (request.query?.trim().length ?? 0) > 0;
+
+  let effectiveFilter: FilterDSL | Record<string, unknown> | undefined =
+    request.filter;
+
+  if (!hasExplicitFilter && isAmbiguousRouting && hasQuery && isFilterLlmEnabled()) {
+    const inferredFilter = await extractStructuredFilter({
+      query: request.query as string,
+      strategy: routing.strategy,
+    });
+    if (inferredFilter !== null) {
+      effectiveFilter = inferredFilter;
+      routing.inferredFilter = true;
+    }
+  }
+
   // Metadata-only path (no embedding)
   if (routing.strategy === "metadata") {
     const metaResult = await queryMetadata(
-      { collection: col, topK: request.topK, filter: request.filter },
+      { collection: col, topK: request.topK, filter: effectiveFilter },
       col,
     );
     return { ...metaResult, routing };
@@ -99,7 +123,7 @@ export async function query(
   if (routing.strategy === "hybrid") {
     const topK = request.topK ?? 8;
     const minScore = request.minScore ?? getAutoMinScore(queryText);
-    const hasFilter = request.filter !== undefined && request.filter !== null;
+    const hasFilter = effectiveFilter !== undefined && effectiveFilter !== null;
     const hasGraphExpand = (request.graphExpand === true) || (request.graph !== undefined);
 
     if (hasGraphExpand || !hasFilter) {
@@ -117,7 +141,7 @@ export async function query(
       query: queryText,
       topK,
       minScore,
-      filter: request.filter,
+      filter: effectiveFilter,
     });
     return { ...hybridResult, routing };
   }
@@ -133,7 +157,7 @@ export async function query(
   const maxDistance = 1 - minScore;
 
   // Translate filter to Postgres WHERE clause (offset by 4 for base params: $1=collection, $2=vector, $3=topK, $4=maxDistance)
-  const { sql: filterSql, params: filterParams } = translateFilter(request.filter, 4);
+  const { sql: filterSql, params: filterParams } = translateFilter(effectiveFilter, 4);
 
   // Build query with pgvector cosine distance
   const pool = getPool();
diff --git a/docs/09-api-reference.md b/docs/09-api-reference.md
index 9c086f0..46ba1c5 100644
--- a/docs/09-api-reference.md
+++ b/docs/09-api-reference.md
@@ -197,6 +197,7 @@ Semantic search over chunks with multi-strategy routing.
 | `confidence` | 0–1 | Classification confidence |
 | `rule` | string? | Matched rule name |
 | `durationMs` | number | Router wall-clock time (ms) |
+| `inferredFilter` | boolean? | `true` when LLM filter extraction inferred and applied a FilterDSL from the query text |
 
 **`graph.meta.` fields:**
 
@@ -386,3 +387,31 @@ Used by the enrichment worker:
 
 These endpoints are authenticated like all non-`/healthz` routes.
 
+---
+
+## Environment variable reference
+
+Key env vars that affect API behavior (see `.env.example` for full list):
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `ROUTER_LLM_ENABLED` | `true` | Enable/disable LLM strategy classification fallback |
+| `ROUTER_LLM_MODEL` | `llama3` | Generative model for strategy classification |
+| `ROUTER_LLM_TIMEOUT_MS` | `2000` | Timeout for strategy classifier LLM call |
+| `ROUTER_LLM_CIRCUIT_BREAK_MS` | `30000` | Circuit-breaker cooldown (ms) after 5 failures |
+| `ROUTER_FILTER_LLM_ENABLED` | `false` | Enable LLM filter extraction from natural language queries. Only activates when no explicit `filter` is provided, routing is ambiguous (`method: default` or `rule_fallback`), and the query is non-empty. |
+| `ROUTER_FILTER_LLM_MODEL` | _(provider default)_ | Model for filter extraction (`llama3` for ollama, `gpt-4o-mini` for openai) |
+| `ROUTER_FILTER_LLM_TIMEOUT_MS` | `1500` | Timeout for filter extraction LLM call |
+
+### LLM filter extraction
+
+When `ROUTER_FILTER_LLM_ENABLED=true`, the API will attempt to extract structured `FilterDSL` conditions from natural language queries when:
+
+1. No explicit `filter` is provided in the request
+2. Routing is ambiguous (`method: default` or `method: rule_fallback`)
+3. The query is non-empty
+
+If extraction succeeds, the inferred filter is applied to the query and `routing.inferredFilter: true` is set in the response. If extraction fails, times out, or produces invalid output, the existing behavior is unchanged — no error is returned to the caller.
+
+**Security:** All LLM output is validated through `translateFilter` before use. Only known fields and operators are accepted; unknown output is discarded.