From 6fdb402344287c457edb50af879edd93ee6a077a Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Sun, 22 Mar 2026 10:54:27 -0700 Subject: [PATCH 1/3] feat(web): replace language param with path in symbol tools, make repo mandatory - find_symbol_definitions and find_symbol_references now accept `path` (the file where the symbol was encountered) instead of `language` - Language is derived internally via detectLanguageFromFilename - `repo` is now required in both tool schemas - Also fixes ambiguous extension overrides in languageDetection.ts Co-Authored-By: Claude Sonnet 4.6 --- .../features/tools/findSymbolDefinitions.ts | 65 +++++++++++++++++ .../features/tools/findSymbolReferences.ts | 72 +++++++++++++++++++ packages/web/src/lib/languageDetection.ts | 23 ++++++ 3 files changed, 160 insertions(+) create mode 100644 packages/web/src/features/tools/findSymbolDefinitions.ts create mode 100644 packages/web/src/features/tools/findSymbolReferences.ts diff --git a/packages/web/src/features/tools/findSymbolDefinitions.ts b/packages/web/src/features/tools/findSymbolDefinitions.ts new file mode 100644 index 000000000..2eb5e8993 --- /dev/null +++ b/packages/web/src/features/tools/findSymbolDefinitions.ts @@ -0,0 +1,65 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { findSearchBasedSymbolDefinitions } from "@/features/codeNav/api"; +import { addLineNumbers } from "@/features/chat/utils"; +import { ToolDefinition } from "./types"; +import { FindSymbolFile } from "./findSymbolReferences"; +import { logger } from "./logger"; +import description from "./findSymbolDefinitions.txt"; +import { detectLanguageFromFilename } from "@/lib/languageDetection"; + +const findSymbolDefinitionsShape = { + symbol: z.string().describe("The symbol to find definitions of"), + path: z.string().describe("The file path where the symbol was encountered."), + repo: z.string().describe("The repository to scope the search to"), +}; + +export type 
FindSymbolDefinitionsMetadata = { + files: FindSymbolFile[]; +}; + +export const findSymbolDefinitionsDefinition: ToolDefinition< + 'find_symbol_definitions', + typeof findSymbolDefinitionsShape, + FindSymbolDefinitionsMetadata +> = { + name: 'find_symbol_definitions', + title: 'Find symbol definitions', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolDefinitionsShape), + execute: async ({ symbol, path, repo }, _context) => { + logger.debug('find_symbol_definitions', { symbol, path, repo }); + const revision = "HEAD"; + const language = detectLanguageFromFilename(path); + + const response = await findSearchBasedSymbolDefinitions({ + symbolName: symbol, + language, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const metadata: FindSymbolDefinitionsMetadata = { + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + language: file.language, + matches: file.matches.map(({ lineContent, range }) => { + return addLineNumbers(lineContent, range.start.lineNumber); + }), + revision, + })), + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/findSymbolReferences.ts b/packages/web/src/features/tools/findSymbolReferences.ts new file mode 100644 index 000000000..1678613b2 --- /dev/null +++ b/packages/web/src/features/tools/findSymbolReferences.ts @@ -0,0 +1,72 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { findSearchBasedSymbolReferences } from "@/features/codeNav/api"; +import { addLineNumbers } from "@/features/chat/utils"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./findSymbolReferences.txt"; +import { detectLanguageFromFilename } from "@/lib/languageDetection"; + +const findSymbolReferencesShape = { + symbol: z.string().describe("The 
symbol to find references to"), + path: z.string().describe("The file path where the symbol was encountered."), + repo: z.string().describe("The repository to scope the search to"), +}; + +export type FindSymbolFile = { + fileName: string; + repo: string; + language: string; + matches: string[]; + revision: string; +}; + +export type FindSymbolReferencesMetadata = { + files: FindSymbolFile[]; +}; + +export const findSymbolReferencesDefinition: ToolDefinition< + 'find_symbol_references', + typeof findSymbolReferencesShape, + FindSymbolReferencesMetadata +> = { + name: 'find_symbol_references', + title: 'Find symbol references', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolReferencesShape), + execute: async ({ symbol, path, repo }, _context) => { + logger.debug('find_symbol_references', { symbol, path, repo }); + const revision = "HEAD"; + const language = detectLanguageFromFilename(path); + + const response = await findSearchBasedSymbolReferences({ + symbolName: symbol, + language, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const metadata: FindSymbolReferencesMetadata = { + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + language: file.language, + matches: file.matches.map(({ lineContent, range }) => { + return addLineNumbers(lineContent, range.start.lineNumber); + }), + revision, + })), + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/lib/languageDetection.ts b/packages/web/src/lib/languageDetection.ts index e39f38de7..3a7ce2f9c 100644 --- a/packages/web/src/lib/languageDetection.ts +++ b/packages/web/src/lib/languageDetection.ts @@ -1,6 +1,23 @@ import * as linguistLanguages from 'linguist-languages'; import path from 'path'; +// Override map for extensions that are ambiguous in linguist-languages. 
+// These are extensions where linguist maps to multiple languages, but one +// is overwhelmingly more common in practice. +const ambiguousExtensionOverrides: Record<string, string> = { + '.cs': 'C#', // Not Smalltalk + '.fs': 'F#', // Not Forth, GLSL, or Filterscript + '.html': 'HTML', // Not Ecmarkup + '.json': 'JSON', // Not OASv2-json, OASv3-json + '.md': 'Markdown', // Not GCC Machine Description + '.rs': 'Rust', // Not RenderScript (deprecated) + '.tsx': 'TSX', // Not XML + '.ts': 'TypeScript', // Not XML + '.txt': 'Text', // Not Adblock Filter List, Vim Help File + '.yaml': 'YAML', // Not MiniYAML, OASv2-yaml, OASv3-yaml + '.yml': 'YAML', +}; + const extensionToLanguage = new Map(); for (const [languageName, languageData] of Object.entries(linguistLanguages)) { @@ -31,6 +48,12 @@ export const detectLanguageFromFilename = (filename: string): string => { // Check for extension match const ext = path.extname(filename).toLowerCase(); + + // Check override map first for ambiguous extensions + if (ext && ext in ambiguousExtensionOverrides) { + return ambiguousExtensionOverrides[ext]; + } + if (ext && extensionToLanguage.has(ext)) { return extensionToLanguage.get(ext)!; } From 31f1093e7f710bec1eb19584f5176c48e87348d6 Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Sun, 22 Mar 2026 10:55:34 -0700 Subject: [PATCH 2/3] delete --- .../features/tools/findSymbolDefinitions.ts | 65 ----------------- .../features/tools/findSymbolReferences.ts | 72 ------------------- 2 files changed, 137 deletions(-) delete mode 100644 packages/web/src/features/tools/findSymbolDefinitions.ts delete mode 100644 packages/web/src/features/tools/findSymbolReferences.ts diff --git a/packages/web/src/features/tools/findSymbolDefinitions.ts b/packages/web/src/features/tools/findSymbolDefinitions.ts deleted file mode 100644 index 2eb5e8993..000000000 --- a/packages/web/src/features/tools/findSymbolDefinitions.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { z } from "zod"; -import { isServiceError } from
"@/lib/utils"; -import { findSearchBasedSymbolDefinitions } from "@/features/codeNav/api"; -import { addLineNumbers } from "@/features/chat/utils"; -import { ToolDefinition } from "./types"; -import { FindSymbolFile } from "./findSymbolReferences"; -import { logger } from "./logger"; -import description from "./findSymbolDefinitions.txt"; -import { detectLanguageFromFilename } from "@/lib/languageDetection"; - -const findSymbolDefinitionsShape = { - symbol: z.string().describe("The symbol to find definitions of"), - path: z.string().describe("The file path where the symbol was encountered."), - repo: z.string().describe("The repository to scope the search to"), -}; - -export type FindSymbolDefinitionsMetadata = { - files: FindSymbolFile[]; -}; - -export const findSymbolDefinitionsDefinition: ToolDefinition< - 'find_symbol_definitions', - typeof findSymbolDefinitionsShape, - FindSymbolDefinitionsMetadata -> = { - name: 'find_symbol_definitions', - title: 'Find symbol definitions', - isReadOnly: true, - isIdempotent: true, - description, - inputSchema: z.object(findSymbolDefinitionsShape), - execute: async ({ symbol, path, repo }, _context) => { - logger.debug('find_symbol_definitions', { symbol, path, repo }); - const revision = "HEAD"; - const language = detectLanguageFromFilename(path); - - const response = await findSearchBasedSymbolDefinitions({ - symbolName: symbol, - language, - revisionName: revision, - repoName: repo, - }); - - if (isServiceError(response)) { - throw new Error(response.message); - } - - const metadata: FindSymbolDefinitionsMetadata = { - files: response.files.map((file) => ({ - fileName: file.fileName, - repo: file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })), - }; - - return { - output: JSON.stringify(metadata), - metadata, - }; - }, -}; diff --git a/packages/web/src/features/tools/findSymbolReferences.ts 
b/packages/web/src/features/tools/findSymbolReferences.ts deleted file mode 100644 index 1678613b2..000000000 --- a/packages/web/src/features/tools/findSymbolReferences.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { z } from "zod"; -import { isServiceError } from "@/lib/utils"; -import { findSearchBasedSymbolReferences } from "@/features/codeNav/api"; -import { addLineNumbers } from "@/features/chat/utils"; -import { ToolDefinition } from "./types"; -import { logger } from "./logger"; -import description from "./findSymbolReferences.txt"; -import { detectLanguageFromFilename } from "@/lib/languageDetection"; - -const findSymbolReferencesShape = { - symbol: z.string().describe("The symbol to find references to"), - path: z.string().describe("The file path where the symbol was encountered."), - repo: z.string().describe("The repository to scope the search to"), -}; - -export type FindSymbolFile = { - fileName: string; - repo: string; - language: string; - matches: string[]; - revision: string; -}; - -export type FindSymbolReferencesMetadata = { - files: FindSymbolFile[]; -}; - -export const findSymbolReferencesDefinition: ToolDefinition< - 'find_symbol_references', - typeof findSymbolReferencesShape, - FindSymbolReferencesMetadata -> = { - name: 'find_symbol_references', - title: 'Find symbol references', - isReadOnly: true, - isIdempotent: true, - description, - inputSchema: z.object(findSymbolReferencesShape), - execute: async ({ symbol, path, repo }, _context) => { - logger.debug('find_symbol_references', { symbol, path, repo }); - const revision = "HEAD"; - const language = detectLanguageFromFilename(path); - - const response = await findSearchBasedSymbolReferences({ - symbolName: symbol, - language, - revisionName: revision, - repoName: repo, - }); - - if (isServiceError(response)) { - throw new Error(response.message); - } - - const metadata: FindSymbolReferencesMetadata = { - files: response.files.map((file) => ({ - fileName: file.fileName, - repo: 
file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })), - }; - - return { - output: JSON.stringify(metadata), - metadata, - }; - }, -}; From 0a607847762cb54d7ef5a3aed4fc656a3aa9a18b Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Sun, 22 Mar 2026 11:00:21 -0700 Subject: [PATCH 3/3] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce5fd59ca..8c879790b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Changed language detection to resolve file extensions with multiple language resolutions (e.g., .md) to the most common resolution. [#1026](https://github.com/sourcebot-dev/sourcebot/pull/1026) + ## [4.15.11] - 2026-03-20 ### Changed