From f5c919bad3a139613fc1ec07033d67aafa7ad572 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 03:46:10 +0000 Subject: [PATCH 1/5] Initial plan From fa3de1cc553ea94d1a1ab736330f52c3505acf10 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 03:53:57 +0000 Subject: [PATCH 2/5] Add .git-ai/include.txt configuration to override .gitignore Co-authored-by: mars167 <29228178+mars167@users.noreply.github.com> --- package-lock.json | 6 ++ src/core/indexer.ts | 92 ++++++++++++++++++---- src/core/indexerIncremental.ts | 74 ++++++++++++++++++ test/includeConfig.test.ts | 137 +++++++++++++++++++++++++++++++++ 4 files changed, 293 insertions(+), 16 deletions(-) create mode 100644 test/includeConfig.test.ts diff --git a/package-lock.json b/package-lock.json index 6cf65d1..069bcc9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -451,6 +451,7 @@ "node_modules/@types/node": { "version": "25.0.9", "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -552,6 +553,7 @@ "node_modules/apache-arrow": { "version": "18.1.0", "license": "Apache-2.0", + "peer": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", @@ -969,6 +971,7 @@ "node_modules/express": { "version": "5.2.1", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -2043,6 +2046,7 @@ "version": "0.21.1", "hasInstallScript": true, "license": "MIT", + "peer": true, "dependencies": { "node-addon-api": "^8.0.0", "node-gyp-build": "^4.8.0" @@ -2218,6 +2222,7 @@ "node_modules/typescript": { "version": "5.9.3", "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -2326,6 +2331,7 @@ "node_modules/zod": { "version": "4.3.5", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff 
--git a/src/core/indexer.ts b/src/core/indexer.ts index c499efc..626e67f 100644 --- a/src/core/indexer.ts +++ b/src/core/indexer.ts @@ -36,6 +36,23 @@ async function loadIgnorePatterns(repoRoot: string, fileName: string): Promise Boolean(l)); } +async function loadIncludePatterns(repoRoot: string): Promise { + const includePath = path.join(repoRoot, '.git-ai', 'include.txt'); + if (!await fs.pathExists(includePath)) return []; + const raw = await fs.readFile(includePath, 'utf-8'); + return raw + .split('\n') + .map(l => l.trim()) + .map((l) => { + if (l.length === 0) return null; + if (l.startsWith('#')) return null; + const withoutLeadingSlash = l.startsWith('/') ? l.slice(1) : l; + if (withoutLeadingSlash.endsWith('/')) return `${withoutLeadingSlash}**`; + return withoutLeadingSlash; + }) + .filter((l): l is string => Boolean(l)); +} + function inferIndexLang(file: string): IndexLang { if (file.endsWith('.md') || file.endsWith('.mdx')) return 'markdown'; if (file.endsWith('.yml') || file.endsWith('.yaml')) return 'yaml'; @@ -71,30 +88,73 @@ export class IndexerV2 { const aiIgnore = await loadIgnorePatterns(this.repoRoot, '.aiignore'); const gitIgnore = await loadIgnorePatterns(this.repoRoot, '.gitignore'); - const files = await glob('**/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}', { + const includePatterns = await loadIncludePatterns(this.repoRoot); + + // Base ignore patterns that are always applied + const baseIgnore = [ + 'node_modules/**', + '**/node_modules/**', + '.git/**', + '**/.git/**', + '.git-ai/**', + '**/.git-ai/**', + '.repo/**', + '**/.repo/**', + 'dist/**', + 'target/**', + '**/target/**', + 'build/**', + '**/build/**', + '.gradle/**', + '**/.gradle/**', + ]; + + // Get files with normal ignore patterns (aiIgnore and gitIgnore) + const filesNormal = await glob('**/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}', { cwd: this.scanRoot, nodir: true, ignore: [ - 'node_modules/**', - '**/node_modules/**', - '.git/**', - '**/.git/**', - 
'.git-ai/**', - '**/.git-ai/**', - '.repo/**', - '**/.repo/**', - 'dist/**', - 'target/**', - '**/target/**', - 'build/**', - '**/build/**', - '.gradle/**', - '**/.gradle/**', + ...baseIgnore, ...aiIgnore, ...gitIgnore, ], }); + let files = filesNormal; + + // If include patterns exist, also get files matching those patterns (ignoring gitIgnore but respecting aiIgnore) + if (includePatterns.length > 0) { + // For each include pattern, get files matching it without gitIgnore restrictions + const includedFileSets = await Promise.all( + includePatterns.map(async (pattern) => { + // Ensure pattern covers all file extensions we support + let fullPattern = pattern; + // If pattern is a directory pattern (e.g., "generated/**"), append file extensions + if (pattern.endsWith('**')) { + fullPattern = `${pattern}/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}`; + } else if (!pattern.match(/\.(ts|tsx|js|jsx|java|c|h|go|py|rs|md|mdx|yml|yaml)$/)) { + // If pattern doesn't end with a file extension, treat it as a directory + fullPattern = `${pattern}/**/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}`; + } + + return glob(fullPattern, { + cwd: this.scanRoot, + nodir: true, + ignore: [ + ...baseIgnore, + ...aiIgnore, + // Note: gitIgnore is NOT applied here + ], + }); + }) + ); + + // Flatten and merge with normal files + const includedFiles = includedFileSets.flat(); + const fileSet = new Set([...filesNormal, ...includedFiles]); + files = Array.from(fileSet); + } + const languages = Array.from(new Set(files.map(inferIndexLang))); const { byLang } = await openTablesByLang({ dbDir, diff --git a/src/core/indexerIncremental.ts b/src/core/indexerIncremental.ts index b4f3e6a..9bd10c4 100644 --- a/src/core/indexerIncremental.ts +++ b/src/core/indexerIncremental.ts @@ -21,6 +21,72 @@ export interface IncrementalIndexOptions { onProgress?: (p: { totalFiles: number; processedFiles: number; currentFile?: string }) => void; } +async function loadIgnorePatterns(repoRoot: string, 
fileName: string): Promise { + const ignorePath = path.join(repoRoot, fileName); + if (!await fs.pathExists(ignorePath)) return []; + const raw = await fs.readFile(ignorePath, 'utf-8'); + return raw + .split('\n') + .map(l => l.trim()) + .map((l) => { + if (l.length === 0) return null; + if (l.startsWith('#')) return null; + if (l.startsWith('!')) return null; + const withoutLeadingSlash = l.startsWith('/') ? l.slice(1) : l; + if (withoutLeadingSlash.endsWith('/')) return `${withoutLeadingSlash}**`; + return withoutLeadingSlash; + }) + .filter((l): l is string => Boolean(l)); +} + +async function loadIncludePatterns(repoRoot: string): Promise { + const includePath = path.join(repoRoot, '.git-ai', 'include.txt'); + if (!await fs.pathExists(includePath)) return []; + const raw = await fs.readFile(includePath, 'utf-8'); + return raw + .split('\n') + .map(l => l.trim()) + .map((l) => { + if (l.length === 0) return null; + if (l.startsWith('#')) return null; + const withoutLeadingSlash = l.startsWith('/') ? 
l.slice(1) : l; + if (withoutLeadingSlash.endsWith('/')) return `${withoutLeadingSlash}**`; + return withoutLeadingSlash; + }) + .filter((l): l is string => Boolean(l)); +} + +function matchesPattern(file: string, pattern: string): boolean { + // Convert glob pattern to regex + const regexPattern = pattern + .replace(/\*\*/g, '___GLOBSTAR___') + .replace(/\*/g, '[^/]*') + .replace(/___GLOBSTAR___/g, '.*') + .replace(/\?/g, '[^/]') + .replace(/\./g, '\\.'); + const regex = new RegExp(`^${regexPattern}$`); + return regex.test(file); +} + +function shouldIndexFile(file: string, aiIgnore: string[], gitIgnore: string[], includePatterns: string[]): boolean { + // Check if file matches aiIgnore patterns (highest priority exclusion) + if (aiIgnore.some(pattern => matchesPattern(file, pattern))) { + return false; + } + + // Check if file matches include patterns (overrides gitIgnore) + if (includePatterns.some(pattern => matchesPattern(file, pattern))) { + return true; + } + + // Check if file matches gitIgnore patterns + if (gitIgnore.some(pattern => matchesPattern(file, pattern))) { + return false; + } + + return true; +} + function buildChunkText(file: string, symbol: { name: string; kind: string; signature: string }): string { return `file:${file}\nkind:${symbol.kind}\nname:${symbol.name}\nsignature:${symbol.signature}`; } @@ -99,6 +165,11 @@ export class IncrementalIndexerV2 { await fs.ensureDir(gitAiDir); const dbDir = defaultDbDir(this.repoRoot); + // Load ignore and include patterns + const aiIgnore = await loadIgnorePatterns(this.repoRoot, '.aiignore'); + const gitIgnore = await loadIgnorePatterns(this.repoRoot, '.gitignore'); + const includePatterns = await loadIncludePatterns(this.repoRoot); + const { byLang } = await openTablesByLang({ dbDir, dim: this.dim, @@ -144,6 +215,9 @@ export class IncrementalIndexerV2 { if (!isIndexableFile(filePosix)) continue; + // Check if file should be indexed based on ignore/include patterns + if (!shouldIndexFile(filePosix, 
aiIgnore, gitIgnore, includePatterns)) continue; + const lang = inferIndexLang(filePosix); if (!chunkRowsByLang[lang]) chunkRowsByLang[lang] = []; if (!refRowsByLang[lang]) refRowsByLang[lang] = []; diff --git a/test/includeConfig.test.ts b/test/includeConfig.test.ts new file mode 100644 index 0000000..d9a8455 --- /dev/null +++ b/test/includeConfig.test.ts @@ -0,0 +1,137 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import fs from 'fs-extra'; +import path from 'path'; +import os from 'os'; +import simpleGit from 'simple-git'; +// @ts-ignore dist module has no typings +import { IndexerV2 } from '../dist/src/core/indexer.js'; + +async function createTempDir(): Promise { + const base = await fs.mkdtemp(path.join(os.tmpdir(), 'git-ai-include-')); + return base; +} + +async function initGitRepo(dir: string): Promise { + const git = simpleGit(dir); + await git.init(); + await git.addConfig('user.name', 'Test User'); + await git.addConfig('user.email', 'test@example.com'); +} + +test('include.txt allows indexing of gitignored directories', async () => { + const repoRoot = await createTempDir(); + await initGitRepo(repoRoot); + + // Create a generated code directory structure + const generatedDir = path.join(repoRoot, 'generated'); + const srcDir = path.join(repoRoot, 'src'); + await fs.ensureDir(generatedDir); + await fs.ensureDir(srcDir); + + // Create files + await fs.writeFile(path.join(generatedDir, 'api.ts'), 'export const API_URL = "http://localhost";'); + await fs.writeFile(path.join(srcDir, 'main.ts'), 'import { API_URL } from "../generated/api";'); + + // Add generated directory to .gitignore + await fs.writeFile(path.join(repoRoot, '.gitignore'), 'generated/\n'); + + // Create .git-ai directory and include.txt + const gitAiDir = path.join(repoRoot, '.git-ai'); + await fs.ensureDir(gitAiDir); + await fs.writeFile(path.join(gitAiDir, 'include.txt'), 'generated/**\n'); + + // Run indexer + const indexer = new IndexerV2({ + repoRoot, + 
dim: 64, + overwrite: true, + }); + + await indexer.run(); + + // Check that the database was created and contains entries + const dbDir = path.join(gitAiDir, 'lancedb'); + const dbExists = await fs.pathExists(dbDir); + assert.ok(dbExists, 'Database directory should exist'); + + // Check that ts table exists (both files are TypeScript) + const tsTablePath = path.join(dbDir, 'refs_ts.lance'); + const tsTableExists = await fs.pathExists(tsTablePath); + assert.ok(tsTableExists, 'TypeScript refs table should exist'); +}); + +test('aiignore takes priority over include.txt', async () => { + const repoRoot = await createTempDir(); + await initGitRepo(repoRoot); + + // Create a generated code directory structure + const generatedDir = path.join(repoRoot, 'generated'); + await fs.ensureDir(generatedDir); + + // Create files + await fs.writeFile(path.join(generatedDir, 'api.ts'), 'export const API_URL = "http://localhost";'); + + // Add generated directory to .gitignore + await fs.writeFile(path.join(repoRoot, '.gitignore'), 'generated/\n'); + + // Create .git-ai directory with both include.txt and parent .aiignore + const gitAiDir = path.join(repoRoot, '.git-ai'); + await fs.ensureDir(gitAiDir); + await fs.writeFile(path.join(gitAiDir, 'include.txt'), 'generated/**\n'); + + // Add generated to .aiignore (should take priority) + await fs.writeFile(path.join(repoRoot, '.aiignore'), 'generated/\n'); + + // Run indexer + const indexer = new IndexerV2({ + repoRoot, + dim: 64, + overwrite: true, + }); + + await indexer.run(); + + // Check that the database was created + const dbDir = path.join(gitAiDir, 'lancedb'); + const dbExists = await fs.pathExists(dbDir); + assert.ok(dbExists, 'Database directory should exist'); + + // The TypeScript table might not exist if no files were indexed + // This is expected behavior as .aiignore takes priority +}); + +test('include.txt with specific file patterns', async () => { + const repoRoot = await createTempDir(); + await 
initGitRepo(repoRoot); + + // Create directory structure + const buildDir = path.join(repoRoot, 'build'); + await fs.ensureDir(buildDir); + + // Create files + await fs.writeFile(path.join(buildDir, 'important.ts'), 'export const VERSION = "1.0.0";'); + await fs.writeFile(path.join(buildDir, 'temp.ts'), 'export const TEMP = "temp";'); + + // Add build directory to .gitignore + await fs.writeFile(path.join(repoRoot, '.gitignore'), 'build/\n'); + + // Create .git-ai directory and include only specific file + const gitAiDir = path.join(repoRoot, '.git-ai'); + await fs.ensureDir(gitAiDir); + await fs.writeFile(path.join(gitAiDir, 'include.txt'), 'build/important.ts\n'); + + // Run indexer + const indexer = new IndexerV2({ + repoRoot, + dim: 64, + overwrite: true, + }); + + await indexer.run(); + + // Check that the database was created + const dbDir = path.join(gitAiDir, 'lancedb'); + const dbExists = await fs.pathExists(dbDir); + assert.ok(dbExists, 'Database directory should exist'); +}); From a2f7b05916e5f2fd09f157029025b38b1d1e9464 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 03:55:26 +0000 Subject: [PATCH 3/5] Add documentation for .git-ai/include.txt configuration Co-authored-by: mars167 <29228178+mars167@users.noreply.github.com> --- README.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ README.zh-CN.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) diff --git a/README.md b/README.md index a90dfa2..dd8896a 100644 --- a/README.md +++ b/README.md @@ -286,6 +286,79 @@ That's it! 3 steps to get started, immediately begin deep understanding of your --- +## ⚙️ Configuration + +### File Filtering + +git-ai respects your project's ignore files to control which files are indexed: + +#### `.gitignore` - Standard Git Ignore + +Files matching patterns in `.gitignore` are excluded from indexing by default. 
+ +#### `.aiignore` - AI-Specific Exclusions (Highest Priority) + +Create a `.aiignore` file in your repository root to exclude files from git-ai indexing without ignoring them in Git: + +```bash +# Example .aiignore +test-fixtures/** +*.generated.ts +docs/api-reference/** +``` + +#### `.git-ai/include.txt` - Force Include (Overrides `.gitignore`) + +Sometimes you need to index generated code or files that are in `.gitignore` but important for code understanding. Create `.git-ai/include.txt` to force-index specific patterns: + +```bash +# Example .git-ai/include.txt +# Include generated API clients +generated/api/** + +# Include compiled type definitions (note: built-in exclusions such as dist/ and build/ are always skipped by the full indexer) +out/types/** + +# Include code from specific ignored directories +vendor/important-lib/** +``` + +**Priority Order (Highest to Lowest):** +1. `.aiignore` - Explicit exclusions always win +2. `.git-ai/include.txt` - Force-include patterns override `.gitignore` +3. `.gitignore` - Standard Git ignore patterns + +**Supported Pattern Syntax:** +- `**` - Match any number of directories +- `*` - Match any characters within a directory +- `directory/` - Match entire directory (automatically converts to `directory/**`) +- `file.ts` - Match specific file +- Lines starting with `#` are comments + +**Example Configuration:** + +```bash +# .gitignore +dist/ +generated/ +*.log + +# .git-ai/include.txt +generated/api/** +generated/types/** + +# .aiignore (overrides everything) +generated/test-data/** +``` + +With this configuration: +- ✅ `generated/api/client.ts` - Indexed (included via include.txt) +- ✅ `generated/types/models.ts` - Indexed (included via include.txt) +- ❌ `generated/test-data/mock.ts` - Not indexed (.aiignore takes priority) +- ❌ `dist/bundle.js` - Not indexed (.gitignore, not in include.txt) + +--- + ## 🛠️ Troubleshooting ### Windows Installation Issues diff --git a/README.zh-CN.md b/README.zh-CN.md index ca1ed52..d034238 100644 --- 
a/README.zh-CN.md +++ b/README.zh-CN.md @@ -283,6 +283,79 @@ git-ai ai graph callers authenticateUser --- +## ⚙️ 配置 + +### 文件过滤 + +git-ai 遵循项目的忽略文件配置来控制哪些文件会被索引: + +#### `.gitignore` - 标准 Git 忽略规则 + +默认情况下,匹配 `.gitignore` 中模式的文件会被排除在索引之外。 + +#### `.aiignore` - AI 专属排除规则(最高优先级) + +在仓库根目录创建 `.aiignore` 文件,用于排除特定文件的索引,这些文件应该被 git-ai 忽略但不一定要被 Git 忽略: + +```bash +# .aiignore 示例 +test-fixtures/** +*.generated.ts +docs/api-reference/** +``` + +#### `.git-ai/include.txt` - 强制包含(覆盖 `.gitignore`) + +有时您需要索引生成的代码或在 `.gitignore` 中但对代码理解很重要的文件。创建 `.git-ai/include.txt` 来强制索引特定模式: + +```bash +# .git-ai/include.txt 示例 +# 包含生成的 API 客户端 +generated/api/** + +# 包含带有重要类型定义的编译产物(注意:完整索引始终跳过 dist/、build/ 等内置排除目录) +out/types/** + +# 包含特定被忽略目录中的代码 +vendor/important-lib/** +``` + +**优先级顺序(从高到低):** +1. `.aiignore` - 显式排除规则始终生效 +2. `.git-ai/include.txt` - 强制包含模式覆盖 `.gitignore` +3. `.gitignore` - 标准 Git 忽略模式 + +**支持的模式语法:** +- `**` - 匹配任意数量的目录 +- `*` - 匹配目录内的任意字符 +- `directory/` - 匹配整个目录(自动转换为 `directory/**`) +- `file.ts` - 匹配特定文件 +- 以 `#` 开头的行为注释 + +**配置示例:** + +```bash +# .gitignore +dist/ +generated/ +*.log + +# .git-ai/include.txt +generated/api/** +generated/types/** + +# .aiignore (覆盖所有规则) +generated/test-data/** +``` + +此配置下: +- ✅ `generated/api/client.ts` - 被索引(通过 include.txt 包含) +- ✅ `generated/types/models.ts` - 被索引(通过 include.txt 包含) +- ❌ `generated/test-data/mock.ts` - 不被索引(.aiignore 优先级最高) +- ❌ `dist/bundle.js` - 不被索引(在 .gitignore 中,不在 include.txt 中) + +--- + ## 🛠️ 故障排除 ### Windows 安装问题 From ac6353bb0056383740f0eeeff3287c670986ca79 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 03:57:07 +0000 Subject: [PATCH 4/5] Address code review feedback: extract constants, fix baseIgnore, add regex caching Co-authored-by: mars167 <29228178+mars167@users.noreply.github.com> --- src/core/indexer.ts | 10 +++++++--- src/core/indexerIncremental.ts | 24 ++++++++++++++++-------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git 
a/src/core/indexer.ts b/src/core/indexer.ts index 626e67f..fe6fb4c 100644 --- a/src/core/indexer.ts +++ b/src/core/indexer.ts @@ -9,6 +9,9 @@ import { ChunkRow, RefRow } from './types'; import { toPosixPath } from './paths'; import { getCurrentCommitHash } from './git'; +// Supported file extensions for indexing +const INDEXABLE_EXTENSIONS = 'ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml'; + export interface IndexOptions { repoRoot: string; scanRoot?: string; @@ -101,6 +104,7 @@ export class IndexerV2 { '.repo/**', '**/.repo/**', 'dist/**', + '**/dist/**', 'target/**', '**/target/**', 'build/**', @@ -110,7 +114,7 @@ export class IndexerV2 { ]; // Get files with normal ignore patterns (aiIgnore and gitIgnore) - const filesNormal = await glob('**/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}', { + const filesNormal = await glob(`**/*.{${INDEXABLE_EXTENSIONS}}`, { cwd: this.scanRoot, nodir: true, ignore: [ @@ -131,10 +135,10 @@ export class IndexerV2 { let fullPattern = pattern; // If pattern is a directory pattern (e.g., "generated/**"), append file extensions if (pattern.endsWith('**')) { - fullPattern = `${pattern}/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}`; + fullPattern = `${pattern}/*.{${INDEXABLE_EXTENSIONS}}`; } else if (!pattern.match(/\.(ts|tsx|js|jsx|java|c|h|go|py|rs|md|mdx|yml|yaml)$/)) { // If pattern doesn't end with a file extension, treat it as a directory - fullPattern = `${pattern}/**/*.{ts,tsx,js,jsx,java,c,h,go,py,rs,md,mdx,yml,yaml}`; + fullPattern = `${pattern}/**/*.{${INDEXABLE_EXTENSIONS}}`; } return glob(fullPattern, { diff --git a/src/core/indexerIncremental.ts b/src/core/indexerIncremental.ts index 9bd10c4..11639d4 100644 --- a/src/core/indexerIncremental.ts +++ b/src/core/indexerIncremental.ts @@ -56,15 +56,23 @@ async function loadIncludePatterns(repoRoot: string): Promise { .filter((l): l is string => Boolean(l)); } +// Cache for compiled regex patterns +const patternCache = new Map(); + function matchesPattern(file: 
string, pattern: string): boolean { - // Convert glob pattern to regex - const regexPattern = pattern - .replace(/\*\*/g, '___GLOBSTAR___') - .replace(/\*/g, '[^/]*') - .replace(/___GLOBSTAR___/g, '.*') - .replace(/\?/g, '[^/]') - .replace(/\./g, '\\.'); - const regex = new RegExp(`^${regexPattern}$`); + // Check cache first + let regex = patternCache.get(pattern); + if (!regex) { + // Convert glob pattern to regex + const regexPattern = pattern + .replace(/\*\*/g, '___GLOBSTAR___') + .replace(/\*/g, '[^/]*') + .replace(/___GLOBSTAR___/g, '.*') + .replace(/\?/g, '[^/]') + .replace(/\./g, '\\.'); + regex = new RegExp(`^${regexPattern}$`); + patternCache.set(pattern, regex); + } return regex.test(file); } From f50641b89db8c67930c906e49c9ebc13eec0ce6e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 03:59:14 +0000 Subject: [PATCH 5/5] Fix incomplete sanitization in regex pattern matching Co-authored-by: mars167 <29228178+mars167@users.noreply.github.com> --- src/core/indexerIncremental.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/indexerIncremental.ts b/src/core/indexerIncremental.ts index 11639d4..58f9a05 100644 --- a/src/core/indexerIncremental.ts +++ b/src/core/indexerIncremental.ts @@ -63,12 +63,16 @@ function matchesPattern(file: string, pattern: string): boolean { // Check cache first let regex = patternCache.get(pattern); if (!regex) { - // Convert glob pattern to regex + // Convert glob pattern to regex by escaping special regex chars first, then handling glob patterns const regexPattern = pattern + // Escape regex special characters except the ones we use for glob + .replace(/[\\^$+{}[\]|()]/g, '\\$&') + // Handle glob patterns .replace(/\*\*/g, '___GLOBSTAR___') .replace(/\*/g, '[^/]*') .replace(/___GLOBSTAR___/g, '.*') .replace(/\?/g, '[^/]') + // Escape dots for literal matching .replace(/\./g, '\\.'); regex = new RegExp(`^${regexPattern}$`); 
patternCache.set(pattern, regex);