Skip to content

Commit 9eb23a0

Browse files
committed
feat(init): add grep and glob local-op handlers
Add two new local-op types that let the server search project files without reading them all:

- grep: regex search across files with an optional glob filter, batched (multiple patterns in one round-trip), capped at 100 matches per search with 2000-character line truncation
- glob: find files by pattern, batched (multiple patterns in one round-trip), capped at 100 results

Uses ripgrep (rg) when available for speed and native binary-file detection. Falls back to a Node.js fs-based implementation that skips node_modules, .git, and other non-source directories, with a per-file size cap matching MAX_FILE_BYTES (256KB).

Counterpart server-side schemas will be added in cli-init-api.

Made-with: Cursor
1 parent 74898e8 commit 9eb23a0

File tree

4 files changed

+578
-0
lines changed

4 files changed

+578
-0
lines changed

src/lib/init/local-ops.ts

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import type {
3232
DetectSentryPayload,
3333
DirEntry,
3434
FileExistsBatchPayload,
35+
GlobPayload,
36+
GrepPayload,
3537
ListDirPayload,
3638
LocalOpPayload,
3739
LocalOpResult,
@@ -313,6 +315,10 @@ export async function handleLocalOp(
313315
return await runCommands(payload, options.dryRun);
314316
case "apply-patchset":
315317
return await applyPatchset(payload, options.dryRun);
318+
case "grep":
319+
return await grep(payload);
320+
case "glob":
321+
return await glob(payload);
316322
case "create-sentry-project":
317323
return await createSentryProject(payload, options);
318324
case "detect-sentry":
@@ -846,6 +852,331 @@ async function detectSentry(
846852
};
847853
}
848854

855+
// ── Grep & Glob ─────────────────────────────────────────────────────
856+
857+
// Match limit for one grep search; applied when the payload gives no `maxResultsPerSearch`.
const MAX_GREP_RESULTS_PER_SEARCH = 100;
858+
// Matched lines longer than this many characters are cut and suffixed with "…".
const MAX_GREP_LINE_LENGTH = 2000;
859+
// File limit for one glob pattern; applied when the payload gives no `maxResults`.
const MAX_GLOB_RESULTS = 100;
860+
// Directory names the Node.js fallback walker (`walkFiles`) never descends into.
const SKIP_DIRS = new Set([
861+
"node_modules",
862+
".git",
863+
"__pycache__",
864+
".venv",
865+
"venv",
866+
"dist",
867+
"build",
868+
]);
869+
870+
// One matching line: file path, 1-based line number, and the (possibly truncated) line text.
type GrepMatch = { path: string; lineNum: number; line: string };
871+
872+
// ── Ripgrep implementations (preferred when rg is on PATH) ──────────
873+
874+
/**
 * Run `cmd` with `args` in `cwd` and collect its stdout as UTF-8 text.
 *
 * Collection stops once `MAX_OUTPUT_BYTES` has been reached; later chunks are
 * dropped while the child keeps running. stderr is discarded instead of piped:
 * a pipe that nobody reads fills up and stalls the child until the timeout
 * kills it.
 *
 * @param cmd - Executable to launch.
 * @param args - Arguments passed to the executable as-is (no shell involved).
 * @param cwd - Working directory for the child process.
 * @returns The captured stdout plus `ok`, which is true only for exit codes
 *   0 and 1. A child terminated by the 30 s timeout reports `ok: false`.
 * @throws Rejects when the process cannot be spawned (`error` event), e.g.
 *   because the executable is not installed.
 */
function spawnCollect(
  cmd: string,
  args: string[],
  cwd: string
): Promise<{ stdout: string; ok: boolean }> {
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      cwd,
      // stderr is never consumed here, so do not open a pipe for it.
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 30_000,
    });

    const chunks: Buffer[] = [];
    let len = 0;
    child.stdout.on("data", (chunk: Buffer) => {
      // Keep reading (so the child is never blocked on stdout) but stop
      // buffering once the cap has been reached.
      if (len < MAX_OUTPUT_BYTES) {
        chunks.push(chunk);
        len += chunk.length;
      }
    });

    child.on("error", (err) => {
      reject(err);
    });
    child.on("close", (code) => {
      resolve({
        stdout: Buffer.concat(chunks).toString("utf-8"),
        // Exit code 1 is accepted alongside 0 (grep-style tools use it for
        // "no matches"); anything else, including a kill signal, is a failure.
        ok: code === 0 || code === 1,
      });
    });
  });
}
906+
907+
/**
 * Parse line-oriented `<file>:<lineNumber>:<text>` output into grep matches.
 *
 * @param cwd - Directory that reported paths are made relative to.
 * @param stdout - Raw search output, one match per line.
 * @param maxResults - Maximum number of output lines to convert.
 * @returns The parsed matches and `truncated`, which is true when the output
 *   held more non-empty lines than `maxResults`.
 *
 * Lines that do not follow the expected shape (no numeric line number) are
 * skipped. Match text longer than `MAX_GREP_LINE_LENGTH` is cut and suffixed
 * with "…".
 */
function parseRgGrepOutput(
  cwd: string,
  stdout: string,
  maxResults: number
): { matches: GrepMatch[]; truncated: boolean } {
  const lines = stdout.split("\n").filter(Boolean);
  const truncated = lines.length > maxResults;
  const matches: GrepMatch[] = [];

  // The optional `X:` prefix (only when followed by a path separator) keeps
  // Windows absolute paths such as `C:\repo\a.ts` in one piece; without it the
  // drive colon would be mistaken for the file/line separator.
  // `[\s\S]*` is used for the text because `.` refuses stray line terminators.
  const linePattern = /^((?:[A-Za-z]:(?=[\\/]))?[^:]*):(\d+):([\s\S]*)$/;

  for (const raw of lines.slice(0, maxResults)) {
    // Files with CRLF endings leave a trailing "\r" after splitting on "\n".
    const line = raw.endsWith("\r") ? raw.slice(0, -1) : raw;
    const parsed = linePattern.exec(line);
    if (!parsed) {
      continue;
    }
    const filePart = parsed[1] ?? "";
    const lineNum = Number.parseInt(parsed[2] ?? "", 10);
    let text = parsed[3] ?? "";
    if (text.length > MAX_GREP_LINE_LENGTH) {
      text = `${text.substring(0, MAX_GREP_LINE_LENGTH)}…`;
    }
    matches.push({ path: path.relative(cwd, filePart), lineNum, line: text });
  }

  return { matches, truncated };
}
937+
938+
/**
 * Regex search using ripgrep (`rg`).
 *
 * @param opts.cwd - Project root; the child runs here and result paths are
 *   made relative to it.
 * @param opts.pattern - Regular expression, passed through `--regexp` so a
 *   leading dash is never read as a flag.
 * @param opts.searchPath - Optional sub-path, resolved through `safePath`.
 * @param opts.include - Optional glob restricting which files are searched.
 * @param opts.maxResults - Maximum number of matches to return.
 * @returns Parsed matches and whether more than `maxResults` lines came back.
 * @throws When `rg` cannot be spawned, or when it fails without producing any
 *   output (for example a pattern its regex engine rejects), so the caller
 *   can fall back instead of reporting a false "no matches".
 */
async function rgGrepSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  const { cwd, pattern, searchPath, include, maxResults } = opts;
  const target = searchPath ? safePath(cwd, searchPath) : cwd;
  const args = [
    "-nH",
    "--no-messages",
    "--hidden",
    // One more than the limit so truncation can be detected.
    "--max-count",
    String(maxResults + 1),
    "--regexp",
    pattern,
  ];
  if (include) {
    args.push("--glob", include);
  }
  // `--hidden` also exposes the repository's `.git` directory. Exclude it
  // explicitly, and last, because later globs take precedence in ripgrep.
  args.push("--glob", "!.git");
  args.push(target);

  const { stdout, ok } = await spawnCollect("rg", args, cwd);
  if (!ok && stdout.length === 0) {
    throw new Error("ripgrep failed without producing any output");
  }
  return parseRgGrepOutput(cwd, stdout, maxResults);
}
964+
965+
/**
 * List files matching a glob using ripgrep's `--files` mode.
 *
 * @param opts.cwd - Project root; the child runs here and result paths are
 *   made relative to it.
 * @param opts.pattern - Glob the listed files must match.
 * @param opts.searchPath - Optional sub-path, resolved through `safePath`.
 * @param opts.maxResults - Maximum number of files to return.
 * @returns Up to `maxResults` relative paths and whether more were found.
 * @throws When `rg` cannot be spawned, or when it fails without producing any
 *   output, so the caller can fall back instead of reporting "no files".
 */
async function rgGlobSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  const { cwd, pattern, searchPath, maxResults } = opts;
  const target = searchPath ? safePath(cwd, searchPath) : cwd;
  const args = [
    "--files",
    "--hidden",
    "--glob",
    pattern,
    // `--hidden` would otherwise list the contents of `.git`; this exclusion
    // comes last because later globs take precedence in ripgrep.
    "--glob",
    "!.git",
    target,
  ];

  const { stdout, ok } = await spawnCollect("rg", args, cwd);
  if (!ok && stdout.length === 0) {
    throw new Error("ripgrep failed without producing any output");
  }
  const lines = stdout.split("\n").filter(Boolean);
  const truncated = lines.length > maxResults;
  const files = lines.slice(0, maxResults).map((f) => path.relative(cwd, f));
  return { files, truncated };
}
981+
982+
// ── Node.js fallback (when rg is not installed) ─────────────────────
983+
984+
/**
 * Recursively walk a directory, yielding file paths relative to `root`.
 *
 * Directories named in `SKIP_DIRS` or starting with "." are not entered, and
 * directories that cannot be read are skipped silently. Only entries the
 * directory listing reports as regular files are yielded.
 *
 * @param root - Directory that yielded paths are relative to.
 * @param base - Directory to list on this call (the walk's starting point).
 * @param globPattern - Optional filter. A pattern without "/" is tested
 *   against the file name alone; a pattern containing "/" is tested against
 *   the "/"-separated path relative to `root`, so directory-qualified
 *   patterns such as `src/*.ts` can match.
 */
async function* walkFiles(
  root: string,
  base: string,
  globPattern: string | undefined
): AsyncGenerator<string> {
  let entries: fs.Dirent[];
  try {
    entries = await fs.promises.readdir(base, { withFileTypes: true });
  } catch {
    return;
  }

  // A basename can never satisfy a pattern that names a directory, so those
  // patterns have to be compared with the relative path instead.
  const matchAgainstPath = globPattern?.includes("/") ?? false;

  for (const entry of entries) {
    const full = path.join(base, entry.name);
    const rel = path.relative(root, full);

    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) {
        yield* walkFiles(root, full, globPattern);
      }
      continue;
    }
    if (!entry.isFile()) {
      continue;
    }
    if (!globPattern) {
      yield rel;
      continue;
    }

    // Normalise to "/" for matching only; the yielded path keeps the
    // platform's separator.
    const candidate = matchAgainstPath
      ? rel.split(path.sep).join("/")
      : entry.name;
    if (matchGlob(candidate, globPattern)) {
      yield rel;
    }
  }
}
1016+
1017+
/**
 * Minimal glob matcher.
 *
 * Supported syntax:
 * - `*` matches any run of characters within one path segment
 * - `?` matches a single character within one path segment
 * - `**` matches across segments; `**` followed by "/" also matches zero
 *   directories
 * - `{a,b}` matches any of the comma-separated alternatives (only when the
 *   braces are balanced; otherwise braces and commas are literal)
 *
 * Every other character, including `[` and `]`, is matched literally.
 *
 * @param name - File name or "/"-separated relative path to test.
 * @param pattern - Glob pattern. A pattern without "/" is compared with the
 *   last segment of `name` only.
 * @returns Whether the whole subject matches the pattern.
 */
function matchGlob(name: string, pattern: string): boolean {
  const subject = pattern.includes("/")
    ? name
    : name.substring(name.lastIndexOf("/") + 1);

  // Unbalanced braces would produce an invalid regex, so alternation is only
  // enabled when every "{" has a matching "}".
  let depth = 0;
  let balanced = true;
  for (const ch of pattern) {
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth < 0) {
        balanced = false;
        break;
      }
    }
  }
  if (depth !== 0) {
    balanced = false;
  }

  let re = "";
  let open = 0;
  for (let i = 0; i < pattern.length; i += 1) {
    const ch = pattern.charAt(i);
    if (ch === "*") {
      if (pattern.charAt(i + 1) !== "*") {
        re += "[^/]*";
        continue;
      }
      // Collapse any run of stars into a single globstar.
      while (pattern.charAt(i + 1) === "*") {
        i += 1;
      }
      if (pattern.charAt(i + 1) === "/") {
        // Globstar plus slash: zero or more leading directories.
        i += 1;
        re += "(?:.*/)?";
      } else {
        re += ".*";
      }
    } else if (ch === "?") {
      re += "[^/]";
    } else if (balanced && ch === "{") {
      open += 1;
      re += "(?:";
    } else if (balanced && ch === "}") {
      open -= 1;
      re += ")";
    } else if (balanced && open > 0 && ch === ",") {
      re += "|";
    } else {
      re += ch.replace(/[.+^${}()|[\]\\]/g, "\\$&");
    }
  }
  return new RegExp(`^${re}$`).test(subject);
}
1025+
1026+
/**
 * Search files for a regex pattern using Node.js fs. Fallback for when
 * ripgrep is not available.
 *
 * Files larger than `MAX_FILE_BYTES`, files that cannot be read, and files
 * containing a NUL character (treated as binary) are skipped. Lines are split
 * on LF or CRLF, so reported text never carries a trailing carriage return.
 *
 * @param opts.cwd - Project root; result paths are relative to it.
 * @param opts.pattern - Regular expression source, compiled with `RegExp`.
 * @param opts.searchPath - Optional sub-path, resolved through `safePath`.
 * @param opts.include - Optional glob filter handed to `walkFiles`.
 * @param opts.maxResults - Maximum number of matches to return.
 * @returns Up to `maxResults` matches and whether more were found.
 * @throws SyntaxError when `pattern` is not a valid JavaScript regex.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: file-walking search with early exits
async function fsGrepSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  const { cwd, pattern, searchPath, include, maxResults } = opts;
  const target = searchPath ? safePath(cwd, searchPath) : cwd;
  const regex = new RegExp(pattern);
  const matches: GrepMatch[] = [];

  for await (const rel of walkFiles(cwd, target, include)) {
    // One match beyond the limit is enough to know the result is truncated.
    if (matches.length > maxResults) {
      break;
    }
    const absPath = path.join(cwd, rel);
    let content: string;
    try {
      const stat = await fs.promises.stat(absPath);
      if (stat.size > MAX_FILE_BYTES) {
        continue;
      }
      content = await fs.promises.readFile(absPath, "utf-8");
    } catch {
      continue;
    }
    // Text files do not contain NUL; treat anything that does as binary so
    // decoded garbage is never reported as a match.
    if (content.includes("\u0000")) {
      continue;
    }
    const lines = content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i += 1) {
      const line = lines[i] ?? "";
      if (regex.test(line)) {
        let text = line;
        if (text.length > MAX_GREP_LINE_LENGTH) {
          text = `${text.substring(0, MAX_GREP_LINE_LENGTH)}…`;
        }
        matches.push({ path: rel, lineNum: i + 1, line: text });
        if (matches.length > maxResults) {
          break;
        }
      }
    }
  }

  const truncated = matches.length > maxResults;
  if (truncated) {
    matches.length = maxResults;
  }
  return { matches, truncated };
}
1080+
1081+
/**
 * List files matching a glob with the Node.js directory walker. Fallback for
 * when ripgrep is not available.
 *
 * @returns Up to `maxResults` paths relative to `cwd`, plus a flag telling
 *   whether the walk produced more than that.
 */
async function fsGlobSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  const limit = opts.maxResults;
  const startDir = opts.searchPath
    ? safePath(opts.cwd, opts.searchPath)
    : opts.cwd;
  const collected: string[] = [];

  // Gather one entry beyond the limit so truncation can be detected, then stop.
  for await (const relPath of walkFiles(opts.cwd, startDir, opts.pattern)) {
    collected.push(relPath);
    if (collected.length > limit) {
      break;
    }
  }

  const overflowed = collected.length > limit;
  if (overflowed) {
    // Drop the extra probe entry.
    collected.length = limit;
  }
  return { files: collected, truncated: overflowed };
}
1104+
1105+
// ── Dispatch: try rg, fall back to Node.js ──────────────────────────
1106+
1107+
/**
 * Run one grep search, preferring ripgrep. If the ripgrep path rejects for
 * any reason, the same options are handed to the Node.js implementation.
 */
async function grepSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  return rgGrepSearch(opts).catch(() => fsGrepSearch(opts));
}
1120+
1121+
/**
 * Run one glob search, preferring ripgrep. If the ripgrep path rejects for
 * any reason, the same options are handed to the Node.js implementation.
 */
async function globSearchImpl(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  return rgGlobSearch(opts).catch(() => fsGlobSearch(opts));
}
1133+
1134+
/**
 * Handle the `grep` local-op: run every requested search concurrently and
 * return one result entry per search, in request order.
 *
 * The per-search limit comes from `params.maxResultsPerSearch` but is
 * truncated to an integer and clamped to `1..MAX_GREP_RESULTS_PER_SEARCH`.
 * A missing or non-finite value falls back to the maximum. Without the clamp
 * a negative or fractional limit would corrupt the slicing further down and
 * an oversized one would bypass the cap.
 *
 * @param payload - The grep payload (`cwd` plus the list of searches).
 * @returns `{ ok: true, data: { results } }`, where each result carries the
 *   pattern, its matches, the truncation flag and `totalMatches` (the number
 *   of matches returned).
 */
async function grep(payload: GrepPayload): Promise<LocalOpResult> {
  const { cwd, params } = payload;
  const requested = params.maxResultsPerSearch;
  const maxResults =
    typeof requested === "number" && Number.isFinite(requested)
      ? Math.min(
          Math.max(Math.trunc(requested), 1),
          MAX_GREP_RESULTS_PER_SEARCH
        )
      : MAX_GREP_RESULTS_PER_SEARCH;

  const results = await Promise.all(
    params.searches.map(async (search) => {
      const { matches, truncated } = await grepSearch({
        cwd,
        pattern: search.pattern,
        searchPath: search.path,
        include: search.include,
        maxResults,
      });
      return {
        pattern: search.pattern,
        matches,
        truncated,
        totalMatches: matches.length,
      };
    })
  );

  return { ok: true, data: { results } };
}
1158+
1159+
/**
 * Handle the `glob` local-op: resolve every requested pattern concurrently
 * and return one result entry per pattern, in request order.
 *
 * The per-pattern limit comes from `params.maxResults` but is truncated to an
 * integer and clamped to `1..MAX_GLOB_RESULTS`. A missing or non-finite value
 * falls back to the maximum. Without the clamp a negative or fractional limit
 * would corrupt the slicing further down and an oversized one would bypass
 * the cap.
 *
 * @param payload - The glob payload (`cwd`, patterns and an optional `path`
 *   shared by all patterns).
 * @returns `{ ok: true, data: { results } }`, where each result carries the
 *   pattern, the matching files and the truncation flag.
 */
async function glob(payload: GlobPayload): Promise<LocalOpResult> {
  const { cwd, params } = payload;
  const requested = params.maxResults;
  const maxResults =
    typeof requested === "number" && Number.isFinite(requested)
      ? Math.min(Math.max(Math.trunc(requested), 1), MAX_GLOB_RESULTS)
      : MAX_GLOB_RESULTS;

  const results = await Promise.all(
    params.patterns.map(async (pattern) => {
      const { files, truncated } = await globSearchImpl({
        cwd,
        pattern,
        searchPath: params.path,
        maxResults,
      });
      return { pattern, files, truncated };
    })
  );

  return { ok: true, data: { results } };
}
1177+
1178+
// ── Sentry project + DSN ────────────────────────────────────────────
1179+
8491180
async function createSentryProject(
8501181
payload: CreateSentryProjectPayload,
8511182
options: WizardOptions

src/lib/init/types.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ export type LocalOpPayload =
2525
| FileExistsBatchPayload
2626
| RunCommandsPayload
2727
| ApplyPatchsetPayload
28+
| GrepPayload
29+
| GlobPayload
2830
| CreateSentryProjectPayload
2931
| DetectSentryPayload;
3032

@@ -69,6 +71,33 @@ export type RunCommandsPayload = {
6971
};
7072
};
7173

74+
/**
 * One grep query inside a batched grep request.
 * `pattern` is the regular expression to search for, `path` an optional
 * sub-path (relative to the payload's `cwd`) to restrict the search to, and
 * `include` an optional glob limiting which files are searched.
 */
export type GrepSearch = {
75+
pattern: string;
76+
path?: string;
77+
include?: string;
78+
};
79+
80+
/**
 * Payload of the `grep` local-op: a batch of searches run against `cwd`,
 * with an optional limit on the matches returned per search.
 */
export type GrepPayload = {
81+
type: "local-op";
82+
operation: "grep";
83+
cwd: string;
84+
params: {
85+
searches: GrepSearch[];
86+
maxResultsPerSearch?: number;
87+
};
88+
};
89+
90+
/**
 * Payload of the `glob` local-op: a batch of glob patterns resolved against
 * `cwd`. The optional `path` applies to every pattern, and `maxResults`
 * optionally limits the files returned per pattern.
 */
export type GlobPayload = {
91+
type: "local-op";
92+
operation: "glob";
93+
cwd: string;
94+
params: {
95+
patterns: string[];
96+
path?: string;
97+
maxResults?: number;
98+
};
99+
};
100+
72101
export type ApplyPatchsetPayload = {
73102
type: "local-op";
74103
operation: "apply-patchset";

0 commit comments

Comments
 (0)