From a2440cfe4929df1ba188992a3b4063973ad68b10 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Thu, 29 Jan 2026 22:45:08 -0500 Subject: [PATCH 1/4] large(ish) files were not working --- src/fs/encoding.ts | 24 ++- .../read-write-fs.piping.test.ts | 164 ++++++++++++++++++ 2 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 src/fs/read-write-fs/read-write-fs.piping.test.ts diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts index b9980457..dd0dfe25 100644 --- a/src/fs/encoding.ts +++ b/src/fs/encoding.ts @@ -36,7 +36,16 @@ export function toBuffer( return bytes; } if (encoding === "binary" || encoding === "latin1") { - return Uint8Array.from(content, (c) => c.charCodeAt(0)); + // Large strings are copied with a plain index loop (not in chunks) to avoid performance issues + const chunkSize = 65536; // size threshold: 64K characters + if (content.length <= chunkSize) { + return Uint8Array.from(content, (c) => c.charCodeAt(0)); + } + const result = new Uint8Array(content.length); + for (let i = 0; i < content.length; i++) { + result[i] = content.charCodeAt(i); + } + return result; } // Default to UTF-8 for text content return textEncoder.encode(content); @@ -58,7 +67,18 @@ export function fromBuffer( .join(""); } if (encoding === "binary" || encoding === "latin1") { - return String.fromCharCode(...buffer); + // Use chunked approach to avoid call stack limit with large buffers + // String.fromCharCode(...buffer) fails with buffers > ~100KB + const chunkSize = 65536; // 64KB chunks + if (buffer.length <= chunkSize) { + return String.fromCharCode(...buffer); + } + let result = ""; + for (let i = 0; i < buffer.length; i += chunkSize) { + const chunk = buffer.subarray(i, i + chunkSize); + result += String.fromCharCode(...chunk); + } + return result; } // Default to UTF-8 for text content return textDecoder.decode(buffer); diff --git a/src/fs/read-write-fs/read-write-fs.piping.test.ts b/src/fs/read-write-fs/read-write-fs.piping.test.ts new file mode 100644 index 00000000..bbb9c82e --- /dev/null +++ 
b/src/fs/read-write-fs/read-write-fs.piping.test.ts @@ -0,0 +1,164 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { Bash } from "../../Bash.js"; +import { ReadWriteFs } from "./read-write-fs.js"; + +/** + * Test piping with ReadWriteFs (real filesystem) + * This test suite validates that just-bash can handle large data through pipes + * when using ReadWriteFs backed by the real filesystem. + */ +describe("ReadWriteFs - Piping with large data", () => { + let tempDir: string; + let fs: ReadWriteFs; + let bash: Bash; + + beforeAll(async () => { + // Create a real temp directory + tempDir = await mkdtemp(join(tmpdir(), "bash-test-")); + console.log("Created temp dir:", tempDir); + + // Use ReadWriteFs with real filesystem + fs = new ReadWriteFs({ root: tempDir }); + bash = new Bash({ fs }); + }); + + afterAll(async () => { + // Cleanup + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + console.log("Cleaned up temp dir:", tempDir); + } + }); + + it("should handle large data with wc -l using ReadWriteFs", async () => { + // Create large text data with trailing newline (standard for text files) + const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); + const largeText = lines.join("\n") + "\n"; + + console.log( + `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`, + ); + console.log(`Line count: ${lines.length}`); + + // Write to file + await fs.writeFile("/data.txt", largeText); + + // Test piping through cat + const result = await bash.exec("cat /data.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result stderr:", result.stderr); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("50000"); + }, 30000); + + it("should handle large data with wc -l 
FILENAME using ReadWriteFs", async () => { + // Create large text data with trailing newline + const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); + const largeText = lines.join("\n") + "\n"; + + // Write to file + await fs.writeFile("/data2.txt", largeText); + + // Test direct file access + const result = await bash.exec("wc -l /data2.txt"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toContain("50000"); + }, 30000); + + it("should handle small data with wc -l using ReadWriteFs", async () => { + // Create small text data with trailing newline + const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`); + const smallText = lines.join("\n") + "\n"; + + // Write to file + await fs.writeFile("/small.txt", smallText); + + // Test piping through cat + const result = await bash.exec("cat /small.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("100"); + }, 30000); + + it("should handle medium data with multiple pipes", async () => { + // Create medium text data with some repeated lines + const lines = Array.from({ length: 10000 }, (_, i) => { + // Create some duplicates + const lineNum = Math.floor(i / 2); + return `Line ${lineNum}`; + }); + const mediumText = lines.join("\n"); + + // Write to file + await fs.writeFile("/medium.txt", mediumText); + + // Test piping through multiple commands + const result = await bash.exec("cat /medium.txt | sort | uniq | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + // Should have 5000 unique lines (0-4999) + expect(result.stdout.trim()).toBe("5000"); + }, 30000); + + it("should handle grep with large files", async () => { 
+ // Create large text data with specific patterns + const lines = Array.from({ length: 20000 }, (_, i) => { + if (i % 3 === 0) { + return `MATCH Line ${i}`; + } + return `Other Line ${i}`; + }); + const largeText = lines.join("\n"); + + // Write to file + await fs.writeFile("/grep-test.txt", largeText); + + // Test grep with wc + const result = await bash.exec("grep MATCH /grep-test.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + // Should match every 3rd line: 20000/3 = 6667 (rounded up) + expect(result.stdout.trim()).toBe("6667"); + }, 30000); + + it("should handle binary data correctly", async () => { + // Create binary data + const binaryData = new Uint8Array(10000); + for (let i = 0; i < binaryData.length; i++) { + binaryData[i] = i % 256; + } + + // Write binary file + await fs.writeFile("/binary.bin", binaryData); + + // Test wc -c (byte count) + const result = await bash.exec("wc -c /binary.bin"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toContain("10000"); + }, 30000); +}); + +// Made with Bob From b7b1a5d828339b06ef04290603d4c42de5842c70 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Sat, 31 Jan 2026 08:11:01 -0500 Subject: [PATCH 2/4] also need to update base64 for large files --- src/commands/base64/base64.binary.test.ts | 67 +++++++++++++++++++++++ src/commands/base64/base64.ts | 25 ++++++++- src/fs/encoding.ts | 8 ++- src/interpreter/redirections.ts | 9 ++- 4 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/commands/base64/base64.binary.test.ts b/src/commands/base64/base64.binary.test.ts index 0c673d99..be0b8c9d 100644 --- a/src/commands/base64/base64.binary.test.ts +++ b/src/commands/base64/base64.binary.test.ts @@ -130,5 +130,72 @@ describe("base64 with binary data", () => { 
expect(result.stdout).toBe("test content"); }); + + it("should handle large binary files (1MB+)", async () => { + // Create a 1MB binary file with all byte values repeated + const size = 1024 * 1024; // 1MB + const data = new Uint8Array(size); + for (let i = 0; i < size; i++) { + data[i] = i % 256; + } + + const env = new Bash({ + files: { + "/large.bin": data, + }, + }); + + // Encode the large file + await env.exec("base64 /large.bin > /encoded.txt"); + + // Decode it back + await env.exec("base64 -d /encoded.txt > /decoded.bin"); + + // Verify the decoded file matches the original + const decoded = await env.fs.readFileBuffer( + env.fs.resolvePath("/", "/decoded.bin"), + ); + + expect(decoded.length).toBe(size); + // Check first, middle, and last bytes + expect(decoded[0]).toBe(0); + expect(decoded[255]).toBe(255); + expect(decoded[size / 2]).toBe((size / 2) % 256); + expect(decoded[size - 1]).toBe((size - 1) % 256); + + // Verify a sample of bytes throughout the file + for (let i = 0; i < size; i += 10000) { + expect(decoded[i]).toBe(i % 256); + } + }); + + it("should handle large files via pipe", async () => { + // Create a 512KB binary file + const size = 512 * 1024; + const data = new Uint8Array(size); + for (let i = 0; i < size; i++) { + data[i] = (i * 7) % 256; // Different pattern + } + + const env = new Bash({ + files: { + "/medium.bin": data, + }, + }); + + // Round-trip through pipe + await env.exec("cat /medium.bin | base64 | base64 -d > /output.bin"); + + // Verify the output matches the original + const output = await env.fs.readFileBuffer( + env.fs.resolvePath("/", "/output.bin"), + ); + + expect(output.length).toBe(size); + // Check a sample of bytes + for (let i = 0; i < size; i += 5000) { + expect(output[i]).toBe((i * 7) % 256); + } + }); }); }); diff --git a/src/commands/base64/base64.ts b/src/commands/base64/base64.ts index e6661365..65536d64 100644 --- a/src/commands/base64/base64.ts +++ b/src/commands/base64/base64.ts @@ -93,7 +93,19 @@ 
export const base64Command: Command = { // For decoding, read as text and strip whitespace const readResult = await readBinary(ctx, files, "base64"); if (!readResult.ok) return readResult.error; - // Use binary string (latin1) to preserve bytes for input + + // Use Buffer if available (Node.js) for better large file handling + if (typeof Buffer !== "undefined") { + const buffer = Buffer.from(readResult.data); + const cleaned = buffer.toString("utf8").replace(/\s/g, ""); + const decoded = Buffer.from(cleaned, "base64"); + // Convert to binary string (each char code = byte value) + // Use Buffer's latin1 encoding which treats each byte as a character + const result = decoded.toString("latin1"); + return { stdout: result, stderr: "", exitCode: 0 }; + } + + // Browser fallback - use binary string (latin1) to preserve bytes for input const input = String.fromCharCode(...readResult.data); const cleaned = input.replace(/\s/g, ""); // Decode base64 to binary string (each char code = byte value) @@ -105,8 +117,15 @@ export const base64Command: Command = { const readResult = await readBinary(ctx, files, "base64"); if (!readResult.ok) return readResult.error; - // Convert binary to base64 - let encoded = btoa(String.fromCharCode(...readResult.data)); + // Use Buffer if available (Node.js) for better large file handling + let encoded: string; + if (typeof Buffer !== "undefined") { + const buffer = Buffer.from(readResult.data); + encoded = buffer.toString("base64"); + } else { + // Browser fallback - convert binary to base64 + encoded = btoa(String.fromCharCode(...readResult.data)); + } if (wrapCols > 0) { const lines: string[] = []; diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts index dd0dfe25..ca8059fb 100644 --- a/src/fs/encoding.ts +++ b/src/fs/encoding.ts @@ -67,8 +67,12 @@ export function fromBuffer( .join(""); } if (encoding === "binary" || encoding === "latin1") { - // Use chunked approach to avoid call stack limit with large buffers - // 
String.fromCharCode(...buffer) fails with buffers > ~100KB + // Use Buffer if available (Node.js) - much more efficient and avoids spread operator limits + if (typeof Buffer !== "undefined") { + return Buffer.from(buffer).toString(encoding); + } + + // Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB const chunkSize = 65536; // 64KB chunks if (buffer.length <= chunkSize) { return String.fromCharCode(...buffer); diff --git a/src/interpreter/redirections.ts b/src/interpreter/redirections.ts index 8595ce3c..55ff9195 100644 --- a/src/interpreter/redirections.ts +++ b/src/interpreter/redirections.ts @@ -53,9 +53,16 @@ async function checkOutputRedirectTarget( * Determine the encoding to use for file I/O. * If all character codes are <= 255, use binary encoding (byte data). * Otherwise, use UTF-8 encoding (text with Unicode characters). + * For performance, only the first 8192 characters of large strings are checked (heuristic). */ function getFileEncoding(content: string): "binary" | "utf8" { - for (let i = 0; i < content.length; i++) { + const SAMPLE_SIZE = 8192; // characters sampled (string indices, not bytes) + + // For large strings, only check the first SAMPLE_SIZE characters + // Heuristic: a char code > 255 that first appears after the sample is missed and the content is treated as binary + const checkLength = Math.min(content.length, SAMPLE_SIZE); + + for (let i = 0; i < checkLength; i++) { if (content.charCodeAt(i) > 255) { return "utf8"; } From 50fbda450bb2519dc4492d33cb5a39cd4b5969d5 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Tue, 3 Feb 2026 14:18:51 -0500 Subject: [PATCH 3/4] jq-was --- package.json | 1 + pnpm-lock.yaml | 7 + src/commands/jq/jq-worker.ts | 66 +++++++ src/commands/jq/jq.functions.test.ts | 10 +- src/commands/jq/jq.limits.test.ts | 37 ++-- src/commands/jq/jq.test.ts | 2 +- src/commands/jq/jq.ts | 271 +++++++++++++++++---------- src/jq-budget-test.test.ts | 128 +++++++++++++ 8 files changed, 401 insertions(+), 121 deletions(-) create mode 100644 src/commands/jq/jq-worker.ts create mode 100644 src/jq-budget-test.test.ts diff --git 
a/package.json b/package.json index 3eb27dba..72a962f7 100644 --- a/package.json +++ b/package.json @@ -95,6 +95,7 @@ "fast-xml-parser": "^5.3.3", "file-type": "^21.2.0", "ini": "^6.0.0", + "jq-web": "^0.6.2", "minimatch": "^10.1.1", "modern-tar": "^0.7.3", "papaparse": "^5.5.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1a9e24e6..d764e418 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,6 +23,9 @@ dependencies: ini: specifier: ^6.0.0 version: 6.0.0 + jq-web: + specifier: ^0.6.2 + version: 0.6.2 minimatch: specifier: ^10.1.1 version: 10.1.1 @@ -1390,6 +1393,10 @@ packages: hasBin: true dev: true + /jq-web@0.6.2: + resolution: {integrity: sha512-+7XvjBYwTx4vP5PYkf6Q6orubO/v+UgMU6By1GritrmShr9QpT3UKa4ANzXWQfhdqtBnQYXsm7ZNbdIHT6tYpQ==} + dev: false + /js-tokens@9.0.1: resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} dev: true diff --git a/src/commands/jq/jq-worker.ts b/src/commands/jq/jq-worker.ts new file mode 100644 index 00000000..dac59a29 --- /dev/null +++ b/src/commands/jq/jq-worker.ts @@ -0,0 +1,66 @@ +/** + * Worker thread for executing jq-web with timeout protection. + * This allows us to terminate long-running jq operations. 
+ */ + +import { parentPort } from "node:worker_threads"; +import { createRequire } from "node:module"; + +const require = createRequire(import.meta.url); + +interface WorkerMessage { + input: string; + filter: string; + flags: string[]; +} + +interface WorkerResult { + success: true; + output: string; + exitCode: number; +} + +interface WorkerError { + success: false; + error: string; + exitCode: number; + stderr?: string; +} + +if (!parentPort) { + throw new Error("This file must be run as a worker thread"); +} + +parentPort.on("message", async (message: WorkerMessage) => { + try { + const jqPromise: Promise = require("jq-web"); + const jq = await jqPromise; + + try { + const output = jq.raw(message.input, message.filter, message.flags); + const result: WorkerResult = { + success: true, + output, + exitCode: 0, + }; + parentPort!.postMessage(result); + } catch (e: any) { + const error: WorkerError = { + success: false, + error: e.message, + exitCode: e.exitCode || 3, + stderr: e.stderr, + }; + parentPort!.postMessage(error); + } + } catch (e: any) { + const error: WorkerError = { + success: false, + error: e.message, + exitCode: 1, + }; + parentPort!.postMessage(error); + } +}); + +// Made with Bob diff --git a/src/commands/jq/jq.functions.test.ts b/src/commands/jq/jq.functions.test.ts index a8cbd7ea..df17e342 100644 --- a/src/commands/jq/jq.functions.test.ts +++ b/src/commands/jq/jq.functions.test.ts @@ -313,16 +313,18 @@ describe("jq builtin functions", () => { expect(result.exitCode).toBe(0); }); - it("should return null for pow with non-numeric args", async () => { + it("should error for pow with non-numeric args", async () => { const env = new Bash(); const result = await env.exec("jq -n 'pow(\"a\"; 2)'"); - expect(result.stdout).toBe("null\n"); + expect(result.exitCode).toBe(5); + expect(result.stderr).toContain("number required"); }); - it("should return null for atan2 with non-numeric args", async () => { + it("should error for atan2 with non-numeric 
args", async () => { const env = new Bash(); const result = await env.exec("jq -n 'atan2(\"a\"; 2)'"); - expect(result.stdout).toBe("null\n"); + expect(result.exitCode).toBe(5); + expect(result.stderr).toContain("number required"); }); }); }); diff --git a/src/commands/jq/jq.limits.test.ts b/src/commands/jq/jq.limits.test.ts index bb6c5d22..72d65e27 100644 --- a/src/commands/jq/jq.limits.test.ts +++ b/src/commands/jq/jq.limits.test.ts @@ -1,14 +1,17 @@ import { describe, expect, it } from "vitest"; import { Bash } from "../../Bash.js"; -import { ExecutionLimitError } from "../../interpreter/errors.js"; /** * JQ Execution Limits Tests * - * These tests verify that jq commands cannot cause runaway compute. - * JQ programs should complete in bounded time regardless of input. + * NOTE: We now use jq-web (real jq compiled to WebAssembly) with worker-based + * timeout protection. Real jq does not have artificial iteration limits. * - * IMPORTANT: All tests should complete quickly (<1s each). + * These tests verify that: + * 1. Infinite loops are terminated by timeout (1 second) + * 2. Normal operations that complete quickly work correctly + * + * IMPORTANT: Timeout tests may take up to 1 second each. */ describe("JQ Execution Limits", () => { @@ -18,8 +21,8 @@ describe("JQ Execution Limits", () => { // until condition that never becomes true const result = await env.exec(`echo 'null' | jq 'until(false; .)'`); - expect(result.stderr).toContain("too many iterations"); - expect(result.exitCode).toBe(ExecutionLimitError.EXIT_CODE); + expect(result.stderr).toContain("timeout"); + expect(result.exitCode).toBe(124); // Standard timeout exit code }); it("should allow until that terminates", async () => { @@ -37,8 +40,8 @@ describe("JQ Execution Limits", () => { // while condition that's always true const result = await env.exec(`echo '0' | jq '[while(true; . 
+ 1)]'`); - expect(result.stderr).toContain("too many iterations"); - expect(result.exitCode).toBe(ExecutionLimitError.EXIT_CODE); + expect(result.stderr).toContain("timeout"); + expect(result.exitCode).toBe(124); }); it("should allow while that terminates", async () => { @@ -51,26 +54,26 @@ describe("JQ Execution Limits", () => { }); describe("repeat protection", () => { - it("should protect against infinite repeat", async () => { + it("should protect against infinite repeat without limit", async () => { const env = new Bash(); - // repeat with identity produces infinite stream + // repeat without limit produces infinite stream const result = await env.exec( - `echo '1' | jq '[limit(100000; repeat(.))]'`, + `echo '1' | jq 'repeat(.)'`, ); - expect(result.stderr).toContain("too many iterations"); - expect(result.exitCode).toBe(ExecutionLimitError.EXIT_CODE); + expect(result.stderr).toContain("timeout"); + expect(result.exitCode).toBe(124); }); - it("should allow repeat that terminates naturally", async () => { + it("should allow repeat with limit", async () => { const env = new Bash(); - // repeat with update that eventually returns empty stops + // repeat with limit terminates after specified iterations const result = await env.exec( - `echo '5' | jq -c '[limit(10; repeat(if . > 0 then . 
- 1 else empty end))]'`, + `echo '1' | jq -c '[limit(5; repeat(.))]'`, ); expect(result.exitCode).toBe(0); - expect(result.stdout.trim()).toBe("[5,4,3,2,1,0]"); + expect(result.stdout.trim()).toBe("[1,1,1,1,1]"); }); }); diff --git a/src/commands/jq/jq.test.ts b/src/commands/jq/jq.test.ts index 949561b9..7b8d8761 100644 --- a/src/commands/jq/jq.test.ts +++ b/src/commands/jq/jq.test.ts @@ -253,8 +253,8 @@ describe("jq", () => { const output = JSON.parse(result.stdout); // group_by sorts by key: false < true (alphabetically) expect(output).toEqual([ - { merged: true, count: 2 }, { merged: false, count: 1 }, + { merged: true, count: 2 }, ]); }); diff --git a/src/commands/jq/jq.ts b/src/commands/jq/jq.ts index 820c8552..3c76b08a 100644 --- a/src/commands/jq/jq.ts +++ b/src/commands/jq/jq.ts @@ -1,19 +1,85 @@ /** * jq - Command-line JSON processor * - * Full jq implementation with proper parser and evaluator. + * Uses jq-web (real jq compiled to WebAssembly) for full jq compatibility. + * Executes in a worker thread with timeout protection to prevent runaway compute. 
*/ -import { ExecutionLimitError } from "../../interpreter/errors.js"; import type { Command, CommandContext, ExecResult } from "../../types.js"; import { readFiles } from "../../utils/file-reader.js"; import { hasHelpFlag, showHelp, unknownOption } from "../help.js"; -import { - type EvaluateOptions, - evaluate, - parse, - type QueryValue, -} from "../query-engine/index.js"; +import { Worker } from "node:worker_threads"; +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; + +// Timeout for jq execution (1000ms = 1 second) +// This prevents infinite loops from hanging the process while allowing +// normal operations to complete +const JQ_TIMEOUT_MS = 1000; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Worker path: try current directory first (dist), then fall back to src +// When running tests, we're in src/commands/jq/, worker is in src/commands/jq/jq-worker.ts +// When running from dist, we're in dist/commands/jq/, worker is in dist/commands/jq/jq-worker.js +let workerPath = join(__dirname, "jq-worker.js"); +// For tests running from source, use the TypeScript file +if (__filename.includes("/src/")) { + workerPath = join(__dirname, "jq-worker.ts"); +} + +/** + * Execute jq in a worker thread with timeout protection. + * Returns the result or throws an error if timeout is exceeded. 
+ */ +async function executeJqWithTimeout( + input: string, + filter: string, + flags: string[], +): Promise<{ output: string; exitCode: number }> { + return new Promise((resolve, reject) => { + const worker = new Worker(workerPath); + let timedOut = false; + + const timeout = setTimeout(() => { + timedOut = true; + worker.terminate(); + reject(new Error("jq execution timeout: operation took too long")); + }, JQ_TIMEOUT_MS); + + worker.on("message", (result: any) => { + clearTimeout(timeout); + worker.terminate(); + + if (timedOut) return; + + if (result.success) { + resolve({ output: result.output, exitCode: result.exitCode }); + } else { + const error: any = new Error(result.error); + error.exitCode = result.exitCode; + error.stderr = result.stderr; + reject(error); + } + }); + + worker.on("error", (err) => { + clearTimeout(timeout); + worker.terminate(); + if (!timedOut) reject(err); + }); + + worker.on("exit", (code) => { + clearTimeout(timeout); + if (!timedOut && code !== 0) { + reject(new Error(`Worker exited with code ${code}`)); + } + }); + + worker.postMessage({ input, filter, flags }); + }); +} /** * Parse a JSON stream (concatenated JSON values). @@ -125,60 +191,23 @@ const jqHelp = { ], }; -function formatValue( - v: QueryValue, - compact: boolean, - raw: boolean, - sortKeys: boolean, - useTab: boolean, - indent = 0, -): string { - if (v === null) return "null"; - if (v === undefined) return "null"; - if (typeof v === "boolean") return String(v); - if (typeof v === "number") { - if (!Number.isFinite(v)) return "null"; - return String(v); - } - if (typeof v === "string") return raw ? v : JSON.stringify(v); - - const indentStr = useTab ? 
"\t" : " "; - - if (Array.isArray(v)) { - if (v.length === 0) return "[]"; - if (compact) { - return `[${v.map((x) => formatValue(x, true, false, sortKeys, useTab)).join(",")}]`; - } - const items = v.map( - (x) => - indentStr.repeat(indent + 1) + - formatValue(x, false, false, sortKeys, useTab, indent + 1), - ); - return `[\n${items.join(",\n")}\n${indentStr.repeat(indent)}]`; - } - - if (typeof v === "object") { - let keys = Object.keys(v as object); - if (sortKeys) keys = keys.sort(); - if (keys.length === 0) return "{}"; - if (compact) { - return `{${keys.map((k) => `${JSON.stringify(k)}:${formatValue((v as Record)[k], true, false, sortKeys, useTab)}`).join(",")}}`; - } - const items = keys.map((k) => { - const val = formatValue( - (v as Record)[k], - false, - false, - sortKeys, - useTab, - indent + 1, - ); - return `${indentStr.repeat(indent + 1)}${JSON.stringify(k)}: ${val}`; - }); - return `{\n${items.join(",\n")}\n${indentStr.repeat(indent)}}`; - } - - return String(v); +/** + * Build jq flags string from options + */ +function buildJqFlags(options: { + raw: boolean; + compact: boolean; + sortKeys: boolean; + useTab: boolean; + joinOutput: boolean; +}): string { + const flags: string[] = []; + if (options.raw) flags.push("-r"); + if (options.compact) flags.push("-c"); + if (options.sortKeys) flags.push("-S"); + if (options.useTab) flags.push("--tab"); + if (options.joinOutput) flags.push("-j"); + return flags.join(" "); } export const jqCommand: Command = { @@ -268,81 +297,125 @@ export const jqCommand: Command = { } try { - const ast = parse(filter); - let values: QueryValue[] = []; - - const evalOptions: EvaluateOptions = { - limits: ctx.limits - ? 
{ maxIterations: ctx.limits.maxJqIterations } - : undefined, - env: ctx.env, - }; + // Build jq flags array (jq-web expects an array, not a string) + const flags: string[] = []; + if (raw) flags.push("-r"); + if (compact) flags.push("-c"); + if (sortKeys) flags.push("-S"); + if (useTab) flags.push("--tab"); + if (joinOutput) flags.push("-j"); + + const outputParts: string[] = []; if (nullInput) { - values = evaluate(null, ast, evalOptions); + // Null input mode: run filter with null input + const { output } = await executeJqWithTimeout("null", filter, flags); + if (output !== undefined) { + outputParts.push(output); + } } else if (slurp) { // Slurp mode: combine all inputs into single array - // Use JSON stream parser to handle concatenated JSON (not just NDJSON) - const items: QueryValue[] = []; + const items: unknown[] = []; for (const { content } of inputs) { const trimmed = content.trim(); if (trimmed) { items.push(...parseJsonStream(trimmed)); } } - values = evaluate(items, ast, evalOptions); + const jsonInput = JSON.stringify(items); + const { output } = await executeJqWithTimeout(jsonInput, filter, flags); + if (output !== undefined) { + outputParts.push(output); + } } else { - // Process each input file separately - // Use JSON stream parser to handle concatenated JSON (e.g., cat file1.json file2.json | jq .) + // Process each input separately for (const { content } of inputs) { const trimmed = content.trim(); if (!trimmed) continue; const jsonValues = parseJsonStream(trimmed); for (const jsonValue of jsonValues) { - values.push(...evaluate(jsonValue, ast, evalOptions)); + const jsonInput = JSON.stringify(jsonValue); + const { output } = await executeJqWithTimeout(jsonInput, filter, flags); + // Keep every defined result, including an empty string (e.g., 'empty' filter); skip only undefined + if (output !== undefined) { + outputParts.push(output); + } } } } - const formatted = values.map((v) => - formatValue(v, compact, raw, sortKeys, useTab), - ); - const separator = joinOutput ? 
"" : "\n"; - const output = formatted.join(separator); - const exitCode = - exitStatus && - (values.length === 0 || - values.every((v) => v === null || v === undefined || v === false)) - ? 1 - : 0; + // executeJqWithTimeout() returns formatted output + // - Without -j: includes newlines between values but NOT a trailing newline + // - With -j: no newlines at all + // We need to add the final newline (unless -j is used) and handle multiple inputs + + let output: string; + if (joinOutput) { + // With -j: concatenate all outputs with no separators or trailing newline + output = outputParts.join(""); + } else { + // Without -j: each output part needs a trailing newline + // jq-web doesn't add the final newline, so we need to add it + output = outputParts.map(part => part.endsWith("\n") ? part : `${part}\n`).join(""); + } + + // Calculate exit code for -e flag + // We need to check if output represents null/false/empty + let exitCode = 0; + if (exitStatus) { + const trimmed = output.trim(); + if (!trimmed || trimmed === "null" || trimmed === "false") { + exitCode = 1; + } + } return { - stdout: output ? (joinOutput ? 
output : `${output}\n`) : "", + stdout: output, stderr: "", exitCode, }; } catch (e) { - if (e instanceof ExecutionLimitError) { + const error = e as any; + const msg = error.message; + + // Check for timeout + if (msg.includes("timeout")) { return { stdout: "", - stderr: `jq: ${e.message}\n`, - exitCode: ExecutionLimitError.EXIT_CODE, + stderr: "jq: execution timeout: operation took too long\n", + exitCode: 124, // Standard timeout exit code }; } - const msg = (e as Error).message; - if (msg.includes("Unknown function")) { + + // Check if jq-web provided an exit code + const exitCode = error.exitCode || 3; + + // Use stderr from jq-web if available, otherwise format the message + let stderr = error.stderr || msg; + + // For JSON parse errors from parseJsonStream, format as parse error + if (msg.includes("Invalid JSON") || msg.includes("Unexpected")) { + stderr = `jq: parse error: ${msg}`; return { stdout: "", - stderr: `jq: error: ${msg}\n`, - exitCode: 3, + stderr: `${stderr}\n`, + exitCode: 5, }; } + + // Ensure stderr ends with newline + if (!stderr.endsWith('\n')) { + stderr += '\n'; + } + return { stdout: "", - stderr: `jq: parse error: ${msg}\n`, - exitCode: 5, + stderr, + exitCode, }; } }, }; + +// Made with Bob diff --git a/src/jq-budget-test.test.ts b/src/jq-budget-test.test.ts new file mode 100644 index 00000000..810a7a15 --- /dev/null +++ b/src/jq-budget-test.test.ts @@ -0,0 +1,128 @@ +/** + * Test for jq command with budget file data + * This test demonstrates an issue where jq filtering returns empty results + */ + +import { describe, it, expect } from 'vitest'; +import { Bash } from './Bash'; +import { InMemoryFs } from './fs/in-memory-fs'; + +describe('jq with budget file', () => { + it('should find EXPENSES rows using jq filter', async () => { + // Create a simplified budget structure + const budgetData = [ + { + excelData: { + sheets: [ + { + name: 'Budget Overview', + rows: [ + ['INCOME', '', '', ''], + ['Source', 'Jan', 'Feb', 'TOTAL'], + 
['Salary', 5200, 5200, 10400], + ['TOTAL INCOME', 5200, 5200, 10400], + ['', '', '', ''], + ['EXPENSES', '', '', ''], + ['Source', 'Jan', 'Feb', 'TOTAL'], + ['Rent', 1800, 1800, 3600], + ['TOTAL EXPENSES', 1800, 1800, 3600], + ], + }, + ], + }, + availableSheets: [ + { + index: 0, + name: 'Budget Overview', + rowCount: 9, + }, + ], + }, + ]; + + const fs = new InMemoryFs(); + const bash = new Bash({ fs }); + + // Write the budget data to a file + await bash.exec( + `echo '${JSON.stringify(budgetData)}' > /budgetFile.json`, + ); + + // First, verify the file was created and contains data + const catResult = await bash.exec('cat /budgetFile.json'); + expect(catResult.stdout).toContain('EXPENSES'); + expect(catResult.stdout).toContain('TOTAL EXPENSES'); + + // Test 1: Simple jq to get a specific row (this should work) + const simpleJqResult = await bash.exec( + 'cat /budgetFile.json | jq ".[0].excelData.sheets[0].rows[5]"', + ); + console.log('Simple jq result:', simpleJqResult.stdout); + expect(simpleJqResult.stdout).toContain('EXPENSES'); + + // Test 2: Complex jq filter to find EXPENSES rows + // This is the command that returns empty in the collie-poc tests + const complexJqResult = await bash.exec( + 'cat /budgetFile.json | jq \'.[0].excelData.sheets[0].rows | to_entries | .[] | select(.value[0] == "EXPENSES" or .value[0] == "TOTAL EXPENSES") | {index: .key, firstColumn: .value[0]}\'', + ); + + console.log('Complex jq stdout:', complexJqResult.stdout); + console.log('Complex jq stderr:', complexJqResult.stderr); + console.log('Complex jq exitCode:', complexJqResult.exitCode); + + // Expected output should contain both EXPENSES entries + expect(complexJqResult.stdout).toContain('"firstColumn": "EXPENSES"'); + expect(complexJqResult.stdout).toContain('"firstColumn": "TOTAL EXPENSES"'); + expect(complexJqResult.stdout).toContain('"index": 5'); + expect(complexJqResult.stdout).toContain('"index": 8'); + }); + + it('should test jq availability and version', async () 
=> { + const fs = new InMemoryFs(); + const bash = new Bash({ fs }); + + // Check if jq command exists + const whichResult = await bash.exec('which jq'); + console.log('which jq:', whichResult.stdout, whichResult.stderr); + + // Try to get jq version + const versionResult = await bash.exec('jq --version'); + console.log('jq version:', versionResult.stdout, versionResult.stderr); + }); + + it('should test basic jq functionality', async () => { + const fs = new InMemoryFs(); + const bash = new Bash({ fs }); + + // Create a simple JSON file + await bash.exec('echo \'{"name": "test", "value": 123}\' > /test.json'); + + // Test basic jq + const result = await bash.exec('cat /test.json | jq ".name"'); + console.log('Basic jq result:', result.stdout, result.stderr); + expect(result.stdout.trim()).toBe('"test"'); + }); + + it('should test jq with array filtering', async () => { + const fs = new InMemoryFs(); + const bash = new Bash({ fs }); + + // Create an array JSON file + const data = [ + { name: 'Alice', age: 30 }, + { name: 'Bob', age: 25 }, + { name: 'Charlie', age: 35 }, + ]; + + await bash.exec(`echo '${JSON.stringify(data)}' > /users.json`); + + // Test array filtering + const result = await bash.exec( + 'cat /users.json | jq ".[] | select(.age > 30)"', + ); + console.log('Array filter result:', result.stdout, result.stderr); + expect(result.stdout).toContain('Charlie'); + expect(result.stdout).not.toContain('Alice'); + expect(result.stdout).not.toContain('Bob'); + }); +}); \ No newline at end of file From b06379d49846b35818416017d498127c9a1a4c6b Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Fri, 6 Feb 2026 11:07:05 -0500 Subject: [PATCH 4/4] need to copy in the workers --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 72a962f7..775505e0 100644 --- a/package.json +++ b/package.json @@ -49,7 +49,7 @@ "scripts": { "build": "rm -rf dist && tsc && pnpm build:lib && pnpm build:browser && pnpm 
build:cli && pnpm build:shell && pnpm build:worker && pnpm build:clean && sed '1,/^-->/d' AGENTS.npm.md > dist/AGENTS.md", "build:clean": "find dist -name '*.test.js' -delete && find dist -name '*.test.d.ts' -delete", - "build:worker": "esbuild src/commands/python3/worker.ts --bundle --platform=node --format=esm --outfile=src/commands/python3/worker.js --external:pyodide && cp src/commands/python3/worker.js dist/commands/python3/worker.js && mkdir -p dist/bin/chunks && cp src/commands/python3/worker.js dist/bin/chunks/worker.js && mkdir -p dist/bundle/chunks && cp src/commands/python3/worker.js dist/bundle/chunks/worker.js", + "build:worker": "esbuild src/commands/python3/worker.ts --bundle --platform=node --format=esm --outfile=src/commands/python3/worker.js --external:pyodide && cp src/commands/python3/worker.js dist/commands/python3/worker.js && mkdir -p dist/bin/chunks && cp src/commands/python3/worker.js dist/bin/chunks/worker.js && mkdir -p dist/bundle/chunks && cp src/commands/python3/worker.js dist/bundle/chunks/worker.js && esbuild src/commands/jq/jq-worker.ts --bundle --platform=node --format=esm --outfile=dist/commands/jq/jq-worker.js && cp dist/commands/jq/jq-worker.js dist/bundle/chunks/jq-worker.js && cp dist/commands/jq/jq-worker.js dist/bin/chunks/jq-worker.js", "build:lib": "esbuild dist/index.js --bundle --splitting --platform=node --format=esm --minify --outdir=dist/bundle --chunk-names=chunks/[name]-[hash] --external:diff --external:minimatch --external:sprintf-js --external:turndown --external:sql.js --external:pyodide --external:@mongodb-js/zstd --external:node-liblzma --external:compressjs", "build:browser": "esbuild dist/browser.js --bundle --platform=browser --format=esm --minify --outfile=dist/bundle/browser.js --external:diff --external:minimatch --external:sprintf-js --external:turndown --external:node:* --external:@mongodb-js/zstd --external:node-liblzma --external:compressjs --define:__BROWSER__=true", "build:cli": "esbuild 
dist/cli/just-bash.js --bundle --splitting --platform=node --format=esm --minify --outdir=dist/bin --entry-names=[name] --chunk-names=chunks/[name]-[hash] --banner:js='#!/usr/bin/env node' --external:sql.js --external:pyodide --external:@mongodb-js/zstd --external:node-liblzma --external:compressjs",