From a2440cfe4929df1ba188992a3b4063973ad68b10 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Thu, 29 Jan 2026 22:45:08 -0500 Subject: [PATCH 1/3] large(ish) files were not working --- src/fs/encoding.ts | 24 ++- .../read-write-fs.piping.test.ts | 164 ++++++++++++++++++ 2 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 src/fs/read-write-fs/read-write-fs.piping.test.ts diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts index b9980457..dd0dfe25 100644 --- a/src/fs/encoding.ts +++ b/src/fs/encoding.ts @@ -36,7 +36,16 @@ export function toBuffer( return bytes; } if (encoding === "binary" || encoding === "latin1") { - return Uint8Array.from(content, (c) => c.charCodeAt(0)); + // Use chunked approach for large strings to avoid performance issues + const chunkSize = 65536; // 64KB chunks + if (content.length <= chunkSize) { + return Uint8Array.from(content, (c) => c.charCodeAt(0)); + } + const result = new Uint8Array(content.length); + for (let i = 0; i < content.length; i++) { + result[i] = content.charCodeAt(i); + } + return result; } // Default to UTF-8 for text content return textEncoder.encode(content); @@ -58,7 +67,18 @@ export function fromBuffer( .join(""); } if (encoding === "binary" || encoding === "latin1") { - return String.fromCharCode(...buffer); + // Use chunked approach to avoid call stack limit with large buffers + // String.fromCharCode(...buffer) fails with buffers > ~100KB + const chunkSize = 65536; // 64KB chunks + if (buffer.length <= chunkSize) { + return String.fromCharCode(...buffer); + } + let result = ""; + for (let i = 0; i < buffer.length; i += chunkSize) { + const chunk = buffer.subarray(i, i + chunkSize); + result += String.fromCharCode(...chunk); + } + return result; } // Default to UTF-8 for text content return textDecoder.decode(buffer); diff --git a/src/fs/read-write-fs/read-write-fs.piping.test.ts b/src/fs/read-write-fs/read-write-fs.piping.test.ts new file mode 100644 index 00000000..bbb9c82e --- /dev/null +++ b/src/fs/read-write-fs/read-write-fs.piping.test.ts @@ -0,0 +1,164 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { Bash } from "../../Bash.js"; +import { ReadWriteFs } from "./read-write-fs.js"; + +/** + * Test piping with ReadWriteFs (real filesystem) + * This test suite validates that just-bash can handle large data through pipes + * when using ReadWriteFs backed by the real filesystem. + */ +describe("ReadWriteFs - Piping with large data", () => { + let tempDir: string; + let fs: ReadWriteFs; + let bash: Bash; + + beforeAll(async () => { + // Create a real temp directory + tempDir = await mkdtemp(join(tmpdir(), "bash-test-")); + console.log("Created temp dir:", tempDir); + + // Use ReadWriteFs with real filesystem + fs = new ReadWriteFs({ root: tempDir }); + bash = new Bash({ fs }); + }); + + afterAll(async () => { + // Cleanup + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + console.log("Cleaned up temp dir:", tempDir); + } + }); + + it("should handle large data with wc -l using ReadWriteFs", async () => { + // Create large text data with trailing newline (standard for text files) + const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); + const largeText = lines.join("\n") + "\n"; + + console.log( + `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`, + ); + console.log(`Line count: ${lines.length}`); + + // Write to file + await fs.writeFile("/data.txt", largeText); + + // Test piping through cat + const result = await bash.exec("cat /data.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result stderr:", result.stderr); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("50000"); + }, 30000); + + it("should handle large data with wc -l FILENAME using ReadWriteFs", async () => { + // Create large text data with trailing newline + const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); + const largeText = lines.join("\n") + "\n"; + + // Write to file + await fs.writeFile("/data2.txt", largeText); + + // Test direct file access + const result = await bash.exec("wc -l /data2.txt"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toContain("50000"); + }, 30000); + + it("should handle small data with wc -l using ReadWriteFs", async () => { + // Create small text data with trailing newline + const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`); + const smallText = lines.join("\n") + "\n"; + + // Write to file + await fs.writeFile("/small.txt", smallText); + + // Test piping through cat + const result = await bash.exec("cat /small.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("100"); + }, 30000); + + it("should handle medium data with multiple pipes", async () => { + // Create medium text data with some repeated lines + const lines = Array.from({ length: 10000 }, (_, i) => { + // Create some duplicates + const lineNum = Math.floor(i / 2); + return `Line ${lineNum}`; + }); + const mediumText = lines.join("\n"); + + // Write to file + await fs.writeFile("/medium.txt", mediumText); + + // Test piping through multiple commands + const result = await bash.exec("cat /medium.txt | sort | uniq | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + // Should have 5000 unique lines (0-4999) + expect(result.stdout.trim()).toBe("5000"); + }, 30000); + + it("should handle grep with large files", async () => { + // Create large text data with specific patterns + const lines = Array.from({ length: 20000 }, (_, i) => { + if (i % 3 === 0) { + return `MATCH Line ${i}`; + } + return `Other Line ${i}`; + }); + const largeText = lines.join("\n"); + + // Write to file + await fs.writeFile("/grep-test.txt", largeText); + + // Test grep with wc + const result = await bash.exec("grep MATCH /grep-test.txt | wc -l"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + // Should match every 3rd line: 20000/3 = 6667 (rounded up) + expect(result.stdout.trim()).toBe("6667"); + }, 30000); + + it("should handle binary data correctly", async () => { + // Create binary data + const binaryData = new Uint8Array(10000); + for (let i = 0; i < binaryData.length; i++) { + binaryData[i] = i % 256; + } + + // Write binary file + await fs.writeFile("/binary.bin", binaryData); + + // Test wc -c (byte count) + const result = await bash.exec("wc -c /binary.bin"); + + console.log("Result stdout:", result.stdout.trim()); + console.log("Result exitCode:", result.exitCode); + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toContain("10000"); + }, 30000); +}); + +// Made with Bob From b7b1a5d828339b06ef04290603d4c42de5842c70 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Sat, 31 Jan 2026 08:11:01 -0500 Subject: [PATCH 2/3] also need to update base64 for large files --- src/commands/base64/base64.binary.test.ts | 67 +++++++++++++++++++++++ src/commands/base64/base64.ts | 25 ++++++++- src/fs/encoding.ts | 8 ++- src/interpreter/redirections.ts | 9 ++- 4 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/commands/base64/base64.binary.test.ts b/src/commands/base64/base64.binary.test.ts index 0c673d99..be0b8c9d 100644 --- a/src/commands/base64/base64.binary.test.ts +++ b/src/commands/base64/base64.binary.test.ts @@ -130,5 +130,72 @@ describe("base64 with binary data", () => { expect(result.stdout).toBe("test content"); }); + + it("should handle large binary files (1MB+)", async () => { + // Create a 1MB binary file with all byte values repeated + const size = 1024 * 1024; // 1MB + const data = new Uint8Array(size); + for (let i = 0; i < size; i++) { + data[i] = i % 256; + } + + const env = new Bash({ + files: { + "/large.bin": data, + }, + }); + + // Encode the large file + await env.exec("base64 /large.bin > /encoded.txt"); + + // Decode it back + await env.exec("base64 -d /encoded.txt > /decoded.bin"); + + // Verify the decoded file matches the original + const decoded = await env.fs.readFileBuffer( + env.fs.resolvePath("/", "/decoded.bin"), + ); + + expect(decoded.length).toBe(size); + // Check first, middle, and last bytes + expect(decoded[0]).toBe(0); + expect(decoded[255]).toBe(255); + expect(decoded[size / 2]).toBe((size / 2) % 256); + expect(decoded[size - 1]).toBe((size - 1) % 256); + + // Verify a sample of bytes throughout the file + for (let i = 0; i < size; i += 10000) { + expect(decoded[i]).toBe(i % 256); + } + }); + + it("should handle large files via pipe", async () => { + // Create a 512KB binary file + const size = 512 * 1024; + const data = new Uint8Array(size); + for (let i = 0; i < size; i++) { + data[i] = (i * 7) % 256; // Different pattern + } + + const env = new Bash({ + files: { + "/medium.bin": data, + }, + }); + + // Round-trip through pipe + await env.exec("cat /medium.bin | base64 | base64 -d > /output.bin"); + + // Verify the output matches the original + const output = await env.fs.readFileBuffer( + env.fs.resolvePath("/", "/output.bin"), + ); + + expect(output.length).toBe(size); + // Check a sample of bytes + for (let i = 0; i < size; i += 5000) { + expect(output[i]).toBe((i * 7) % 256); + } + }); }); }); diff --git a/src/commands/base64/base64.ts b/src/commands/base64/base64.ts index e6661365..65536d64 100644 --- a/src/commands/base64/base64.ts +++ b/src/commands/base64/base64.ts @@ -93,7 +93,19 @@ export const base64Command: Command = { // For decoding, read as text and strip whitespace const readResult = await readBinary(ctx, files, "base64"); if (!readResult.ok) return readResult.error; - // Use binary string (latin1) to preserve bytes for input + + // Use Buffer if available (Node.js) for better large file handling + if (typeof Buffer !== "undefined") { + const buffer = Buffer.from(readResult.data); + const cleaned = buffer.toString("utf8").replace(/\s/g, ""); + const decoded = Buffer.from(cleaned, "base64"); + // Convert to binary string (each char code = byte value) + // Use Buffer's latin1 encoding which treats each byte as a character + const result = decoded.toString("latin1"); + return { stdout: result, stderr: "", exitCode: 0 }; + } + + // Browser fallback - use binary string (latin1) to preserve bytes for input const input = String.fromCharCode(...readResult.data); const cleaned = input.replace(/\s/g, ""); // Decode base64 to binary string (each char code = byte value) @@ -105,8 +117,15 @@ export const base64Command: Command = { const readResult = await readBinary(ctx, files, "base64"); if (!readResult.ok) return readResult.error; - // Convert binary to base64 - let encoded = btoa(String.fromCharCode(...readResult.data)); + // Use Buffer if available (Node.js) for better large file handling + let encoded: string; + if (typeof Buffer !== "undefined") { + const buffer = Buffer.from(readResult.data); + encoded = buffer.toString("base64"); + } else { + // Browser fallback - convert binary to base64 + encoded = btoa(String.fromCharCode(...readResult.data)); + } if (wrapCols > 0) { const lines: string[] = []; diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts index dd0dfe25..ca8059fb 100644 --- a/src/fs/encoding.ts +++ b/src/fs/encoding.ts @@ -67,8 +67,12 @@ export function fromBuffer( .join(""); } if (encoding === "binary" || encoding === "latin1") { - // Use chunked approach to avoid call stack limit with large buffers - // String.fromCharCode(...buffer) fails with buffers > ~100KB + // Use Buffer if available (Node.js) - much more efficient and avoids spread operator limits + if (typeof Buffer !== "undefined") { + return Buffer.from(buffer).toString(encoding); + } + + // Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB const chunkSize = 65536; // 64KB chunks if (buffer.length <= chunkSize) { return String.fromCharCode(...buffer); diff --git a/src/interpreter/redirections.ts b/src/interpreter/redirections.ts index 8595ce3c..55ff9195 100644 --- a/src/interpreter/redirections.ts +++ b/src/interpreter/redirections.ts @@ -53,9 +53,16 @@ async function checkOutputRedirectTarget( * Determine the encoding to use for file I/O. * If all character codes are <= 255, use binary encoding (byte data). * Otherwise, use UTF-8 encoding (text with Unicode characters). + * For performance, only check the first 8KB of large strings. */ function getFileEncoding(content: string): "binary" | "utf8" { - for (let i = 0; i < content.length; i++) { + const SAMPLE_SIZE = 8192; // 8KB + + // For large strings, only check the first 8KB + // This is sufficient since UTF-8 files typically have Unicode chars early + const checkLength = Math.min(content.length, SAMPLE_SIZE); + + for (let i = 0; i < checkLength; i++) { if (content.charCodeAt(i) > 255) { return "utf8"; } From d0713abd4c19a2e87d383d818aae5e15530eb659 Mon Sep 17 00:00:00 2001 From: Robert Yates Date: Fri, 6 Feb 2026 11:36:34 -0500 Subject: [PATCH 3/3] linter fixes --- src/commands/base64/base64.ts | 4 ++-- src/fs/encoding.ts | 2 +- src/fs/read-write-fs/read-write-fs.piping.test.ts | 6 +++--- src/interpreter/redirections.ts | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/commands/base64/base64.ts b/src/commands/base64/base64.ts index 65536d64..f68cf2e7 100644 --- a/src/commands/base64/base64.ts +++ b/src/commands/base64/base64.ts @@ -93,7 +93,7 @@ export const base64Command: Command = { // For decoding, read as text and strip whitespace const readResult = await readBinary(ctx, files, "base64"); if (!readResult.ok) return readResult.error; - + // Use Buffer if available (Node.js) for better large file handling if (typeof Buffer !== "undefined") { const buffer = Buffer.from(readResult.data); @@ -104,7 +104,7 @@ export const base64Command: Command = { const result = decoded.toString("latin1"); return { stdout: result, stderr: "", exitCode: 0 }; } - + // Browser fallback - use binary string (latin1) to preserve bytes for input const input = String.fromCharCode(...readResult.data); const cleaned = input.replace(/\s/g, ""); diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts index ca8059fb..f1261594 100644 --- a/src/fs/encoding.ts +++ b/src/fs/encoding.ts @@ -71,7 +71,7 @@ export function fromBuffer( if (typeof Buffer !== "undefined") { return Buffer.from(buffer).toString(encoding); } - + // Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB const chunkSize = 65536; // 64KB chunks if (buffer.length <= chunkSize) { diff --git a/src/fs/read-write-fs/read-write-fs.piping.test.ts b/src/fs/read-write-fs/read-write-fs.piping.test.ts index bbb9c82e..e10c5fc1 100644 --- a/src/fs/read-write-fs/read-write-fs.piping.test.ts +++ b/src/fs/read-write-fs/read-write-fs.piping.test.ts @@ -36,7 +36,7 @@ describe("ReadWriteFs - Piping with large data", () => { it("should handle large data with wc -l using ReadWriteFs", async () => { // Create large text data with trailing newline (standard for text files) const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); - const largeText = lines.join("\n") + "\n"; + const largeText = `${lines.join("\n")}\n`; console.log( `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`, @@ -60,7 +60,7 @@ describe("ReadWriteFs - Piping with large data", () => { it("should handle large data with wc -l FILENAME using ReadWriteFs", async () => { // Create large text data with trailing newline const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`); - const largeText = lines.join("\n") + "\n"; + const largeText = `${lines.join("\n")}\n`; // Write to file await fs.writeFile("/data2.txt", largeText); @@ -78,7 +78,7 @@ describe("ReadWriteFs - Piping with large data", () => { it("should handle small data with wc -l using ReadWriteFs", async () => { // Create small text data with trailing newline const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`); - const smallText = lines.join("\n") + "\n"; + const smallText = `${lines.join("\n")}\n`; // Write to file await fs.writeFile("/small.txt", smallText); diff --git a/src/interpreter/redirections.ts b/src/interpreter/redirections.ts index 55ff9195..3869fb3c 100644 --- a/src/interpreter/redirections.ts +++ b/src/interpreter/redirections.ts @@ -57,11 +57,11 @@ async function checkOutputRedirectTarget( */ function getFileEncoding(content: string): "binary" | "utf8" { const SAMPLE_SIZE = 8192; // 8KB - + // For large strings, only check the first 8KB // This is sufficient since UTF-8 files typically have Unicode chars early const checkLength = Math.min(content.length, SAMPLE_SIZE); - + for (let i = 0; i < checkLength; i++) { if (content.charCodeAt(i) > 255) { return "utf8";