From a2440cfe4929df1ba188992a3b4063973ad68b10 Mon Sep 17 00:00:00 2001
From: Robert Yates <Robert_Yates@us.ibm.com>
Date: Thu, 29 Jan 2026 22:45:08 -0500
Subject: [PATCH 1/3] large(ish) files were not working

---
 src/fs/encoding.ts                            |  24 ++-
 .../read-write-fs.piping.test.ts              | 164 ++++++++++++++++++
 2 files changed, 186 insertions(+), 2 deletions(-)
 create mode 100644 src/fs/read-write-fs/read-write-fs.piping.test.ts

diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts
index b9980457..dd0dfe25 100644
--- a/src/fs/encoding.ts
+++ b/src/fs/encoding.ts
@@ -36,7 +36,16 @@ export function toBuffer(
     return bytes;
   }
   if (encoding === "binary" || encoding === "latin1") {
-    return Uint8Array.from(content, (c) => c.charCodeAt(0));
+    // Use chunked approach for large strings to avoid performance issues
+    const chunkSize = 65536; // 64KB chunks
+    if (content.length <= chunkSize) {
+      return Uint8Array.from(content, (c) => c.charCodeAt(0));
+    }
+    const result = new Uint8Array(content.length);
+    for (let i = 0; i < content.length; i++) {
+      result[i] = content.charCodeAt(i);
+    }
+    return result;
   }
   // Default to UTF-8 for text content
   return textEncoder.encode(content);
@@ -58,7 +67,18 @@ export function fromBuffer(
       .join("");
   }
   if (encoding === "binary" || encoding === "latin1") {
-    return String.fromCharCode(...buffer);
+    // Use chunked approach to avoid call stack limit with large buffers
+    // String.fromCharCode(...buffer) fails with buffers > ~100KB
+    const chunkSize = 65536; // 64KB chunks
+    if (buffer.length <= chunkSize) {
+      return String.fromCharCode(...buffer);
+    }
+    let result = "";
+    for (let i = 0; i < buffer.length; i += chunkSize) {
+      const chunk = buffer.subarray(i, i + chunkSize);
+      result += String.fromCharCode(...chunk);
+    }
+    return result;
   }
   // Default to UTF-8 for text content
   return textDecoder.decode(buffer);
diff --git a/src/fs/read-write-fs/read-write-fs.piping.test.ts b/src/fs/read-write-fs/read-write-fs.piping.test.ts
new file mode 100644
index 00000000..bbb9c82e
--- /dev/null
+++ b/src/fs/read-write-fs/read-write-fs.piping.test.ts
@@ -0,0 +1,164 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { Bash } from "../../Bash.js";
+import { ReadWriteFs } from "./read-write-fs.js";
+
+/**
+ * Test piping with ReadWriteFs (real filesystem)
+ * This test suite validates that just-bash can handle large data through pipes
+ * when using ReadWriteFs backed by the real filesystem.
+ */
+describe("ReadWriteFs - Piping with large data", () => {
+  let tempDir: string;
+  let fs: ReadWriteFs;
+  let bash: Bash;
+
+  beforeAll(async () => {
+    // Create a real temp directory
+    tempDir = await mkdtemp(join(tmpdir(), "bash-test-"));
+    console.log("Created temp dir:", tempDir);
+
+    // Use ReadWriteFs with real filesystem
+    fs = new ReadWriteFs({ root: tempDir });
+    bash = new Bash({ fs });
+  });
+
+  afterAll(async () => {
+    // Cleanup
+    if (tempDir) {
+      await rm(tempDir, { recursive: true, force: true });
+      console.log("Cleaned up temp dir:", tempDir);
+    }
+  });
+
+  it("should handle large data with wc -l using ReadWriteFs", async () => {
+    // Create large text data with trailing newline (standard for text files)
+    const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`);
+    const largeText = lines.join("\n") + "\n";
+
+    console.log(
+      `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`,
+    );
+    console.log(`Line count: ${lines.length}`);
+
+    // Write to file
+    await fs.writeFile("/data.txt", largeText);
+
+    // Test piping through cat
+    const result = await bash.exec("cat /data.txt | wc -l");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result stderr:", result.stderr);
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout.trim()).toBe("50000");
+  }, 30000);
+
+  it("should handle large data with wc -l FILENAME using ReadWriteFs", async () => {
+    // Create large text data with trailing newline
+    const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`);
+    const largeText = lines.join("\n") + "\n";
+
+    // Write to file
+    await fs.writeFile("/data2.txt", largeText);
+
+    // Test direct file access
+    const result = await bash.exec("wc -l /data2.txt");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout.trim()).toContain("50000");
+  }, 30000);
+
+  it("should handle small data with wc -l using ReadWriteFs", async () => {
+    // Create small text data with trailing newline
+    const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`);
+    const smallText = lines.join("\n") + "\n";
+
+    // Write to file
+    await fs.writeFile("/small.txt", smallText);
+
+    // Test piping through cat
+    const result = await bash.exec("cat /small.txt | wc -l");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout.trim()).toBe("100");
+  }, 30000);
+
+  it("should handle medium data with multiple pipes", async () => {
+    // Create medium text data with some repeated lines
+    const lines = Array.from({ length: 10000 }, (_, i) => {
+      // Create some duplicates
+      const lineNum = Math.floor(i / 2);
+      return `Line ${lineNum}`;
+    });
+    const mediumText = lines.join("\n");
+
+    // Write to file
+    await fs.writeFile("/medium.txt", mediumText);
+
+    // Test piping through multiple commands
+    const result = await bash.exec("cat /medium.txt | sort | uniq | wc -l");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    // Should have 5000 unique lines (0-4999)
+    expect(result.stdout.trim()).toBe("5000");
+  }, 30000);
+
+  it("should handle grep with large files", async () => {
+    // Create large text data with specific patterns
+    const lines = Array.from({ length: 20000 }, (_, i) => {
+      if (i % 3 === 0) {
+        return `MATCH Line ${i}`;
+      }
+      return `Other Line ${i}`;
+    });
+    const largeText = lines.join("\n");
+
+    // Write to file
+    await fs.writeFile("/grep-test.txt", largeText);
+
+    // Test grep with wc
+    const result = await bash.exec("grep MATCH /grep-test.txt | wc -l");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    // Should match every 3rd line: 20000/3 = 6667 (rounded up)
+    expect(result.stdout.trim()).toBe("6667");
+  }, 30000);
+
+  it("should handle binary data correctly", async () => {
+    // Create binary data
+    const binaryData = new Uint8Array(10000);
+    for (let i = 0; i < binaryData.length; i++) {
+      binaryData[i] = i % 256;
+    }
+
+    // Write binary file
+    await fs.writeFile("/binary.bin", binaryData);
+
+    // Test wc -c (byte count)
+    const result = await bash.exec("wc -c /binary.bin");
+
+    console.log("Result stdout:", result.stdout.trim());
+    console.log("Result exitCode:", result.exitCode);
+
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout.trim()).toContain("10000");
+  }, 30000);
+});
+
+// Made with Bob

From b7b1a5d828339b06ef04290603d4c42de5842c70 Mon Sep 17 00:00:00 2001
From: Robert Yates <Robert_Yates@us.ibm.com>
Date: Sat, 31 Jan 2026 08:11:01 -0500
Subject: [PATCH 2/3] also need to update base64 for large files

---
 src/commands/base64/base64.binary.test.ts | 67 +++++++++++++++++++++++
 src/commands/base64/base64.ts             | 25 ++++++++-
 src/fs/encoding.ts                        |  8 ++-
 src/interpreter/redirections.ts           |  9 ++-
 4 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/src/commands/base64/base64.binary.test.ts b/src/commands/base64/base64.binary.test.ts
index 0c673d99..be0b8c9d 100644
--- a/src/commands/base64/base64.binary.test.ts
+++ b/src/commands/base64/base64.binary.test.ts
@@ -130,5 +130,72 @@ describe("base64 with binary data", () => {
 
       expect(result.stdout).toBe("test content");
     });
+
+    it("should handle large binary files (1MB+)", async () => {
+      // Create a 1MB binary file with all byte values repeated
+      const size = 1024 * 1024; // 1MB
+      const data = new Uint8Array(size);
+      for (let i = 0; i < size; i++) {
+        data[i] = i % 256;
+      }
+
+      const env = new Bash({
+        files: {
+          "/large.bin": data,
+        },
+      });
+
+      // Encode the large file
+      await env.exec("base64 /large.bin > /encoded.txt");
+
+      // Decode it back
+      await env.exec("base64 -d /encoded.txt > /decoded.bin");
+
+      // Verify the decoded file matches the original
+      const decoded = await env.fs.readFileBuffer(
+        env.fs.resolvePath("/", "/decoded.bin"),
+      );
+
+      expect(decoded.length).toBe(size);
+      // Check first, middle, and last bytes
+      expect(decoded[0]).toBe(0);
+      expect(decoded[255]).toBe(255);
+      expect(decoded[size / 2]).toBe((size / 2) % 256);
+      expect(decoded[size - 1]).toBe((size - 1) % 256);
+
+      // Verify a sample of bytes throughout the file
+      for (let i = 0; i < size; i += 10000) {
+        expect(decoded[i]).toBe(i % 256);
+      }
+    });
+
+    it("should handle large files via pipe", async () => {
+      // Create a 512KB binary file
+      const size = 512 * 1024;
+      const data = new Uint8Array(size);
+      for (let i = 0; i < size; i++) {
+        data[i] = (i * 7) % 256; // Different pattern
+      }
+
+      const env = new Bash({
+        files: {
+          "/medium.bin": data,
+        },
+      });
+
+      // Round-trip through pipe
+      await env.exec("cat /medium.bin | base64 | base64 -d > /output.bin");
+
+      // Verify the output matches the original
+      const output = await env.fs.readFileBuffer(
+        env.fs.resolvePath("/", "/output.bin"),
+      );
+
+      expect(output.length).toBe(size);
+      // Check a sample of bytes
+      for (let i = 0; i < size; i += 5000) {
+        expect(output[i]).toBe((i * 7) % 256);
+      }
+    });
   });
 });
diff --git a/src/commands/base64/base64.ts b/src/commands/base64/base64.ts
index e6661365..65536d64 100644
--- a/src/commands/base64/base64.ts
+++ b/src/commands/base64/base64.ts
@@ -93,7 +93,19 @@ export const base64Command: Command = {
         // For decoding, read as text and strip whitespace
         const readResult = await readBinary(ctx, files, "base64");
         if (!readResult.ok) return readResult.error;
-        // Use binary string (latin1) to preserve bytes for input
+        
+        // Use Buffer if available (Node.js) for better large file handling
+        if (typeof Buffer !== "undefined") {
+          const buffer = Buffer.from(readResult.data);
+          const cleaned = buffer.toString("utf8").replace(/\s/g, "");
+          const decoded = Buffer.from(cleaned, "base64");
+          // Convert to binary string (each char code = byte value)
+          // Use Buffer's latin1 encoding which treats each byte as a character
+          const result = decoded.toString("latin1");
+          return { stdout: result, stderr: "", exitCode: 0 };
+        }
+        
+        // Browser fallback - use binary string (latin1) to preserve bytes for input
         const input = String.fromCharCode(...readResult.data);
         const cleaned = input.replace(/\s/g, "");
         // Decode base64 to binary string (each char code = byte value)
@@ -105,8 +117,15 @@ export const base64Command: Command = {
       const readResult = await readBinary(ctx, files, "base64");
       if (!readResult.ok) return readResult.error;
 
-      // Convert binary to base64
-      let encoded = btoa(String.fromCharCode(...readResult.data));
+      // Use Buffer if available (Node.js) for better large file handling
+      let encoded: string;
+      if (typeof Buffer !== "undefined") {
+        const buffer = Buffer.from(readResult.data);
+        encoded = buffer.toString("base64");
+      } else {
+        // Browser fallback - convert binary to base64
+        encoded = btoa(String.fromCharCode(...readResult.data));
+      }
 
       if (wrapCols > 0) {
         const lines: string[] = [];
diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts
index dd0dfe25..ca8059fb 100644
--- a/src/fs/encoding.ts
+++ b/src/fs/encoding.ts
@@ -67,8 +67,12 @@ export function fromBuffer(
       .join("");
   }
   if (encoding === "binary" || encoding === "latin1") {
-    // Use chunked approach to avoid call stack limit with large buffers
-    // String.fromCharCode(...buffer) fails with buffers > ~100KB
+    // Use Buffer if available (Node.js) - much more efficient and avoids spread operator limits
+    if (typeof Buffer !== "undefined") {
+      return Buffer.from(buffer).toString(encoding);
+    }
+    
+    // Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB
     const chunkSize = 65536; // 64KB chunks
     if (buffer.length <= chunkSize) {
       return String.fromCharCode(...buffer);
diff --git a/src/interpreter/redirections.ts b/src/interpreter/redirections.ts
index 8595ce3c..55ff9195 100644
--- a/src/interpreter/redirections.ts
+++ b/src/interpreter/redirections.ts
@@ -53,9 +53,16 @@ async function checkOutputRedirectTarget(
  * Determine the encoding to use for file I/O.
  * If all character codes are <= 255, use binary encoding (byte data).
  * Otherwise, use UTF-8 encoding (text with Unicode characters).
+ * For performance, only check the first 8KB of large strings.
  */
 function getFileEncoding(content: string): "binary" | "utf8" {
-  for (let i = 0; i < content.length; i++) {
+  const SAMPLE_SIZE = 8192; // 8KB
+  
+  // For large strings, only check the first 8KB
+  // This is sufficient since UTF-8 files typically have Unicode chars early
+  const checkLength = Math.min(content.length, SAMPLE_SIZE);
+  
+  for (let i = 0; i < checkLength; i++) {
     if (content.charCodeAt(i) > 255) {
       return "utf8";
     }

From d0713abd4c19a2e87d383d818aae5e15530eb659 Mon Sep 17 00:00:00 2001
From: Robert Yates <Robert_Yates@us.ibm.com>
Date: Fri, 6 Feb 2026 11:36:34 -0500
Subject: [PATCH 3/3] linter fixes

---
 src/commands/base64/base64.ts                     | 4 ++--
 src/fs/encoding.ts                                | 2 +-
 src/fs/read-write-fs/read-write-fs.piping.test.ts | 6 +++---
 src/interpreter/redirections.ts                   | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/commands/base64/base64.ts b/src/commands/base64/base64.ts
index 65536d64..f68cf2e7 100644
--- a/src/commands/base64/base64.ts
+++ b/src/commands/base64/base64.ts
@@ -93,7 +93,7 @@ export const base64Command: Command = {
         // For decoding, read as text and strip whitespace
         const readResult = await readBinary(ctx, files, "base64");
         if (!readResult.ok) return readResult.error;
-        
+
         // Use Buffer if available (Node.js) for better large file handling
         if (typeof Buffer !== "undefined") {
           const buffer = Buffer.from(readResult.data);
@@ -104,7 +104,7 @@ export const base64Command: Command = {
           const result = decoded.toString("latin1");
           return { stdout: result, stderr: "", exitCode: 0 };
         }
-        
+
         // Browser fallback - use binary string (latin1) to preserve bytes for input
         const input = String.fromCharCode(...readResult.data);
         const cleaned = input.replace(/\s/g, "");
diff --git a/src/fs/encoding.ts b/src/fs/encoding.ts
index ca8059fb..f1261594 100644
--- a/src/fs/encoding.ts
+++ b/src/fs/encoding.ts
@@ -71,7 +71,7 @@ export function fromBuffer(
     if (typeof Buffer !== "undefined") {
       return Buffer.from(buffer).toString(encoding);
     }
-    
+
     // Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB
     const chunkSize = 65536; // 64KB chunks
     if (buffer.length <= chunkSize) {
diff --git a/src/fs/read-write-fs/read-write-fs.piping.test.ts b/src/fs/read-write-fs/read-write-fs.piping.test.ts
index bbb9c82e..e10c5fc1 100644
--- a/src/fs/read-write-fs/read-write-fs.piping.test.ts
+++ b/src/fs/read-write-fs/read-write-fs.piping.test.ts
@@ -36,7 +36,7 @@ describe("ReadWriteFs - Piping with large data", () => {
   it("should handle large data with wc -l using ReadWriteFs", async () => {
     // Create large text data with trailing newline (standard for text files)
     const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`);
-    const largeText = lines.join("\n") + "\n";
+    const largeText = `${lines.join("\n")}\n`;
 
     console.log(
       `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`,
@@ -60,7 +60,7 @@ describe("ReadWriteFs - Piping with large data", () => {
   it("should handle large data with wc -l FILENAME using ReadWriteFs", async () => {
     // Create large text data with trailing newline
     const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`);
-    const largeText = lines.join("\n") + "\n";
+    const largeText = `${lines.join("\n")}\n`;
 
     // Write to file
     await fs.writeFile("/data2.txt", largeText);
@@ -78,7 +78,7 @@ describe("ReadWriteFs - Piping with large data", () => {
   it("should handle small data with wc -l using ReadWriteFs", async () => {
     // Create small text data with trailing newline
     const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`);
-    const smallText = lines.join("\n") + "\n";
+    const smallText = `${lines.join("\n")}\n`;
 
     // Write to file
     await fs.writeFile("/small.txt", smallText);
diff --git a/src/interpreter/redirections.ts b/src/interpreter/redirections.ts
index 55ff9195..3869fb3c 100644
--- a/src/interpreter/redirections.ts
+++ b/src/interpreter/redirections.ts
@@ -57,11 +57,11 @@ async function checkOutputRedirectTarget(
  */
 function getFileEncoding(content: string): "binary" | "utf8" {
   const SAMPLE_SIZE = 8192; // 8KB
-  
+
   // For large strings, only check the first 8KB
   // This is sufficient since UTF-8 files typically have Unicode chars early
   const checkLength = Math.min(content.length, SAMPLE_SIZE);
-  
+
   for (let i = 0; i < checkLength; i++) {
     if (content.charCodeAt(i) > 255) {
       return "utf8";