Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions src/commands/base64/base64.binary.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,72 @@ describe("base64 with binary data", () => {

expect(result.stdout).toBe("test content");
});

it("should handle large binary files (1MB+)", async () => {
  // Create a 1MB binary file cycling through all 256 byte values
  const size = 1024 * 1024; // 1MB
  const data = new Uint8Array(size);
  for (let i = 0; i < size; i++) {
    data[i] = i % 256;
  }

  const env = new Bash({
    files: {
      "/large.bin": data,
    },
  });

  // Encode the large file; fail fast if the command itself errored,
  // so a broken encode doesn't surface as a confusing decode mismatch
  const encodeResult = await env.exec("base64 /large.bin > /encoded.txt");
  expect(encodeResult.exitCode).toBe(0);

  // Decode it back
  const decodeResult = await env.exec("base64 -d /encoded.txt > /decoded.bin");
  expect(decodeResult.exitCode).toBe(0);

  // Verify the decoded file matches the original
  const decoded = await env.fs.readFileBuffer(
    env.fs.resolvePath("/", "/decoded.bin"),
  );

  expect(decoded.length).toBe(size);
  // Compare every byte (a plain loop is cheap even at 1MB), but assert only
  // once so a failure reports the first mismatching offset instead of
  // spamming one expect() per byte. Sampling every Nth byte would leave
  // corruption between samples undetected.
  let firstMismatch = -1;
  for (let i = 0; i < size; i++) {
    if (decoded[i] !== data[i]) {
      firstMismatch = i;
      break;
    }
  }
  expect(firstMismatch).toBe(-1);
});

it("should handle large files via pipe", async () => {
  // Build a 512KB binary payload with a non-trivial byte pattern
  const byteCount = 512 * 1024;
  const payload = new Uint8Array(byteCount);
  for (let offset = 0; offset < byteCount; offset++) {
    payload[offset] = (offset * 7) % 256; // Different pattern
  }

  const env = new Bash({
    files: {
      "/medium.bin": payload,
    },
  });

  // Encode then decode in a single pipeline, capturing the result to a file
  await env.exec("cat /medium.bin | base64 | base64 -d > /output.bin");

  // Read back the round-tripped bytes and compare against the generator
  const roundTripped = await env.fs.readFileBuffer(
    env.fs.resolvePath("/", "/output.bin"),
  );

  expect(roundTripped.length).toBe(byteCount);
  // Spot-check every 5000th byte against the original pattern
  for (let offset = 0; offset < byteCount; offset += 5000) {
    expect(roundTripped[offset]).toBe((offset * 7) % 256);
  }
});
});
});
25 changes: 22 additions & 3 deletions src/commands/base64/base64.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,19 @@ export const base64Command: Command = {
// For decoding, read as text and strip whitespace
const readResult = await readBinary(ctx, files, "base64");
if (!readResult.ok) return readResult.error;
// Use binary string (latin1) to preserve bytes for input

// Use Buffer if available (Node.js) for better large file handling
if (typeof Buffer !== "undefined") {
const buffer = Buffer.from(readResult.data);
const cleaned = buffer.toString("utf8").replace(/\s/g, "");
const decoded = Buffer.from(cleaned, "base64");
// Convert to binary string (each char code = byte value)
// Use Buffer's latin1 encoding which treats each byte as a character
const result = decoded.toString("latin1");
return { stdout: result, stderr: "", exitCode: 0 };
}

// Browser fallback - use binary string (latin1) to preserve bytes for input
const input = String.fromCharCode(...readResult.data);
const cleaned = input.replace(/\s/g, "");
// Decode base64 to binary string (each char code = byte value)
Expand All @@ -105,8 +117,15 @@ export const base64Command: Command = {
const readResult = await readBinary(ctx, files, "base64");
if (!readResult.ok) return readResult.error;

// Convert binary to base64
let encoded = btoa(String.fromCharCode(...readResult.data));
// Use Buffer if available (Node.js) for better large file handling
let encoded: string;
if (typeof Buffer !== "undefined") {
const buffer = Buffer.from(readResult.data);
encoded = buffer.toString("base64");
} else {
// Browser fallback - convert binary to base64
encoded = btoa(String.fromCharCode(...readResult.data));
}

if (wrapCols > 0) {
const lines: string[] = [];
Expand Down
28 changes: 26 additions & 2 deletions src/fs/encoding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,16 @@ export function toBuffer(
return bytes;
}
if (encoding === "binary" || encoding === "latin1") {
return Uint8Array.from(content, (c) => c.charCodeAt(0));
// Small strings use Uint8Array.from; large ones use a plain indexed loop, since the per-character mapper callback is slow at scale
const chunkSize = 65536; // 64KB chunks
if (content.length <= chunkSize) {
return Uint8Array.from(content, (c) => c.charCodeAt(0));
}
const result = new Uint8Array(content.length);
for (let i = 0; i < content.length; i++) {
result[i] = content.charCodeAt(i);
}
return result;
}
// Default to UTF-8 for text content
return textEncoder.encode(content);
Expand All @@ -58,7 +67,22 @@ export function fromBuffer(
.join("");
}
if (encoding === "binary" || encoding === "latin1") {
return String.fromCharCode(...buffer);
Copy link
Copy Markdown

@vercel vercel bot Jan 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Base64 encoding in fromBuffer fails with large buffers (>100KB) due to JavaScript call stack limit when using String.fromCharCode(...buffer) spread operator.

Fix on Vercel

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is what I removed; this is an agent review of the diff, and it is looking at the old code (which I removed).

// Use Buffer if available (Node.js) - much more efficient and avoids spread operator limits
if (typeof Buffer !== "undefined") {
return Buffer.from(buffer).toString(encoding);
}

// Browser fallback - String.fromCharCode(...buffer) fails with buffers > ~100KB
const chunkSize = 65536; // 64KB chunks
if (buffer.length <= chunkSize) {
return String.fromCharCode(...buffer);
}
let result = "";
for (let i = 0; i < buffer.length; i += chunkSize) {
const chunk = buffer.subarray(i, i + chunkSize);
result += String.fromCharCode(...chunk);
}
return result;
}
// Default to UTF-8 for text content
return textDecoder.decode(buffer);
Expand Down
164 changes: 164 additions & 0 deletions src/fs/read-write-fs/read-write-fs.piping.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import { mkdtemp, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { Bash } from "../../Bash.js";
import { ReadWriteFs } from "./read-write-fs.js";

/**
* Test piping with ReadWriteFs (real filesystem)
* This test suite validates that just-bash can handle large data through pipes
* when using ReadWriteFs backed by the real filesystem.
*/
describe("ReadWriteFs - Piping with large data", () => {
  let tempDir: string;
  let fs: ReadWriteFs;
  let bash: Bash;

  beforeAll(async () => {
    // Back the whole suite with a real directory under the OS temp root
    tempDir = await mkdtemp(join(tmpdir(), "bash-test-"));
    console.log("Created temp dir:", tempDir);

    // Point ReadWriteFs at the sandbox and build a Bash on top of it
    fs = new ReadWriteFs({ root: tempDir });
    bash = new Bash({ fs });
  });

  afterAll(async () => {
    // Tear the sandbox down, including everything the tests wrote
    if (tempDir) {
      await rm(tempDir, { recursive: true, force: true });
      console.log("Cleaned up temp dir:", tempDir);
    }
  });

  it("should handle large data with wc -l using ReadWriteFs", async () => {
    // 50000 numbered lines, newline-terminated (standard for text files)
    const numbered: string[] = [];
    for (let n = 1; n <= 50000; n++) {
      numbered.push(`Line ${n}`);
    }
    const largeText = `${numbered.join("\n")}\n`;

    console.log(
      `Generated text size: ${(largeText.length / 1024 / 1024).toFixed(2)}MB`,
    );
    console.log(`Line count: ${numbered.length}`);

    await fs.writeFile("/data.txt", largeText);

    // Pipe the file through cat into wc
    const result = await bash.exec("cat /data.txt | wc -l");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result stderr:", result.stderr);
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    expect(result.stdout.trim()).toBe("50000");
  }, 30000);

  it("should handle large data with wc -l FILENAME using ReadWriteFs", async () => {
    // Same 50000-line payload, but counted by filename rather than a pipe
    const numbered: string[] = [];
    for (let n = 1; n <= 50000; n++) {
      numbered.push(`Line ${n}`);
    }
    await fs.writeFile("/data2.txt", `${numbered.join("\n")}\n`);

    const result = await bash.exec("wc -l /data2.txt");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    expect(result.stdout.trim()).toContain("50000");
  }, 30000);

  it("should handle small data with wc -l using ReadWriteFs", async () => {
    // A tiny 100-line file as a control case
    const numbered: string[] = [];
    for (let n = 1; n <= 100; n++) {
      numbered.push(`Line ${n}`);
    }
    await fs.writeFile("/small.txt", `${numbered.join("\n")}\n`);

    const result = await bash.exec("cat /small.txt | wc -l");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    expect(result.stdout.trim()).toBe("100");
  }, 30000);

  it("should handle medium data with multiple pipes", async () => {
    // 10000 lines where each line number appears twice (no trailing newline)
    const duplicated: string[] = [];
    for (let i = 0; i < 10000; i++) {
      duplicated.push(`Line ${Math.floor(i / 2)}`);
    }
    await fs.writeFile("/medium.txt", duplicated.join("\n"));

    // Chain several commands: dedupe after sorting, then count
    const result = await bash.exec("cat /medium.txt | sort | uniq | wc -l");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    // 5000 distinct lines survive (Line 0 .. Line 4999)
    expect(result.stdout.trim()).toBe("5000");
  }, 30000);

  it("should handle grep with large files", async () => {
    // Every third line carries the MATCH marker
    const mixed: string[] = [];
    for (let i = 0; i < 20000; i++) {
      mixed.push(i % 3 === 0 ? `MATCH Line ${i}` : `Other Line ${i}`);
    }
    await fs.writeFile("/grep-test.txt", mixed.join("\n"));

    // Count only the matching lines
    const result = await bash.exec("grep MATCH /grep-test.txt | wc -l");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    // Indices 0, 3, ..., 19998 match: ceil(20000 / 3) = 6667 lines
    expect(result.stdout.trim()).toBe("6667");
  }, 30000);

  it("should handle binary data correctly", async () => {
    // 10000 bytes cycling through the full 0-255 range
    const binaryData = new Uint8Array(10000);
    for (let offset = 0; offset < binaryData.length; offset++) {
      binaryData[offset] = offset % 256;
    }

    await fs.writeFile("/binary.bin", binaryData);

    // wc -c reports the byte count
    const result = await bash.exec("wc -c /binary.bin");

    console.log("Result stdout:", result.stdout.trim());
    console.log("Result exitCode:", result.exitCode);

    expect(result.exitCode).toBe(0);
    expect(result.stdout.trim()).toContain("10000");
  }, 30000);
});

// Made with Bob
9 changes: 8 additions & 1 deletion src/interpreter/redirections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,16 @@ async function checkOutputRedirectTarget(
* Determine the encoding to use for file I/O.
* If all character codes are <= 255, use binary encoding (byte data).
* Otherwise, use UTF-8 encoding (text with Unicode characters).
* For performance, only check the first 8KB of large strings.
*/
function getFileEncoding(content: string): "binary" | "utf8" {
for (let i = 0; i < content.length; i++) {
const SAMPLE_SIZE = 8192; // 8KB

// For large strings, only check the first 8KB
// This is sufficient since UTF-8 files typically have Unicode chars early
const checkLength = Math.min(content.length, SAMPLE_SIZE);

for (let i = 0; i < checkLength; i++) {
if (content.charCodeAt(i) > 255) {
return "utf8";
}
Expand Down