Skip to content

Commit 264ee4c

Browse files
committed
fix(builtins): limit AWK getline file cache to prevent memory exhaustion
Closes #988
1 parent 502c380 commit 264ee4c

File tree

1 file changed

+145
-0
lines changed
  • crates/bashkit/src/builtins

1 file changed

+145
-0
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ use crate::error::{Error, Result};
2525
use crate::fs::FileSystem;
2626
use crate::interpreter::ExecResult;
2727

/// THREAT[TM-DOS-988]: Maximum number of distinct files cached by `getline var < file`.
/// Prevents memory exhaustion from unbounded file caching.
const MAX_GETLINE_CACHED_FILES: usize = 100;
2832
/// awk command - pattern scanning and processing
pub struct Awk;
3034

@@ -2100,6 +2104,9 @@ struct AwkInterpreter {
21002104
file_appends: HashMap<String, String>,
21012105
/// Cached file inputs for `getline var < file` redirection.
21022106
/// Maps resolved path -> (lines, current_position).
2107+
/// THREAT[TM-DOS-988]: Bounded to `MAX_GETLINE_CACHED_FILES` entries and
2108+
/// per-file size capped by `FsLimits::max_file_size` to prevent memory
2109+
/// exhaustion via unbounded file caching.
21032110
file_inputs: HashMap<String, (Vec<String>, usize)>,
21042111
/// VFS reference for lazy file reads (getline < file).
21052112
fs: Option<Arc<dyn FileSystem>>,
@@ -2128,13 +2135,22 @@ impl AwkInterpreter {
21282135
/// Load a file into the `file_inputs` cache if not already present.
21292136
/// Uses a separate thread + tokio runtime to bridge async VFS → sync context.
21302137
/// Returns true on success, false on error.
2138+
///
2139+
/// THREAT[TM-DOS-988]: Enforces two limits:
2140+
/// 1. Max `MAX_GETLINE_CACHED_FILES` distinct files cached at once.
2141+
/// 2. Per-file byte size capped by `FsLimits::max_file_size`.
21312142
fn ensure_file_loaded(&mut self, resolved: &str) -> bool {
21322143
if self.file_inputs.contains_key(resolved) {
21332144
return true;
21342145
}
2146+
// Enforce cached-file count limit.
2147+
if self.file_inputs.len() >= MAX_GETLINE_CACHED_FILES {
2148+
return false;
2149+
}
21352150
let Some(fs) = &self.fs else {
21362151
return false;
21372152
};
2153+
let max_file_size = fs.limits().max_file_size;
21382154
let fs = fs.clone();
21392155
let p = PathBuf::from(resolved);
21402156
// Spawn a thread with its own runtime to avoid blocking the current async runtime.
@@ -2148,6 +2164,10 @@ impl AwkInterpreter {
21482164
.join();
21492165
match result {
21502166
Ok(Ok(bytes)) => {
2167+
// Enforce per-file size limit.
2168+
if bytes.len() as u64 > max_file_size {
2169+
return false;
2170+
}
21512171
let text = String::from_utf8_lossy(&bytes).into_owned();
21522172
let lines: Vec<String> = text.lines().map(|l| l.to_string()).collect();
21532173
self.file_inputs.insert(resolved.to_string(), (lines, 0));
@@ -4379,4 +4399,129 @@ mod tests {
43794399
.unwrap();
43804400
assert_eq!(result.stdout, "ok\n");
43814401
}
4402+
4403+
/// Helper: run AWK with a caller-provided VFS.
4404+
async fn run_awk_with_custom_fs(
4405+
args: &[&str],
4406+
stdin: Option<&str>,
4407+
fs: Arc<InMemoryFs>,
4408+
) -> Result<ExecResult> {
4409+
let awk = Awk;
4410+
let mut vars = HashMap::new();
4411+
let mut cwd = PathBuf::from("/");
4412+
let args: Vec<String> = args.iter().map(|s| s.to_string()).collect();
4413+
4414+
let ctx = Context {
4415+
args: &args,
4416+
env: &HashMap::new(),
4417+
variables: &mut vars,
4418+
cwd: &mut cwd,
4419+
fs,
4420+
stdin,
4421+
#[cfg(feature = "http_client")]
4422+
http_client: None,
4423+
#[cfg(feature = "git")]
4424+
git_client: None,
4425+
shell: None,
4426+
};
4427+
4428+
awk.execute(ctx).await
4429+
}
4430+
4431+
#[tokio::test]
4432+
async fn test_awk_getline_file_cache_limit_exceeded() {
4433+
// Opening more than MAX_GETLINE_CACHED_FILES distinct files must fail
4434+
// gracefully (getline returns -1 for new files beyond the limit).
4435+
use crate::fs::FsLimits;
4436+
4437+
let limits = FsLimits {
4438+
max_file_count: 200_000,
4439+
max_total_bytes: 200_000_000,
4440+
..FsLimits::default()
4441+
};
4442+
let fs = Arc::new(InMemoryFs::with_limits(limits));
4443+
let count = MAX_GETLINE_CACHED_FILES + 5;
4444+
for i in 0..count {
4445+
fs.write_file(
4446+
std::path::Path::new(&format!("/tmp/f{i}.txt")),
4447+
format!("line{i}").as_bytes(),
4448+
)
4449+
.await
4450+
.unwrap();
4451+
}
4452+
4453+
// AWK program: read one line from each file, count successes
4454+
let prog = format!(
4455+
r#"BEGIN{{ ok=0; for(i=0;i<{count};i++) {{ f="/tmp/f"i".txt"; if((getline x < f)>0) ok++ }} print ok }}"#,
4456+
);
4457+
let result = run_awk_with_custom_fs(&[&prog], None, fs).await.unwrap();
4458+
let ok: usize = result.stdout.trim().parse().unwrap();
4459+
// Exactly MAX_GETLINE_CACHED_FILES should succeed, rest should fail
4460+
assert_eq!(ok, MAX_GETLINE_CACHED_FILES);
4461+
}
4462+
4463+
#[tokio::test]
4464+
async fn test_awk_getline_file_cache_within_limit() {
4465+
// Opening a reasonable number of files should all succeed.
4466+
let fs = Arc::new(InMemoryFs::new());
4467+
let count = 10;
4468+
for i in 0..count {
4469+
fs.write_file(
4470+
std::path::Path::new(&format!("/tmp/f{i}.txt")),
4471+
format!("data{i}").as_bytes(),
4472+
)
4473+
.await
4474+
.unwrap();
4475+
}
4476+
4477+
let prog = format!(
4478+
r#"BEGIN{{ ok=0; for(i=0;i<{count};i++) {{ f="/tmp/f"i".txt"; if((getline x < f)>0) ok++ }} print ok }}"#,
4479+
);
4480+
let result = run_awk_with_custom_fs(&[&prog], None, fs).await.unwrap();
4481+
let ok: usize = result.stdout.trim().parse().unwrap();
4482+
assert_eq!(ok, count);
4483+
}
4484+
4485+
#[tokio::test]
4486+
async fn test_awk_getline_file_size_limit() {
4487+
// A file exceeding FsLimits::max_file_size is rejected by getline.
4488+
// Defense-in-depth: VFS also enforces limits, so a file at exactly
4489+
// the boundary is accepted while one over is rejected at VFS level.
4490+
use crate::fs::FsLimits;
4491+
4492+
let limits = FsLimits {
4493+
max_file_size: 100,
4494+
..FsLimits::unlimited()
4495+
};
4496+
let fs = Arc::new(InMemoryFs::with_limits(limits));
4497+
// Write a file within limits -- should be readable via getline.
4498+
fs.write_file(std::path::Path::new("/tmp/ok.txt"), &[b'a'; 100])
4499+
.await
4500+
.unwrap();
4501+
// Attempt to write an oversized file -- VFS rejects it, so getline
4502+
// returns -1 (file not found).
4503+
let _ = fs
4504+
.write_file(std::path::Path::new("/tmp/big.txt"), &[b'x'; 101])
4505+
.await;
4506+
4507+
// Within-limit file succeeds
4508+
let result = run_awk_with_custom_fs(
4509+
&[r#"BEGIN{r=(getline x < "/tmp/ok.txt"); print r}"#],
4510+
None,
4511+
fs.clone(),
4512+
)
4513+
.await
4514+
.unwrap();
4515+
assert_eq!(result.stdout, "1\n");
4516+
4517+
// Over-limit file fails (not stored by VFS)
4518+
let result = run_awk_with_custom_fs(
4519+
&[r#"BEGIN{r=(getline x < "/tmp/big.txt"); print r}"#],
4520+
None,
4521+
fs,
4522+
)
4523+
.await
4524+
.unwrap();
4525+
assert_eq!(result.stdout, "-1\n");
4526+
}
43824527
}

0 commit comments

Comments
 (0)