@@ -25,6 +25,10 @@ use crate::error::{Error, Result};
2525use crate :: fs:: FileSystem ;
2626use crate :: interpreter:: ExecResult ;
2727
/// THREAT[TM-DOS-988]: Upper bound on the number of distinct files the
/// `getline var < file` cache may hold at once, preventing memory
/// exhaustion through unbounded file caching.
const MAX_GETLINE_CACHED_FILES: usize = 100;
31+
/// `awk` builtin: pattern scanning and text processing.
pub struct Awk;
3034
@@ -2100,6 +2104,9 @@ struct AwkInterpreter {
21002104 file_appends : HashMap < String , String > ,
21012105 /// Cached file inputs for `getline var < file` redirection.
21022106 /// Maps resolved path -> (lines, current_position).
2107+ /// THREAT[TM-DOS-988]: Bounded to `MAX_GETLINE_CACHED_FILES` entries and
2108+ /// per-file size capped by `FsLimits::max_file_size` to prevent memory
2109+ /// exhaustion via unbounded file caching.
21032110 file_inputs : HashMap < String , ( Vec < String > , usize ) > ,
21042111 /// VFS reference for lazy file reads (getline < file).
21052112 fs : Option < Arc < dyn FileSystem > > ,
@@ -2128,13 +2135,22 @@ impl AwkInterpreter {
21282135 /// Load a file into the `file_inputs` cache if not already present.
21292136 /// Uses a separate thread + tokio runtime to bridge async VFS → sync context.
21302137 /// Returns true on success, false on error.
2138+ ///
2139+ /// THREAT[TM-DOS-988]: Enforces two limits:
2140+ /// 1. Max `MAX_GETLINE_CACHED_FILES` distinct files cached at once.
2141+ /// 2. Per-file byte size capped by `FsLimits::max_file_size`.
21312142 fn ensure_file_loaded ( & mut self , resolved : & str ) -> bool {
21322143 if self . file_inputs . contains_key ( resolved) {
21332144 return true ;
21342145 }
2146+ // Enforce cached-file count limit.
2147+ if self . file_inputs . len ( ) >= MAX_GETLINE_CACHED_FILES {
2148+ return false ;
2149+ }
21352150 let Some ( fs) = & self . fs else {
21362151 return false ;
21372152 } ;
2153+ let max_file_size = fs. limits ( ) . max_file_size ;
21382154 let fs = fs. clone ( ) ;
21392155 let p = PathBuf :: from ( resolved) ;
21402156 // Spawn a thread with its own runtime to avoid blocking the current async runtime.
@@ -2148,6 +2164,10 @@ impl AwkInterpreter {
21482164 . join ( ) ;
21492165 match result {
21502166 Ok ( Ok ( bytes) ) => {
2167+ // Enforce per-file size limit.
2168+ if bytes. len ( ) as u64 > max_file_size {
2169+ return false ;
2170+ }
21512171 let text = String :: from_utf8_lossy ( & bytes) . into_owned ( ) ;
21522172 let lines: Vec < String > = text. lines ( ) . map ( |l| l. to_string ( ) ) . collect ( ) ;
21532173 self . file_inputs . insert ( resolved. to_string ( ) , ( lines, 0 ) ) ;
@@ -4379,4 +4399,129 @@ mod tests {
43794399 . unwrap ( ) ;
43804400 assert_eq ! ( result. stdout, "ok\n " ) ;
43814401 }
4402+
4403+ /// Helper: run AWK with a caller-provided VFS.
4404+ async fn run_awk_with_custom_fs (
4405+ args : & [ & str ] ,
4406+ stdin : Option < & str > ,
4407+ fs : Arc < InMemoryFs > ,
4408+ ) -> Result < ExecResult > {
4409+ let awk = Awk ;
4410+ let mut vars = HashMap :: new ( ) ;
4411+ let mut cwd = PathBuf :: from ( "/" ) ;
4412+ let args: Vec < String > = args. iter ( ) . map ( |s| s. to_string ( ) ) . collect ( ) ;
4413+
4414+ let ctx = Context {
4415+ args : & args,
4416+ env : & HashMap :: new ( ) ,
4417+ variables : & mut vars,
4418+ cwd : & mut cwd,
4419+ fs,
4420+ stdin,
4421+ #[ cfg( feature = "http_client" ) ]
4422+ http_client : None ,
4423+ #[ cfg( feature = "git" ) ]
4424+ git_client : None ,
4425+ shell : None ,
4426+ } ;
4427+
4428+ awk. execute ( ctx) . await
4429+ }
4430+
4431+ #[ tokio:: test]
4432+ async fn test_awk_getline_file_cache_limit_exceeded ( ) {
4433+ // Opening more than MAX_GETLINE_CACHED_FILES distinct files must fail
4434+ // gracefully (getline returns -1 for new files beyond the limit).
4435+ use crate :: fs:: FsLimits ;
4436+
4437+ let limits = FsLimits {
4438+ max_file_count : 200_000 ,
4439+ max_total_bytes : 200_000_000 ,
4440+ ..FsLimits :: default ( )
4441+ } ;
4442+ let fs = Arc :: new ( InMemoryFs :: with_limits ( limits) ) ;
4443+ let count = MAX_GETLINE_CACHED_FILES + 5 ;
4444+ for i in 0 ..count {
4445+ fs. write_file (
4446+ std:: path:: Path :: new ( & format ! ( "/tmp/f{i}.txt" ) ) ,
4447+ format ! ( "line{i}" ) . as_bytes ( ) ,
4448+ )
4449+ . await
4450+ . unwrap ( ) ;
4451+ }
4452+
4453+ // AWK program: read one line from each file, count successes
4454+ let prog = format ! (
4455+ r#"BEGIN{{ ok=0; for(i=0;i<{count};i++) {{ f="/tmp/f"i".txt"; if((getline x < f)>0) ok++ }} print ok }}"# ,
4456+ ) ;
4457+ let result = run_awk_with_custom_fs ( & [ & prog] , None , fs) . await . unwrap ( ) ;
4458+ let ok: usize = result. stdout . trim ( ) . parse ( ) . unwrap ( ) ;
4459+ // Exactly MAX_GETLINE_CACHED_FILES should succeed, rest should fail
4460+ assert_eq ! ( ok, MAX_GETLINE_CACHED_FILES ) ;
4461+ }
4462+
4463+ #[ tokio:: test]
4464+ async fn test_awk_getline_file_cache_within_limit ( ) {
4465+ // Opening a reasonable number of files should all succeed.
4466+ let fs = Arc :: new ( InMemoryFs :: new ( ) ) ;
4467+ let count = 10 ;
4468+ for i in 0 ..count {
4469+ fs. write_file (
4470+ std:: path:: Path :: new ( & format ! ( "/tmp/f{i}.txt" ) ) ,
4471+ format ! ( "data{i}" ) . as_bytes ( ) ,
4472+ )
4473+ . await
4474+ . unwrap ( ) ;
4475+ }
4476+
4477+ let prog = format ! (
4478+ r#"BEGIN{{ ok=0; for(i=0;i<{count};i++) {{ f="/tmp/f"i".txt"; if((getline x < f)>0) ok++ }} print ok }}"# ,
4479+ ) ;
4480+ let result = run_awk_with_custom_fs ( & [ & prog] , None , fs) . await . unwrap ( ) ;
4481+ let ok: usize = result. stdout . trim ( ) . parse ( ) . unwrap ( ) ;
4482+ assert_eq ! ( ok, count) ;
4483+ }
4484+
4485+ #[ tokio:: test]
4486+ async fn test_awk_getline_file_size_limit ( ) {
4487+ // A file exceeding FsLimits::max_file_size is rejected by getline.
4488+ // Defense-in-depth: VFS also enforces limits, so a file at exactly
4489+ // the boundary is accepted while one over is rejected at VFS level.
4490+ use crate :: fs:: FsLimits ;
4491+
4492+ let limits = FsLimits {
4493+ max_file_size : 100 ,
4494+ ..FsLimits :: unlimited ( )
4495+ } ;
4496+ let fs = Arc :: new ( InMemoryFs :: with_limits ( limits) ) ;
4497+ // Write a file within limits -- should be readable via getline.
4498+ fs. write_file ( std:: path:: Path :: new ( "/tmp/ok.txt" ) , & [ b'a' ; 100 ] )
4499+ . await
4500+ . unwrap ( ) ;
4501+ // Attempt to write an oversized file -- VFS rejects it, so getline
4502+ // returns -1 (file not found).
4503+ let _ = fs
4504+ . write_file ( std:: path:: Path :: new ( "/tmp/big.txt" ) , & [ b'x' ; 101 ] )
4505+ . await ;
4506+
4507+ // Within-limit file succeeds
4508+ let result = run_awk_with_custom_fs (
4509+ & [ r#"BEGIN{r=(getline x < "/tmp/ok.txt"); print r}"# ] ,
4510+ None ,
4511+ fs. clone ( ) ,
4512+ )
4513+ . await
4514+ . unwrap ( ) ;
4515+ assert_eq ! ( result. stdout, "1\n " ) ;
4516+
4517+ // Over-limit file fails (not stored by VFS)
4518+ let result = run_awk_with_custom_fs (
4519+ & [ r#"BEGIN{r=(getline x < "/tmp/big.txt"); print r}"# ] ,
4520+ None ,
4521+ fs,
4522+ )
4523+ . await
4524+ . unwrap ( ) ;
4525+ assert_eq ! ( result. stdout, "-1\n " ) ;
4526+ }
43824527}
0 commit comments