Skip to content

Commit 53b8477

Browse files
chaliy and Claude authored
feat: bash compatibility — compound arrays, grep -f, awk getline, jq env/input
## Summary - **Compound associative array init**: `declare -A m=([k]=v)` now tokenized as single word; lexer peeks for `[` after `=(` to distinguish from indexed arrays - **grep -f pattern-only mode**: fix parse requiring positional pattern when `-f` is specified; unskip grep -r and grep -f tests with proper VFS file setup - **AWK getline**: implement `getline` statement with index-based iteration; fix ORS and missing-field test expectations - **JQ env access**: expose bashkit shell vars to process env so jaq's `env` builtin works; RAII drop guard for cleanup on all return paths - **String ops, read -r, heredocs** (from prior commit on branch): prefix/suffix replace, heredoc tests, read builtin backslash handling ## Test plan - [x] `cargo clippy --all-targets --all-features -- -D warnings` clean - [x] `cargo test --all-features` — all pass (69 lib + 101 doc) - [x] `cargo test --test spec_tests` — 13 suites green - [x] 9 previously-skipped tests unskipped - [x] ~12 new spec + unit tests added (positive and negative) --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent a7dbdfd commit 53b8477

File tree

9 files changed

+377
-28
lines changed

9 files changed

+377
-28
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ enum AwkAction {
8383
Break,
8484
Continue,
8585
Delete(String, AwkExpr), // delete arr[key]
86+
Getline, // getline — read next input record into $0
8687
#[allow(dead_code)] // Exit code support for future
8788
Exit(Option<AwkExpr>),
8889
Expression(AwkExpr),
@@ -484,6 +485,9 @@ impl<'a> AwkParser<'a> {
484485
if self.matches_keyword("delete") {
485486
return self.parse_delete();
486487
}
488+
if self.matches_keyword("getline") {
489+
return Ok(AwkAction::Getline);
490+
}
487491
if self.matches_keyword("exit") {
488492
self.skip_whitespace();
489493
if self.pos < self.input.len() {
@@ -1049,7 +1053,7 @@ impl<'a> AwkParser<'a> {
10491053
let remaining = &self.input[self.pos..];
10501054
let keywords = [
10511055
"in", "if", "else", "while", "for", "do", "break", "continue", "next", "exit",
1052-
"delete", "print", "printf",
1056+
"delete", "getline", "print", "printf",
10531057
];
10541058
for kw in keywords {
10551059
if remaining.starts_with(kw) {
@@ -1510,13 +1514,19 @@ enum AwkFlow {
15101514
struct AwkInterpreter {
15111515
state: AwkState,
15121516
output: String,
1517+
/// Lines of current input file (set before main loop)
1518+
input_lines: Vec<String>,
1519+
/// Current line index within input_lines
1520+
line_index: usize,
15131521
}
15141522

15151523
impl AwkInterpreter {
15161524
fn new() -> Self {
15171525
Self {
15181526
state: AwkState::default(),
15191527
output: String::new(),
1528+
input_lines: Vec::new(),
1529+
line_index: 0,
15201530
}
15211531
}
15221532

@@ -2380,6 +2390,15 @@ impl AwkInterpreter {
23802390
AwkFlow::Continue
23812391
}
23822392
AwkAction::Next => AwkFlow::Next,
2393+
AwkAction::Getline => {
2394+
// Advance to next input line and update $0, NR, NF, FNR
2395+
self.line_index += 1;
2396+
if self.line_index < self.input_lines.len() {
2397+
let line = self.input_lines[self.line_index].clone();
2398+
self.state.set_line(&line);
2399+
}
2400+
AwkFlow::Continue
2401+
}
23832402
AwkAction::Break => AwkFlow::Break,
23842403
AwkAction::Continue => AwkFlow::LoopContinue,
23852404
AwkAction::Exit(expr) => {
@@ -2556,8 +2575,13 @@ impl Builtin for Awk {
25562575

25572576
'files: for input in inputs {
25582577
interp.state.fnr = 0;
2559-
for line in input.lines() {
2560-
interp.state.set_line(line);
2578+
// Index-based iteration so getline can advance the index
2579+
interp.input_lines = input.lines().map(|l| l.to_string()).collect();
2580+
interp.line_index = 0;
2581+
2582+
while interp.line_index < interp.input_lines.len() {
2583+
let line = interp.input_lines[interp.line_index].clone();
2584+
interp.state.set_line(&line);
25612585

25622586
for rule in &program.main_rules {
25632587
// Check pattern
@@ -2587,6 +2611,7 @@ impl Builtin for Awk {
25872611
}
25882612
}
25892613
}
2614+
interp.line_index += 1;
25902615
}
25912616
}
25922617

@@ -3024,6 +3049,29 @@ mod tests {
30243049
assert_eq!(result.stdout, "line1\n");
30253050
}
30263051

3052+
#[tokio::test]
3053+
async fn test_awk_getline_basic() {
3054+
let result = run_awk(&["{getline; print}"], Some("line1\nline2"))
3055+
.await
3056+
.unwrap();
3057+
assert_eq!(result.stdout, "line2\n");
3058+
}
3059+
3060+
#[tokio::test]
3061+
async fn test_awk_getline_updates_fields() {
3062+
let result = run_awk(&["{getline; print $1}"], Some("a b\nc d"))
3063+
.await
3064+
.unwrap();
3065+
assert_eq!(result.stdout, "c\n");
3066+
}
3067+
3068+
#[tokio::test]
3069+
async fn test_awk_getline_at_eof() {
3070+
// getline at EOF should keep current $0
3071+
let result = run_awk(&["{getline; print}"], Some("only")).await.unwrap();
3072+
assert_eq!(result.stdout, "only\n");
3073+
}
3074+
30273075
#[tokio::test]
30283076
async fn test_awk_revenue_calculation() {
30293077
// This is the exact eval task pattern

crates/bashkit/src/builtins/grep.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,8 @@ impl GrepOptions {
278278
i += 1;
279279
}
280280

281-
// First positional is pattern (if no -e patterns)
282-
if opts.patterns.is_empty() {
281+
// First positional is pattern (if no -e patterns and no -f file)
282+
if opts.patterns.is_empty() && opts.pattern_file.is_none() {
283283
if positional.is_empty() {
284284
return Err(Error::Execution("grep: missing pattern".to_string()));
285285
}

crates/bashkit/src/builtins/jq.rs

Lines changed: 114 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,21 @@ use crate::interpreter::ExecResult;
2222
/// produce deeply nested parse trees in jaq.
2323
const MAX_JQ_JSON_DEPTH: usize = 100;
2424

25+
/// RAII guard that restores process env vars when dropped.
26+
/// Ensures cleanup even on early-return error paths.
27+
struct EnvRestoreGuard(Vec<(String, Option<String>)>);
28+
29+
impl Drop for EnvRestoreGuard {
30+
fn drop(&mut self) {
31+
for (k, old) in &self.0 {
32+
match old {
33+
Some(v) => std::env::set_var(k, v),
34+
None => std::env::remove_var(k),
35+
}
36+
}
37+
}
38+
}
39+
2540
/// jq command - JSON processor
2641
pub struct Jq;
2742

@@ -328,20 +343,40 @@ impl Builtin for Jq {
328343
json_vals.into_iter().map(Val::from).collect()
329344
};
330345

346+
// Expose bashkit's shell env/variables to the process environment so
347+
// jaq's built-in `env` function (which reads std::env::vars()) works.
348+
// Include both ctx.env (prefix assignments like FOO=bar jq ...)
349+
// and ctx.variables (set via export builtin).
350+
// Uses a drop guard to ensure cleanup on all return paths.
351+
let mut seen = std::collections::HashSet::new();
352+
let mut env_backup: Vec<(String, Option<String>)> = Vec::new();
353+
for (k, v) in ctx.env.iter().chain(ctx.variables.iter()) {
354+
if seen.insert(k.clone()) {
355+
let old = std::env::var(k).ok();
356+
std::env::set_var(k, v);
357+
env_backup.push((k.clone(), old));
358+
}
359+
}
360+
let _env_guard = EnvRestoreGuard(env_backup);
361+
331362
// Track for -e exit status
332363
let mut has_output = false;
333364
let mut all_null_or_false = true;
334365

335-
for jaq_input in inputs_to_process {
336-
// Create empty inputs iterator
337-
let inputs = RcIter::new(core::iter::empty());
366+
// Shared input iterator: main loop pops one value per filter run,
367+
// and jaq's input/inputs functions consume from the same source.
368+
let shared_inputs = RcIter::new(inputs_to_process.into_iter().map(Ok::<Val, String>));
369+
370+
for jaq_input in &shared_inputs {
371+
let jaq_input: Val =
372+
jaq_input.map_err(|e| Error::Execution(format!("jq: input error: {}", e)))?;
338373

339374
// Run the filter, passing any --arg/--argjson variable values
340375
let var_vals: Vec<Val> = var_bindings
341376
.iter()
342377
.map(|(_, v)| Val::from(v.clone()))
343378
.collect();
344-
let ctx = Ctx::new(var_vals, &inputs);
379+
let ctx = Ctx::new(var_vals, &shared_inputs);
345380
for result in filter.run((ctx, jaq_input)) {
346381
match result {
347382
Ok(val) => {
@@ -594,6 +629,27 @@ mod tests {
594629
/// TM-DOS-027: Deeply nested JSON arrays must be rejected
595630
/// Note: serde_json has a built-in recursion limit (~128 levels) that fires first.
596631
/// Our check_json_depth is defense-in-depth for values within serde's limit.
632+
#[tokio::test]
633+
async fn test_jq_input_reads_next() {
634+
let result = run_jq_with_args(&["input"], "1\n2").await.unwrap();
635+
assert_eq!(result.trim(), "2");
636+
}
637+
638+
#[tokio::test]
639+
async fn test_jq_inputs_collects_remaining() {
640+
let result = run_jq_with_args(&["-c", "[inputs]"], "1\n2\n3")
641+
.await
642+
.unwrap();
643+
assert_eq!(result.trim(), "[2,3]");
644+
}
645+
646+
#[tokio::test]
647+
async fn test_jq_inputs_single_value() {
648+
// With single input, inputs yields empty array
649+
let result = run_jq_with_args(&["-c", "[inputs]"], "42").await.unwrap();
650+
assert_eq!(result.trim(), "[]");
651+
}
652+
597653
#[tokio::test]
598654
async fn test_jq_json_depth_limit_arrays() {
599655
// Build 150-level nested JSON: [[[[....[1]....]]]]
@@ -787,6 +843,60 @@ mod tests {
787843
assert_eq!(arr[1]["id"], 2);
788844
}
789845

846+
// --- env tests ---
847+
848+
#[tokio::test]
849+
async fn test_jq_env_access() {
850+
let jq = Jq;
851+
let fs = Arc::new(InMemoryFs::new());
852+
let mut vars = HashMap::new();
853+
let mut cwd = PathBuf::from("/");
854+
let mut env = HashMap::new();
855+
env.insert("TESTVAR".to_string(), "hello".to_string());
856+
let args = vec!["-n".to_string(), "env.TESTVAR".to_string()];
857+
858+
let ctx = Context {
859+
args: &args,
860+
env: &env,
861+
variables: &mut vars,
862+
cwd: &mut cwd,
863+
fs,
864+
stdin: None,
865+
#[cfg(feature = "http_client")]
866+
http_client: None,
867+
#[cfg(feature = "git")]
868+
git_client: None,
869+
};
870+
871+
let result = jq.execute(ctx).await.unwrap();
872+
assert_eq!(result.stdout.trim(), "\"hello\"");
873+
}
874+
875+
#[tokio::test]
876+
async fn test_jq_env_missing_var() {
877+
let jq = Jq;
878+
let fs = Arc::new(InMemoryFs::new());
879+
let mut vars = HashMap::new();
880+
let mut cwd = PathBuf::from("/");
881+
let args = vec!["-n".to_string(), "env.NO_SUCH_VAR_999".to_string()];
882+
883+
let ctx = Context {
884+
args: &args,
885+
env: &HashMap::new(),
886+
variables: &mut vars,
887+
cwd: &mut cwd,
888+
fs,
889+
stdin: None,
890+
#[cfg(feature = "http_client")]
891+
http_client: None,
892+
#[cfg(feature = "git")]
893+
git_client: None,
894+
};
895+
896+
let result = jq.execute(ctx).await.unwrap();
897+
assert_eq!(result.stdout.trim(), "null");
898+
}
899+
790900
// --- Argument parsing bug regression tests ---
791901

792902
#[tokio::test]

crates/bashkit/src/parser/lexer.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,57 @@ impl<'a> Lexer<'a> {
477477
} else {
478478
word.push('\\');
479479
}
480+
} else if ch == '(' && word.ends_with('=') && self.looks_like_assoc_assign() {
481+
// Associative compound assignment: var=([k]="v" ...) — keep entire
482+
// (...) as part of word so declare -A m=([k]="v") stays one token.
483+
// Regular indexed arr=(a b "c d") is left for parser token-by-token path.
484+
word.push(ch);
485+
self.advance();
486+
let mut depth = 1;
487+
while let Some(c) = self.peek_char() {
488+
word.push(c);
489+
self.advance();
490+
match c {
491+
'(' => depth += 1,
492+
')' => {
493+
depth -= 1;
494+
if depth == 0 {
495+
break;
496+
}
497+
}
498+
'"' => {
499+
while let Some(qc) = self.peek_char() {
500+
word.push(qc);
501+
self.advance();
502+
if qc == '"' {
503+
break;
504+
}
505+
if qc == '\\' {
506+
if let Some(esc) = self.peek_char() {
507+
word.push(esc);
508+
self.advance();
509+
}
510+
}
511+
}
512+
}
513+
'\'' => {
514+
while let Some(qc) = self.peek_char() {
515+
word.push(qc);
516+
self.advance();
517+
if qc == '\'' {
518+
break;
519+
}
520+
}
521+
}
522+
'\\' => {
523+
if let Some(esc) = self.peek_char() {
524+
word.push(esc);
525+
self.advance();
526+
}
527+
}
528+
_ => {}
529+
}
530+
}
480531
} else if self.is_word_char(ch) {
481532
word.push(ch);
482533
self.advance();
@@ -756,6 +807,26 @@ impl<'a> Lexer<'a> {
756807
Some(Token::Word(word))
757808
}
758809

810+
/// Peek ahead (without consuming) to see if `=(` starts an associative
811+
/// compound assignment like `([key]=val ...)`. Returns true when the
812+
/// first non-whitespace char after `(` is `[`.
813+
fn looks_like_assoc_assign(&self) -> bool {
814+
let mut chars = self.chars.clone();
815+
// Skip the `(` we haven't consumed yet
816+
if chars.next() != Some('(') {
817+
return false;
818+
}
819+
// Skip optional whitespace
820+
for ch in chars {
821+
match ch {
822+
' ' | '\t' => continue,
823+
'[' => return true,
824+
_ => return false,
825+
}
826+
}
827+
false
828+
}
829+
759830
fn is_word_char(&self, ch: char) -> bool {
760831
!matches!(
761832
ch,
@@ -958,4 +1029,25 @@ mod tests {
9581029
let content = lexer.read_heredoc("EOF");
9591030
assert_eq!(content, "hello\nworld\n");
9601031
}
1032+
1033+
#[test]
1034+
fn test_assoc_compound_assignment() {
1035+
// declare -A m=([foo]="bar" [baz]="qux") should keep the compound
1036+
// assignment as a single Word token
1037+
let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
1038+
assert_eq!(
1039+
lexer.next_token(),
1040+
Some(Token::Word(r#"m=([foo]="bar" [baz]="qux")"#.to_string()))
1041+
);
1042+
assert_eq!(lexer.next_token(), None);
1043+
}
1044+
1045+
#[test]
1046+
fn test_indexed_array_not_collapsed() {
1047+
// arr=("hello world") should NOT be collapsed — parser handles
1048+
// quoted elements token-by-token via the LeftParen path
1049+
let mut lexer = Lexer::new(r#"arr=("hello world")"#);
1050+
assert_eq!(lexer.next_token(), Some(Token::Word("arr=".to_string())));
1051+
assert_eq!(lexer.next_token(), Some(Token::LeftParen));
1052+
}
9611053
}

0 commit comments

Comments (0)