/// Returns `emitted` without trailing blanks (space, tab, carriage return),
/// so that the last significant character can be inspected.
fn awk_trim_blanks(emitted: &str) -> &str {
    emitted.trim_end_matches(|c: char| matches!(c, ' ' | '\t' | '\r'))
}

/// Returns the identifier-like word that `code` ends with, or `""` when the
/// last character is not part of a word.
fn awk_trailing_word(code: &str) -> &str {
    code.rsplit(|c: char| !(c.is_alphanumeric() || c == '_'))
        .next()
        .unwrap_or("")
}

/// Decides whether a `/` following the already-emitted text can open a regex
/// literal (as opposed to being the division operator).
///
/// A `/` that follows an operand — identifier, number, `)`, `]`, a closing
/// `"`, or a postfix `++`/`--` — is division. The keywords `print`, `printf`
/// and `return` look like identifiers but may be followed by an expression
/// that starts with a regex, so they are excluded from the operand rule.
fn awk_regex_may_start(emitted: &str) -> bool {
    let code = awk_trim_blanks(emitted);
    if code.ends_with("++") || code.ends_with("--") {
        return false;
    }
    match code.chars().next_back() {
        None => true,
        Some(')' | ']' | '"' | '.') => false,
        Some(c) if c.is_alphanumeric() || c == '_' => {
            matches!(awk_trailing_word(code), "print" | "printf" | "return")
        }
        Some(_) => true,
    }
}

/// Finds the index of the unescaped `/` that closes the regex literal opened
/// at `start`. Returns `None` when the line (or the input) ends first, which
/// means the `/` at `start` cannot be a regex opener.
fn awk_regex_literal_end(chars: &[char], start: usize) -> Option<usize> {
    let mut i = start + 1;
    while i < chars.len() {
        match chars[i] {
            '/' => return Some(i),
            '\n' => return None,
            // Skip the escaped character, but never hop over a newline.
            '\\' if chars.get(i + 1) != Some(&'\n') => i += 2,
            _ => i += 1,
        }
    }
    None
}

/// Copies the string literal whose opening quote is at `start` into `out`,
/// honouring backslash escapes, and returns the index just past the closing
/// quote (or `chars.len()` when the literal is unterminated).
fn awk_copy_string_literal(chars: &[char], start: usize, out: &mut String) -> usize {
    out.push(chars[start]);
    let mut i = start + 1;
    while i < chars.len() {
        let c = chars[i];
        out.push(c);
        i += 1;
        if c == '"' {
            break;
        }
        if c == '\\' && i < chars.len() {
            // Escaped character: copy verbatim so an escaped quote does not end the literal.
            out.push(chars[i]);
            i += 1;
        }
    }
    i
}

/// Returns `true` when `code` (blank-trimmed emitted text) ends in a token
/// after which a newline does not terminate the statement: `{`, `,`, `;`,
/// `&&`, `||`, `else` or `do`.
fn awk_statement_continues(code: &str) -> bool {
    code.ends_with("&&")
        || code.ends_with("||")
        || matches!(code.chars().next_back(), None | Some('{' | ',' | ';'))
        || matches!(awk_trailing_word(code), "else" | "do")
}

/// Returns `true` when the next non-whitespace token at or after `from` is
/// the keyword `else`.
fn awk_next_word_is_else(chars: &[char], from: usize) -> bool {
    let is_word = |c: char| c.is_alphanumeric() || c == '_';
    let mut rest = chars[from..]
        .iter()
        .copied()
        .skip_while(|c| c.is_whitespace());
    "else".chars().all(|expected| rest.next() == Some(expected))
        && !matches!(rest.next(), Some(c) if is_word(c))
}

/// Preprocess an awk program so that newlines inside action blocks act as
/// statement separators, as the POSIX awk grammar requires.
///
/// Inside braces a newline is rewritten to `;`, except where awk treats it as
/// a continuation, in which case it becomes a single space:
///
/// * after `{`, `,`, `;`, `&&`, `||`, `else` and `do`;
/// * after the `)` that closes an `if` / `for` / `while` header;
/// * anywhere inside an open parenthesis;
/// * between a closing `}` and a following `else`;
/// * after a backslash (explicit line continuation; the backslash is dropped).
///
/// Comments (`#` to end of line) are removed; the newline that ends them is
/// then handled like any other newline. Outside braces newlines are preserved.
///
/// String literals and regex literals are copied through unchanged, so `#`,
/// braces, quotes and parentheses inside them are never interpreted. A `/` is
/// treated as a regex opener only when it cannot be the division operator and
/// a closing `/` exists on the same line; this applies to pattern position
/// (outside braces) as well as inside actions.
///
/// Returns the normalized program text.
fn normalize_awk_newlines(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let mut result = String::with_capacity(input.len());
    let mut i = 0;
    let mut brace_depth: usize = 0;
    // One entry per open `(`; `true` when it opens an `if`/`for`/`while` header.
    let mut paren_stack: Vec<bool> = Vec::new();
    // Length of `result` right after the `)` that closed such a header. A
    // newline reached with nothing but blanks emitted since then is a
    // continuation: the statement body is on the next line.
    let mut header_end: Option<usize> = None;

    while i < chars.len() {
        let c = chars[i];
        match c {
            '{' | '}' => {
                brace_depth = if c == '{' {
                    brace_depth + 1
                } else {
                    brace_depth.saturating_sub(1)
                };
                // Braces never occur inside parentheses; resynchronise so one
                // unbalanced `(` cannot affect the rest of the program.
                paren_stack.clear();
                result.push(c);
                i += 1;
            }
            '(' => {
                let opens_header = matches!(
                    awk_trailing_word(awk_trim_blanks(&result)),
                    "if" | "for" | "while"
                );
                paren_stack.push(opens_header);
                result.push(c);
                i += 1;
            }
            ')' => {
                result.push(c);
                i += 1;
                if paren_stack.pop() == Some(true) {
                    header_end = Some(result.len());
                }
            }
            '"' => i = awk_copy_string_literal(&chars, i, &mut result),
            '/' => match awk_regex_literal_end(&chars, i) {
                Some(end) if awk_regex_may_start(&result) => {
                    // Regex literal — pass through unchanged.
                    result.extend(&chars[i..=end]);
                    i = end + 1;
                }
                _ => {
                    // Division operator (or `/=`).
                    result.push(c);
                    i += 1;
                }
            },
            '#' => {
                // Comment — drop it; the terminating newline (if any) is left
                // in place and handled by the newline rules below.
                while i < chars.len() && chars[i] != '\n' {
                    i += 1;
                }
            }
            '\\' if brace_depth > 0 && chars.get(i + 1) == Some(&'\n') => {
                // Explicit line continuation inside an action.
                result.push(' ');
                i += 2;
            }
            '\n' if brace_depth > 0 => {
                let code = awk_trim_blanks(&result);
                let continues = !paren_stack.is_empty()
                    || header_end == Some(code.len())
                    || awk_statement_continues(code)
                    || (code.ends_with('}') && awk_next_word_is_else(&chars, i + 1));
                result.push(if continues { ' ' } else { ';' });
                i += 1;
            }
            _ => {
                result.push(c);
                i += 1;
            }
        }
    }
    result
}
Tests for awk newline handling as statement separators + +use bashkit::Bash; + +/// Issue #809: newlines between assignments should work as statement separators +#[tokio::test] +async fn awk_newline_separates_assignments() { + let mut bash = Bash::new(); + let result = bash + .exec( + r#"echo test | awk '{ + x=1 + y=2 + print x, y +}'"#, + ) + .await + .unwrap(); + assert_eq!(result.stdout.trim(), "1 2"); +} + +/// Semicolons should still work +#[tokio::test] +async fn awk_semicolons_still_work() { + let mut bash = Bash::new(); + let result = bash + .exec(r#"echo test | awk '{ x=1; y=2; print x, y }'"#) + .await + .unwrap(); + assert_eq!(result.stdout.trim(), "1 2"); +} + +/// Assignment after if on separate line +#[tokio::test] +async fn awk_newline_after_if() { + let mut bash = Bash::new(); + let result = bash + .exec( + r#"echo test | awk '{ + if (1) x=1 + y=2 + print x, y +}'"#, + ) + .await + .unwrap(); + assert_eq!(result.stdout.trim(), "1 2"); +}