Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions crates/bashkit/src/builtins/awk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,91 @@ impl AwkState {
/// 100 levels × ~2KB = ~200KB, well within typical stack limits.
const MAX_AWK_PARSER_DEPTH: usize = 100;

/// Preprocess an awk program so that newlines inside action blocks act as
/// statement separators, by rewriting them to `;`.
///
/// The scan is purely lexical and works on the whole program text:
///
/// * String literals (`"..."`) are copied verbatim, honouring backslash escapes.
/// * Regex literals (`/.../`) are copied verbatim at any brace depth, so that
///   `#`, `{`, `}` or `"` inside a pattern such as `/#/` or `/{/` are not
///   misread. A `/` only opens a regex when it cannot be a division operator
///   (see [`awk_regex_may_start`]) and a closing `/` exists on the same line;
///   otherwise it is emitted as an ordinary character.
/// * Comments (`#` to end of line) are dropped; the newline that ends them is
///   then handled like any other newline.
/// * A backslash immediately followed by a newline is a line continuation and
///   both characters are removed.
/// * A newline inside braces becomes `;`, unless the text emitted so far ends
///   with `,`, `&&` or `||` (where awk lets the statement continue on the next
///   line), in which case it becomes a single space.
/// * Newlines outside braces are preserved.
///
/// Returns the rewritten program; the input is not modified.
fn normalize_awk_newlines(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let mut result = String::with_capacity(input.len());
    let mut brace_depth: usize = 0;
    let mut i = 0;

    while i < chars.len() {
        match chars[i] {
            '{' => {
                brace_depth += 1;
                result.push('{');
                i += 1;
            }
            '}' => {
                // Tolerate stray closing braces instead of underflowing.
                brace_depth = brace_depth.saturating_sub(1);
                result.push('}');
                i += 1;
            }
            '"' => {
                // String literal: copy through the closing quote, or to the end
                // of input when the literal is unterminated.
                let end = awk_literal_end(&chars, i, '"', false)
                    .map_or(chars.len(), |close| close + 1);
                result.extend(chars[i..end].iter());
                i = end;
            }
            '/' if awk_regex_may_start(&result) => {
                match awk_literal_end(&chars, i, '/', true) {
                    Some(close) => {
                        // Regex literal: copy verbatim, delimiters included.
                        result.extend(chars[i..=close].iter());
                        i = close + 1;
                    }
                    None => {
                        // No closing slash on this line: not a regex after all.
                        result.push('/');
                        i += 1;
                    }
                }
            }
            '#' => {
                // Comment: skip up to (not including) the newline so the
                // newline arm below decides what to emit for it.
                while i < chars.len() && chars[i] != '\n' {
                    i += 1;
                }
            }
            '\\' if chars.get(i + 1) == Some(&'\n') => {
                // Line continuation: join the two physical lines.
                i += 2;
            }
            '\n' if brace_depth > 0 => {
                if awk_newline_continues(&result) {
                    // The statement is still open; keep only token separation.
                    result.push(' ');
                } else {
                    result.push(';');
                }
                i += 1;
            }
            other => {
                result.push(other);
                i += 1;
            }
        }
    }
    result
}

/// Find the index of the delimiter that closes the literal opened at `open`.
///
/// A backslash escapes the character after it. When `single_line` is true the
/// search gives up at the first unescaped newline. Returns `None` if no
/// closing delimiter is found.
fn awk_literal_end(chars: &[char], open: usize, delim: char, single_line: bool) -> Option<usize> {
    let mut j = open + 1;
    while j < chars.len() {
        match chars[j] {
            '\\' => j += 2,
            c if c == delim => return Some(j),
            '\n' if single_line => return None,
            _ => j += 1,
        }
    }
    None
}

/// Decide whether a `/` following the already-emitted text can open a regex.
///
/// After an operand (identifier, number, `)`, `]`, a string, `$`, or a postfix
/// `++`/`--`) a slash is a division operator. After anything else, including
/// the start of the program, a newline, or a keyword such as `print`, it may
/// start a regex literal.
fn awk_regex_may_start(emitted: &str) -> bool {
    let is_word = |c: char| c.is_ascii_alphanumeric() || c == '_';
    let code = emitted.trim_end_matches(|c: char| c == ' ' || c == '\t' || c == '\r');
    match code.chars().next_back() {
        None => true,
        Some(')') | Some(']') | Some('"') | Some('$') | Some('.') => false,
        Some(c) if is_word(c) => {
            // A trailing word is an operand unless it is a keyword that must be
            // followed by an expression or statement.
            let word = &code[code.trim_end_matches(is_word).len()..];
            matches!(word, "print" | "printf" | "return" | "case" | "do" | "else")
        }
        Some(_) => !(code.ends_with("++") || code.ends_with("--")),
    }
}

/// Report whether the emitted text ends with a token after which awk allows
/// the statement to continue on the next line (`,`, `&&`, `||`).
fn awk_newline_continues(emitted: &str) -> bool {
    let code = emitted.trim_end_matches(|c: char| c == ' ' || c == '\t' || c == '\r');
    code.ends_with(',') || code.ends_with("&&") || code.ends_with("||")
}

struct AwkParser<'a> {
input: &'a str,
pos: usize,
Expand Down Expand Up @@ -642,6 +727,17 @@ impl<'a> AwkParser<'a> {
));
}

// Skip empty statements (consecutive semicolons from newline normalization)
while self.pos < self.input.len() && self.current_char().unwrap() == ';' {
self.pos += 1;
self.skip_whitespace();
}
if self.pos >= self.input.len() {
return Err(Error::Execution(
"awk: unterminated action block".to_string(),
));
}

let c = self.current_char().unwrap();
if c == '}' {
self.pos += 1;
Expand Down Expand Up @@ -3240,6 +3336,7 @@ impl Builtin for Awk {
return Err(Error::Execution("awk: no program given".to_string()));
}

let program_str = normalize_awk_newlines(&program_str);
let mut parser = AwkParser::new(&program_str);
let program = parser.parse()?;

Expand Down
48 changes: 48 additions & 0 deletions crates/bashkit/tests/awk_newline_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//! Tests for awk newline handling as statement separators

use bashkit::Bash;

/// Regression test for issue #809: a bare newline between two assignments in
/// an action block must terminate the first statement.
#[tokio::test]
async fn awk_newline_separates_assignments() {
    let script = r#"echo test | awk '{
x=1
y=2
print x, y
}'"#;
    let mut shell = Bash::new();
    let output = shell.exec(script).await.unwrap();
    assert_eq!(output.stdout.trim(), "1 2");
}

/// Explicit `;` separators on a single line keep working.
#[tokio::test]
async fn awk_semicolons_still_work() {
    let script = r#"echo test | awk '{ x=1; y=2; print x, y }'"#;
    let mut shell = Bash::new();
    let output = shell.exec(script).await.unwrap();
    assert_eq!(output.stdout.trim(), "1 2");
}

/// A newline ends the statement controlled by a one-line `if`, so the
/// assignment on the following line runs unconditionally.
#[tokio::test]
async fn awk_newline_after_if() {
    let script = r#"echo test | awk '{
if (1) x=1
y=2
print x, y
}'"#;
    let mut shell = Bash::new();
    let output = shell.exec(script).await.unwrap();
    assert_eq!(output.stdout.trim(), "1 2");
}
Loading