Skip to content

Commit b94c526

Browse files
authored
fix(awk): treat newlines as statement separators in action blocks (#831)
## Summary - Preprocess awk programs to normalize newlines to semicolons inside `{...}` blocks - Skip empty statements (consecutive semicolons) in parse_action_block - Respects string literals, regex literals, and comments during normalization ## Test plan - [x] `awk_newline_separates_assignments` — `{ x=1\n y=2\n print x, y }` works - [x] `awk_semicolons_still_work` — explicit semicolons unchanged - [x] `awk_newline_after_if` — assignment after if on separate line - [x] Full test suite passes (all 2100+ awk tests) Closes #809
1 parent 4013c4c commit b94c526

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,91 @@ impl AwkState {
362362
/// 100 levels × ~2KB = ~200KB, well within typical stack limits.
363363
const MAX_AWK_PARSER_DEPTH: usize = 100;
364364

365+
/// Preprocess an awk program so that newlines inside `{...}` action blocks
/// act as statement separators.
///
/// Inside a block (brace depth > 0) a newline is rewritten to `;`, except
/// where it cannot end a statement, in which case it is rewritten to a single
/// space:
///
/// * after `{`, `;`, `,`, `&&`, `||`, or the keywords `else` / `do`;
/// * between a closing `}` and a following `else`.
///
/// A backslash immediately followed by a newline inside a block is a line
/// continuation and is replaced by one space. Outside blocks, newlines are
/// kept as-is.
///
/// String literals and regex literals are copied through verbatim. A `/` is
/// only treated as a regex opener when the preceding token cannot end an
/// operand (so `a / b` stays a division) and a closing `/` exists on the same
/// line. Comments (`#` to end of line) are dropped; their terminating newline
/// is handled like any other newline.
///
/// NOTE(review): a newline directly after the `)` of `if (...)`, `for (...)`
/// or `while (...)` is still turned into `;`, because the scanner does not
/// track which keyword opened the parenthesis.
///
/// Returns the normalized program text.
fn normalize_awk_newlines(input: &str) -> String {
    /// Horizontal whitespace that is ignored when looking at the previous token.
    fn is_blank(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\r')
    }

    /// Characters that can be part of an identifier, keyword or number.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// The identifier-like word `code` ends with, or `""` if it ends otherwise.
    fn trailing_word(code: &str) -> &str {
        code.rsplit(|c: char| !is_word_char(c)).next().unwrap_or("")
    }

    /// Whether a `/` following the already-emitted text can open a regex
    /// literal rather than being a division operator.
    fn regex_may_start(emitted: &str) -> bool {
        let code = emitted.trim_end_matches(is_blank);
        match code.chars().next_back() {
            None => true,
            // After an operand word `/` divides; after these keywords an
            // expression (and therefore a regex) may begin.
            Some(c) if is_word_char(c) => {
                matches!(trailing_word(code), "print" | "printf" | "return")
            }
            // These characters end an operand, so `/` is a division.
            Some(c) => !matches!(c, ')' | ']' | '$' | '"' | '.'),
        }
    }

    /// Index of the `/` closing the regex opened at `open`, if it closes
    /// before the end of the line.
    fn regex_end(chars: &[char], open: usize) -> Option<usize> {
        let mut j = open + 1;
        while j < chars.len() {
            match chars[j] {
                '/' => return Some(j),
                '\n' => return None,
                // Skip the escaped character, but never hop over a newline.
                '\\' if chars.get(j + 1).map_or(false, |&next| next != '\n') => j += 2,
                _ => j += 1,
            }
        }
        None
    }

    /// Whether the next token in `rest` (after any whitespace) is `else`.
    fn starts_with_else(rest: &[char]) -> bool {
        let mut upcoming = rest.iter().copied().skip_while(|c| c.is_whitespace());
        let is_else = "else".chars().all(|expected| upcoming.next() == Some(expected));
        is_else && !upcoming.next().map_or(false, is_word_char)
    }

    /// The character a newline is rewritten to, given what was emitted so far
    /// and what follows the newline.
    fn newline_replacement(emitted: &str, brace_depth: usize, rest: &[char]) -> char {
        if brace_depth == 0 {
            return '\n';
        }
        let code = emitted.trim_end_matches(is_blank);
        let continues = code.ends_with(|c: char| matches!(c, '{' | ';' | ','))
            || code.ends_with("&&")
            || code.ends_with("||")
            || matches!(trailing_word(code), "else" | "do")
            || (code.ends_with('}') && starts_with_else(rest));
        if continues {
            ' '
        } else {
            ';'
        }
    }

    let chars: Vec<char> = input.chars().collect();
    let mut result = String::with_capacity(input.len());
    let mut brace_depth: usize = 0;
    let mut i = 0;

    while i < chars.len() {
        let c = chars[i];
        match c {
            '{' => {
                brace_depth += 1;
                result.push(c);
                i += 1;
            }
            '}' => {
                // Tolerate unbalanced closers instead of underflowing.
                brace_depth = brace_depth.saturating_sub(1);
                result.push(c);
                i += 1;
            }
            '"' => {
                // String literal: copy through unchanged, honoring escapes.
                result.push(c);
                i += 1;
                while i < chars.len() && chars[i] != '"' {
                    if chars[i] == '\\' && i + 1 < chars.len() {
                        result.push(chars[i]);
                        i += 1;
                    }
                    result.push(chars[i]);
                    i += 1;
                }
                if i < chars.len() {
                    result.push('"');
                    i += 1;
                }
            }
            '/' if regex_may_start(&result) => match regex_end(&chars, i) {
                Some(close) => {
                    // Regex literal: copy through unchanged.
                    result.extend(&chars[i..=close]);
                    i = close + 1;
                }
                None => {
                    // No closing slash on this line: treat as an ordinary character.
                    result.push(c);
                    i += 1;
                }
            },
            '\\' if brace_depth > 0 && chars.get(i + 1) == Some(&'\n') => {
                // Line continuation inside a block.
                result.push(' ');
                i += 2;
            }
            '#' => {
                // Comment: drop it, then handle its newline like any other.
                while i < chars.len() && chars[i] != '\n' {
                    i += 1;
                }
                if i < chars.len() {
                    let separator = newline_replacement(&result, brace_depth, &chars[i + 1..]);
                    result.push(separator);
                    i += 1;
                }
            }
            '\n' => {
                let separator = newline_replacement(&result, brace_depth, &chars[i + 1..]);
                result.push(separator);
                i += 1;
            }
            _ => {
                result.push(c);
                i += 1;
            }
        }
    }
    result
}
449+
365450
struct AwkParser<'a> {
366451
input: &'a str,
367452
pos: usize,
@@ -642,6 +727,17 @@ impl<'a> AwkParser<'a> {
642727
));
643728
}
644729

730+
// Skip empty statements (consecutive semicolons from newline normalization)
731+
while self.pos < self.input.len() && self.current_char().unwrap() == ';' {
732+
self.pos += 1;
733+
self.skip_whitespace();
734+
}
735+
if self.pos >= self.input.len() {
736+
return Err(Error::Execution(
737+
"awk: unterminated action block".to_string(),
738+
));
739+
}
740+
645741
let c = self.current_char().unwrap();
646742
if c == '}' {
647743
self.pos += 1;
@@ -3240,6 +3336,7 @@ impl Builtin for Awk {
32403336
return Err(Error::Execution("awk: no program given".to_string()));
32413337
}
32423338

3339+
let program_str = normalize_awk_newlines(&program_str);
32433340
let mut parser = AwkParser::new(&program_str);
32443341
let program = parser.parse()?;
32453342

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
//! Tests for awk newline handling as statement separators
2+
3+
use bashkit::Bash;
4+
5+
/// Issue #809: newlines between assignments should work as statement separators
6+
#[tokio::test]
7+
async fn awk_newline_separates_assignments() {
8+
let mut bash = Bash::new();
9+
let result = bash
10+
.exec(
11+
r#"echo test | awk '{
12+
x=1
13+
y=2
14+
print x, y
15+
}'"#,
16+
)
17+
.await
18+
.unwrap();
19+
assert_eq!(result.stdout.trim(), "1 2");
20+
}
21+
22+
/// Semicolons should still work
23+
#[tokio::test]
24+
async fn awk_semicolons_still_work() {
25+
let mut bash = Bash::new();
26+
let result = bash
27+
.exec(r#"echo test | awk '{ x=1; y=2; print x, y }'"#)
28+
.await
29+
.unwrap();
30+
assert_eq!(result.stdout.trim(), "1 2");
31+
}
32+
33+
/// Assignment after if on separate line
34+
#[tokio::test]
35+
async fn awk_newline_after_if() {
36+
let mut bash = Bash::new();
37+
let result = bash
38+
.exec(
39+
r#"echo test | awk '{
40+
if (1) x=1
41+
y=2
42+
print x, y
43+
}'"#,
44+
)
45+
.await
46+
.unwrap();
47+
assert_eq!(result.stdout.trim(), "1 2");
48+
}

0 commit comments

Comments
 (0)