Skip to content

Commit 69c73ef

Browse files
authored
feat(fuzz): add awk_fuzz target for awk builtin (#1112)
## Summary

- Add fuzz target for the `awk` builtin, which implements a full AWK interpreter with pattern matching, expressions, functions, and field separation
- Target splits fuzz input into AWK program (first line) and input data (remainder)
- Exercises both default and colon field separator modes
- Includes depth-limiting filter to reject deeply nested expressions

## Changes

- New `fuzz/fuzz_targets/awk_fuzz.rs`: fuzz target
- Updated `fuzz/Cargo.toml`: added `[[bin]]` entry for `awk_fuzz`
- New `tests/awk_fuzz_scaffold_tests.rs`: 5 scaffold tests (valid program, invalid syntax, BEGIN/END, regex, field separator)

## Test plan

- [x] `cargo test --test awk_fuzz_scaffold_tests` — 5 tests pass
- [x] `cargo clippy --all-targets --all-features -- -D warnings` — clean
- [x] `cargo fmt --check` — clean

Closes #1095
1 parent 182ce60 commit 69c73ef

File tree

3 files changed

+155
-0
lines changed

3 files changed

+155
-0
lines changed

crates/bashkit/fuzz/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,10 @@ path = "fuzz_targets/jq_fuzz.rs"
5959
test = false
6060
doc = false
6161
bench = false
62+
63+
[[bin]]
64+
name = "awk_fuzz"
65+
path = "fuzz_targets/awk_fuzz.rs"
66+
test = false
67+
doc = false
68+
bench = false
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//! Fuzz target for the awk builtin
2+
//!
3+
//! Tests AWK program parsing and execution to find:
4+
//! - Panics in the AWK expression parser
5+
//! - Stack overflow from deeply nested expressions or function calls
6+
//! - ReDoS from pathological regex patterns
7+
//! - Memory exhaustion from unbounded field/record processing
8+
//!
9+
//! Run with: cargo +nightly fuzz run awk_fuzz -- -max_total_time=300
10+
11+
#![no_main]
12+
13+
use libfuzzer_sys::fuzz_target;
14+
15+
fuzz_target!(|data: &[u8]| {
    // Inputs that are not valid UTF-8 are ignored outright.
    let Ok(text) = std::str::from_utf8(data) else {
        return;
    };

    // Keep inputs small (at most 1024 bytes) to prevent OOM.
    if text.len() > 1024 {
        return;
    }

    // The first line is the AWK program; everything after the first newline
    // is the data piped into it. When there is no newline, the whole input is
    // the program and a fixed two-record sample serves as the data.
    let (program, input_data) = match text.split_once('\n') {
        Some((first_line, rest)) => (first_line, rest),
        None => (text, "a b c\n1 2 3\n"),
    };

    // A blank program has nothing to exercise.
    if program.trim().is_empty() {
        return;
    }

    // Reject deeply nested expressions: track the running `(`/`{` nesting
    // level and bail out as soon as it climbs above 15.
    let mut level = 0i32;
    let too_deep = program.bytes().any(|byte| {
        match byte {
            b'(' | b'{' => level += 1,
            b')' | b'}' => level -= 1,
            _ => {}
        }
        level > 15
    });
    if too_deep {
        return;
    }

    // Both strings are embedded in single-quoted shell words, so every `'`
    // is rewritten as `'\''` (close quote, escaped quote, reopen quote).
    let quote = |raw: &str| raw.replace('\'', "'\\''");
    let quoted_data = quote(input_data);
    let quoted_program = quote(program);

    let scripts = [
        // Test 1: pipe data through awk program
        format!("echo '{quoted_data}' | awk '{quoted_program}' 2>/dev/null; true"),
        // Test 2: awk with -F (field separator) flag
        format!("echo '{quoted_data}' | awk -F: '{quoted_program}' 2>/dev/null; true"),
    ];

    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .unwrap();

    runtime.block_on(async {
        let limits = bashkit::ExecutionLimits::new()
            .max_commands(50)
            .max_subst_depth(3)
            .max_stdout_bytes(4096)
            .max_stderr_bytes(4096)
            .timeout(std::time::Duration::from_millis(200));
        let mut bash = bashkit::Bash::builder().limits(limits).build();

        // Results are discarded on purpose: the fuzzer only looks for
        // panics and crashes, not for successful execution.
        for script in &scripts {
            let _ = bash.exec(script).await;
        }
    });
});
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Scaffold tests for the awk_fuzz target.
2+
// Validates that the awk builtin handles arbitrary programs and input
3+
// data without panicking.
4+
5+
use bashkit::{Bash, ExecutionLimits};
6+
7+
fn fuzz_bash() -> Bash {
8+
Bash::builder()
9+
.limits(
10+
ExecutionLimits::new()
11+
.max_commands(50)
12+
.max_subst_depth(3)
13+
.max_stdout_bytes(4096)
14+
.max_stderr_bytes(4096)
15+
.timeout(std::time::Duration::from_secs(2)),
16+
)
17+
.build()
18+
}
19+
20+
#[tokio::test]
21+
async fn awk_valid_program() {
22+
let mut bash = fuzz_bash();
23+
let result = bash.exec("echo 'a b c' | awk '{print $2}'").await.unwrap();
24+
assert_eq!(result.stdout.trim(), "b");
25+
}
26+
27+
#[tokio::test]
28+
async fn awk_invalid_program() {
29+
let mut bash = fuzz_bash();
30+
let _ = bash.exec("echo 'x' | awk '{{{{{' 2>/dev/null; true").await;
31+
// Must not panic
32+
}
33+
34+
#[tokio::test]
35+
async fn awk_begin_end() {
36+
let mut bash = fuzz_bash();
37+
let result = bash
38+
.exec("echo 'x' | awk 'BEGIN{print \"start\"} END{print \"end\"}'")
39+
.await
40+
.unwrap();
41+
assert!(result.stdout.contains("start"));
42+
assert!(result.stdout.contains("end"));
43+
}
44+
45+
#[tokio::test]
46+
async fn awk_regex_pattern() {
47+
let mut bash = fuzz_bash();
48+
let _ = bash
49+
.exec("echo 'hello' | awk '/[[[/' 2>/dev/null; true")
50+
.await;
51+
// Must not panic on malformed regex
52+
}
53+
54+
#[tokio::test]
55+
async fn awk_field_separator() {
56+
let mut bash = fuzz_bash();
57+
let result = bash
58+
.exec("echo 'a:b:c' | awk -F: '{print $2}'")
59+
.await
60+
.unwrap();
61+
assert_eq!(result.stdout.trim(), "b");
62+
}

0 commit comments

Comments
 (0)