Skip to content

Commit 91f1582

Browse files
committed
feat(fuzz): add awk_fuzz target for awk builtin
Add fuzz target for the awk builtin which implements a full AWK interpreter with pattern matching, expressions, functions, and field separation — many code paths to explore. The target splits fuzz input into an AWK program (first line) and input data (remainder), then exercises the awk builtin with both default and colon field separators through the full interpreter with tight resource limits. Includes scaffold tests validating the awk builtin handles valid programs, invalid syntax, BEGIN/END blocks, malformed regex, and field separators without panicking. Closes #1095
1 parent b4212af commit 91f1582

File tree

3 files changed

+155
-0
lines changed

3 files changed

+155
-0
lines changed

crates/bashkit/fuzz/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,10 @@ path = "fuzz_targets/glob_fuzz.rs"
5252
test = false
5353
doc = false
5454
bench = false
55+
56+
# Fuzz target binary `awk_fuzz`, built from fuzz_targets/awk_fuzz.rs.
# `test`, `doc`, and `bench` are all set to false for this target.
[[bin]]
57+
name = "awk_fuzz"
58+
path = "fuzz_targets/awk_fuzz.rs"
59+
test = false
60+
doc = false
61+
bench = false
crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//! Fuzz target for the awk builtin
2+
//!
3+
//! Tests AWK program parsing and execution to find:
4+
//! - Panics in the AWK expression parser
5+
//! - Stack overflow from deeply nested expressions or function calls
6+
//! - ReDoS from pathological regex patterns
7+
//! - Memory exhaustion from unbounded field/record processing
8+
//!
9+
//! Run with: cargo +nightly fuzz run awk_fuzz -- -max_total_time=300
10+
11+
#![no_main]
12+
13+
use libfuzzer_sys::fuzz_target;
14+
15+
// Entry point: each raw fuzz input is split into an AWK program and its input
// data, filtered by a few cheap guards, and then run twice through the bashkit
// interpreter (once with default field splitting, once with `-F:`).
fuzz_target!(|data: &[u8]| {
16+
// Only process valid UTF-8: the input is interpolated into a shell script
// string below, so anything that is not valid text is ignored.
17+
if let Ok(input) = std::str::from_utf8(data) {
18+
// Limit input size to prevent OOM. `len()` is a byte count, so the cap
// is 1024 bytes, not 1024 characters.
19+
if input.len() > 1024 {
20+
return;
21+
}
22+
23+
// Split input into AWK program (first line) and input data (rest).
// The split happens at the first '\n', which is dropped. When there is no
// newline the whole input is the program and a fixed two-line sample is
// used as the data.
24+
let (program, input_data) = match input.find('\n') {
25+
Some(pos) => (&input[..pos], &input[pos + 1..]),
26+
None => (input, "a b c\n1 2 3\n" as &str),
27+
};
28+
29+
// Skip empty programs (including whitespace-only first lines).
30+
if program.trim().is_empty() {
31+
return;
32+
}
33+
34+
// Reject deeply nested expressions.
// `(` and `{` add one to a running balance, `)` and `}` subtract one, and
// every other byte (including `[`/`]`) leaves it unchanged. `depth` is the
// peak of that running balance over the program, not the total number of
// openers; unmatched closers can push the balance negative.
35+
let depth: i32 = program
36+
.bytes()
37+
.map(|b| match b {
38+
b'(' | b'{' => 1,
39+
b')' | b'}' => -1,
40+
_ => 0,
41+
})
42+
.scan(0i32, |acc, d| {
43+
*acc += d;
44+
Some(*acc)
45+
})
46+
.max()
47+
.unwrap_or(0);
48+
if depth > 15 {
49+
return;
50+
}
51+
52+
// A fresh current-thread Tokio runtime is built for every fuzz input;
// failure to build it panics via `unwrap()`.
let rt = tokio::runtime::Builder::new_current_thread()
53+
.enable_all()
54+
.build()
55+
.unwrap();
56+
57+
rt.block_on(async {
58+
// One interpreter instance is configured with the limits below and is
// reused for both awk invocations.
let mut bash = bashkit::Bash::builder()
59+
.limits(
60+
bashkit::ExecutionLimits::new()
61+
.max_commands(50)
62+
.max_subst_depth(3)
63+
.max_stdout_bytes(4096)
64+
.max_stderr_bytes(4096)
65+
.timeout(std::time::Duration::from_millis(200)),
66+
)
67+
.build();
68+
69+
// Test 1: pipe data through awk program.
// Both pieces are placed inside single-quoted shell strings, so every
// `'` in them is rewritten as `'\''`. awk's stderr is redirected to
// /dev/null and the script ends with `true`. The result of `exec` is
// discarded: only a panic or crash is of interest here.
70+
let script = format!(
71+
"echo '{}' | awk '{}' 2>/dev/null; true",
72+
input_data.replace('\'', "'\\''"),
73+
program.replace('\'', "'\\''"),
74+
);
75+
let _ = bash.exec(&script).await;
76+
77+
// Test 2: awk with -F (field separator) flag.
// Same program and data as Test 1, quoted the same way, but with `:`
// as the field separator. Runs on the same `bash` instance.
78+
let script2 = format!(
79+
"echo '{}' | awk -F: '{}' 2>/dev/null; true",
80+
input_data.replace('\'', "'\\''"),
81+
program.replace('\'', "'\\''"),
82+
);
83+
let _ = bash.exec(&script2).await;
84+
});
85+
}
86+
});
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Scaffold tests for the awk_fuzz target.
2+
// Validates that the awk builtin handles arbitrary programs and input
3+
// data without panicking.
4+
5+
use bashkit::{Bash, ExecutionLimits};
6+
7+
/// Builds a fresh `Bash` instance for a single test.
///
/// The limits configured are `max_commands(50)`, `max_subst_depth(3)`,
/// 4096 bytes each for stdout and stderr, and a 2-second timeout.
fn fuzz_bash() -> Bash {
8+
Bash::builder()
9+
.limits(
10+
ExecutionLimits::new()
11+
.max_commands(50)
12+
.max_subst_depth(3)
13+
.max_stdout_bytes(4096)
14+
.max_stderr_bytes(4096)
15+
.timeout(std::time::Duration::from_secs(2)),
16+
)
17+
.build()
18+
}
19+
20+
/// A well-formed program runs successfully: `{print $2}` on the line
/// `a b c` must yield `b` (compared after trimming surrounding whitespace).
#[tokio::test]
21+
async fn awk_valid_program() {
22+
let mut bash = fuzz_bash();
23+
let result = bash.exec("echo 'a b c' | awk '{print $2}'").await.unwrap();
24+
assert_eq!(result.stdout.trim(), "b");
25+
}
26+
27+
/// A program consisting only of unbalanced `{` must not panic.
///
/// The result of `exec` is deliberately discarded; the test passes as long as
/// the call returns.
#[tokio::test]
28+
async fn awk_invalid_program() {
29+
let mut bash = fuzz_bash();
30+
let _ = bash.exec("echo 'x' | awk '{{{{{' 2>/dev/null; true").await;
31+
// Must not panic
32+
}
33+
34+
/// `BEGIN` and `END` actions both produce output: stdout must contain
/// `start` and `end`. Only presence is asserted, not order.
#[tokio::test]
35+
async fn awk_begin_end() {
36+
let mut bash = fuzz_bash();
37+
let result = bash
38+
.exec("echo 'x' | awk 'BEGIN{print \"start\"} END{print \"end\"}'")
39+
.await
40+
.unwrap();
41+
assert!(result.stdout.contains("start"));
42+
assert!(result.stdout.contains("end"));
43+
}
44+
45+
/// A pattern with an unterminated bracket expression (`/[[[/`) must not panic.
///
/// The result of `exec` is deliberately discarded; the test passes as long as
/// the call returns.
#[tokio::test]
46+
async fn awk_regex_pattern() {
47+
let mut bash = fuzz_bash();
48+
let _ = bash
49+
.exec("echo 'hello' | awk '/[[[/' 2>/dev/null; true")
50+
.await;
51+
// Must not panic on malformed regex
52+
}
53+
54+
/// `-F:` splits fields on colons: `{print $2}` on the line `a:b:c` must
/// yield `b` (compared after trimming surrounding whitespace).
#[tokio::test]
55+
async fn awk_field_separator() {
56+
let mut bash = fuzz_bash();
57+
let result = bash
58+
.exec("echo 'a:b:c' | awk -F: '{print $2}'")
59+
.await
60+
.unwrap();
61+
assert_eq!(result.stdout.trim(), "b");
62+
}

0 commit comments

Comments
 (0)