From 241817629f6c134c103ee8b75da683ae6f652d50 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi
Date: Mon, 6 Apr 2026 10:36:31 +0000
Subject: [PATCH] feat(fuzz): add awk_fuzz target for awk builtin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add fuzz target for the awk builtin which implements a full AWK
interpreter with pattern matching, expressions, functions, and field
separation — many code paths to explore.

The target splits fuzz input into an AWK program (first line) and
input data (remainder), then exercises the awk builtin with both
default and colon field separators through the full interpreter
with tight resource limits.

Includes scaffold tests validating the awk builtin handles valid
programs, invalid syntax, BEGIN/END blocks, malformed regex, and
field separators without panicking.

Closes #1095
---
 crates/bashkit/fuzz/Cargo.toml               |  7 ++
 crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs | 86 +++++++++++++++++++
 .../bashkit/tests/awk_fuzz_scaffold_tests.rs | 62 +++++++++++++
 3 files changed, 155 insertions(+)
 create mode 100644 crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs
 create mode 100644 crates/bashkit/tests/awk_fuzz_scaffold_tests.rs

diff --git a/crates/bashkit/fuzz/Cargo.toml b/crates/bashkit/fuzz/Cargo.toml
index 02d82374..9b7c2889 100644
--- a/crates/bashkit/fuzz/Cargo.toml
+++ b/crates/bashkit/fuzz/Cargo.toml
@@ -59,3 +59,10 @@ path = "fuzz_targets/jq_fuzz.rs"
 test = false
 doc = false
 bench = false
+
+[[bin]]
+name = "awk_fuzz"
+path = "fuzz_targets/awk_fuzz.rs"
+test = false
+doc = false
+bench = false
diff --git a/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs b/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs
new file mode 100644
index 00000000..7248e8cb
--- /dev/null
+++ b/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs
@@ -0,0 +1,86 @@
+//! Fuzz target for the awk builtin
+//!
+//! Tests AWK program parsing and execution to find:
+//! - Panics in the AWK expression parser
+//! - Stack overflow from deeply nested expressions or function calls
+//! - ReDoS from pathological regex patterns
+//! - Memory exhaustion from unbounded field/record processing
+//!
+//! Run with: cargo +nightly fuzz run awk_fuzz -- -max_total_time=300
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    // Only process valid UTF-8
+    if let Ok(input) = std::str::from_utf8(data) {
+        // Limit input size to prevent OOM
+        if input.len() > 1024 {
+            return;
+        }
+
+        // Split input into AWK program (first line) and input data (rest)
+        let (program, input_data) = match input.find('\n') {
+            Some(pos) => (&input[..pos], &input[pos + 1..]),
+            None => (input, "a b c\n1 2 3\n" as &str),
+        };
+
+        // Skip empty programs
+        if program.trim().is_empty() {
+            return;
+        }
+
+        // Reject deeply nested expressions
+        let depth: i32 = program
+            .bytes()
+            .map(|b| match b {
+                b'(' | b'{' => 1,
+                b')' | b'}' => -1,
+                _ => 0,
+            })
+            .scan(0i32, |acc, d| {
+                *acc += d;
+                Some(*acc)
+            })
+            .max()
+            .unwrap_or(0);
+        if depth > 15 {
+            return;
+        }
+
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .unwrap();
+
+        rt.block_on(async {
+            let mut bash = bashkit::Bash::builder()
+                .limits(
+                    bashkit::ExecutionLimits::new()
+                        .max_commands(50)
+                        .max_subst_depth(3)
+                        .max_stdout_bytes(4096)
+                        .max_stderr_bytes(4096)
+                        .timeout(std::time::Duration::from_millis(200)),
+                )
+                .build();
+
+            // Test 1: pipe data through awk program
+            let script = format!(
+                "echo '{}' | awk '{}' 2>/dev/null; true",
+                input_data.replace('\'', "'\\''"),
+                program.replace('\'', "'\\''"),
+            );
+            let _ = bash.exec(&script).await;
+
+            // Test 2: awk with -F (field separator) flag
+            let script2 = format!(
+                "echo '{}' | awk -F: '{}' 2>/dev/null; true",
+                input_data.replace('\'', "'\\''"),
+                program.replace('\'', "'\\''"),
+            );
+            let _ = bash.exec(&script2).await;
+        });
+    }
+});
diff --git a/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs b/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs
new file mode 100644
index 00000000..c8882392
--- /dev/null
+++ b/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs
@@ -0,0 +1,62 @@
+// Scaffold tests for the awk_fuzz target.
+// Validates that the awk builtin handles arbitrary programs and input
+// data without panicking.
+
+use bashkit::{Bash, ExecutionLimits};
+
+fn fuzz_bash() -> Bash {
+    Bash::builder()
+        .limits(
+            ExecutionLimits::new()
+                .max_commands(50)
+                .max_subst_depth(3)
+                .max_stdout_bytes(4096)
+                .max_stderr_bytes(4096)
+                .timeout(std::time::Duration::from_secs(2)),
+        )
+        .build()
+}
+
+#[tokio::test]
+async fn awk_valid_program() {
+    let mut bash = fuzz_bash();
+    let result = bash.exec("echo 'a b c' | awk '{print $2}'").await.unwrap();
+    assert_eq!(result.stdout.trim(), "b");
+}
+
+#[tokio::test]
+async fn awk_invalid_program() {
+    let mut bash = fuzz_bash();
+    let _ = bash.exec("echo 'x' | awk '{{{{{' 2>/dev/null; true").await;
+    // Must not panic
+}
+
+#[tokio::test]
+async fn awk_begin_end() {
+    let mut bash = fuzz_bash();
+    let result = bash
+        .exec("echo 'x' | awk 'BEGIN{print \"start\"} END{print \"end\"}'")
+        .await
+        .unwrap();
+    assert!(result.stdout.contains("start"));
+    assert!(result.stdout.contains("end"));
+}
+
+#[tokio::test]
+async fn awk_regex_pattern() {
+    let mut bash = fuzz_bash();
+    let _ = bash
+        .exec("echo 'hello' | awk '/[[[/' 2>/dev/null; true")
+        .await;
+    // Must not panic on malformed regex
+}
+
+#[tokio::test]
+async fn awk_field_separator() {
+    let mut bash = fuzz_bash();
+    let result = bash
+        .exec("echo 'a:b:c' | awk -F: '{print $2}'")
+        .await
+        .unwrap();
+    assert_eq!(result.stdout.trim(), "b");
+}