diff --git a/crates/bashkit/fuzz/Cargo.toml b/crates/bashkit/fuzz/Cargo.toml
index 02d82374..9b7c2889 100644
--- a/crates/bashkit/fuzz/Cargo.toml
+++ b/crates/bashkit/fuzz/Cargo.toml
@@ -59,3 +59,10 @@ path = "fuzz_targets/jq_fuzz.rs"
 test = false
 doc = false
 bench = false
+
+[[bin]]
+name = "awk_fuzz"
+path = "fuzz_targets/awk_fuzz.rs"
+test = false
+doc = false
+bench = false
diff --git a/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs b/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs
new file mode 100644
index 00000000..7248e8cb
--- /dev/null
+++ b/crates/bashkit/fuzz/fuzz_targets/awk_fuzz.rs
@@ -0,0 +1,115 @@
+//! Fuzz target for the awk builtin
+//!
+//! Tests AWK program parsing and execution to find:
+//! - Panics in the AWK expression parser
+//! - Stack overflow from deeply nested expressions or function calls
+//! - ReDoS from pathological regex patterns
+//! - Memory exhaustion from unbounded field/record processing
+//!
+//! The first line of the fuzz input is used as the AWK program and the rest
+//! as the data piped into it.
+//!
+//! Run with: cargo +nightly fuzz run awk_fuzz -- -max_total_time=300
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+/// Largest fuzz input, in bytes, that is processed; bigger inputs are skipped.
+const MAX_INPUT_LEN: usize = 1024;
+
+/// Deepest `(`/`{` nesting accepted in the AWK program.
+const MAX_NESTING_DEPTH: usize = 15;
+
+/// Data piped into awk when the fuzz input contains no newline.
+const DEFAULT_INPUT_DATA: &str = "a b c\n1 2 3\n";
+
+/// Returns the deepest `(`/`{` nesting level reached anywhere in `program`.
+///
+/// The running depth is clamped at zero so that leading unmatched closers
+/// cannot push it negative and hide deeper nesting later in the program.
+fn max_nesting_depth(program: &str) -> usize {
+    let mut depth = 0usize;
+    let mut deepest = 0usize;
+    for byte in program.bytes() {
+        match byte {
+            b'(' | b'{' => {
+                depth += 1;
+                deepest = deepest.max(depth);
+            }
+            b')' | b'}' => depth = depth.saturating_sub(1),
+            _ => {}
+        }
+    }
+    deepest
+}
+
+/// Escapes `text` so it can sit inside a single-quoted shell word.
+fn escape_single_quotes(text: &str) -> String {
+    text.replace('\'', "'\\''")
+}
+
+fuzz_target!(|data: &[u8]| {
+    // Limit input size to prevent OOM (checked before the UTF-8 scan).
+    if data.len() > MAX_INPUT_LEN {
+        return;
+    }
+
+    // Only process valid UTF-8
+    let input = match std::str::from_utf8(data) {
+        Ok(text) => text,
+        Err(_) => return,
+    };
+
+    // Split input into AWK program (first line) and input data (rest)
+    let (program, input_data) = input
+        .split_once('\n')
+        .unwrap_or((input, DEFAULT_INPUT_DATA));
+
+    // Skip empty programs
+    if program.trim().is_empty() {
+        return;
+    }
+
+    // Reject deeply nested expressions
+    if max_nesting_depth(program) > MAX_NESTING_DEPTH {
+        return;
+    }
+
+    // Quote both pieces once; the two scripts below share them.
+    let quoted_data = escape_single_quotes(input_data);
+    let quoted_program = escape_single_quotes(program);
+
+    let rt = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .expect("failed to build tokio runtime");
+
+    rt.block_on(async {
+        let mut bash = bashkit::Bash::builder()
+            .limits(
+                bashkit::ExecutionLimits::new()
+                    .max_commands(50)
+                    .max_subst_depth(3)
+                    .max_stdout_bytes(4096)
+                    .max_stderr_bytes(4096)
+                    .timeout(std::time::Duration::from_millis(200)),
+            )
+            .build();
+
+        // Test 1: pipe data through awk program
+        let script = format!(
+            "echo '{}' | awk '{}' 2>/dev/null; true",
+            quoted_data, quoted_program,
+        );
+        // The result is ignored: this target looks for crashes, not awk errors.
+        let _ = bash.exec(&script).await;
+
+        // Test 2: awk with -F (field separator) flag
+        let script2 = format!(
+            "echo '{}' | awk -F: '{}' 2>/dev/null; true",
+            quoted_data, quoted_program,
+        );
+        let _ = bash.exec(&script2).await;
+    });
+});
diff --git a/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs b/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs
new file mode 100644
index 00000000..c8882392
--- /dev/null
+++ b/crates/bashkit/tests/awk_fuzz_scaffold_tests.rs
@@ -0,0 +1,69 @@
+// Scaffold tests for the awk_fuzz target.
+// Validates that the awk builtin handles arbitrary programs and input
+// data without panicking.
+
+use bashkit::{Bash, ExecutionLimits};
+
+/// Builds a `Bash` with the same command, substitution-depth, and output limits
+/// as the awk_fuzz target, but with a 2 s timeout instead of 200 ms.
+fn fuzz_bash() -> Bash {
+    Bash::builder()
+        .limits(
+            ExecutionLimits::new()
+                .max_commands(50)
+                .max_subst_depth(3)
+                .max_stdout_bytes(4096)
+                .max_stderr_bytes(4096)
+                .timeout(std::time::Duration::from_secs(2)),
+        )
+        .build()
+}
+
+/// A well-formed program prints the requested field.
+#[tokio::test]
+async fn awk_valid_program() {
+    let mut bash = fuzz_bash();
+    let result = bash.exec("echo 'a b c' | awk '{print $2}'").await.unwrap();
+    assert_eq!(result.stdout.trim(), "b");
+}
+
+/// Unbalanced braces must be handled without a panic.
+#[tokio::test]
+async fn awk_invalid_program() {
+    let mut bash = fuzz_bash();
+    // Must not panic; the result itself is irrelevant.
+    let _ = bash.exec("echo 'x' | awk '{{{{{' 2>/dev/null; true").await;
+}
+
+/// BEGIN and END blocks both run.
+#[tokio::test]
+async fn awk_begin_end() {
+    let mut bash = fuzz_bash();
+    let result = bash
+        .exec("echo 'x' | awk 'BEGIN{print \"start\"} END{print \"end\"}'")
+        .await
+        .unwrap();
+    assert!(result.stdout.contains("start"));
+    assert!(result.stdout.contains("end"));
+}
+
+/// A malformed regex pattern must be handled without a panic.
+#[tokio::test]
+async fn awk_regex_pattern() {
+    let mut bash = fuzz_bash();
+    // Must not panic on malformed regex
+    let _ = bash
+        .exec("echo 'hello' | awk '/[[[/' 2>/dev/null; true")
+        .await;
+}
+
+/// `-F:` splits fields on colons.
+#[tokio::test]
+async fn awk_field_separator() {
+    let mut bash = fuzz_bash();
+    let result = bash
+        .exec("echo 'a:b:c' | awk -F: '{print $2}'")
+        .await
+        .unwrap();
+    assert_eq!(result.stdout.trim(), "b");
+}