From 71e4c1c56558fb2c22c9f0c2bf32675023d9f965 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 02:14:18 -0500 Subject: [PATCH 01/25] feat(output): add JSON, table, and YARA formatters Signed-off-by: UncleSp1d3r --- src/lib.rs | 3 + src/output/json.rs | 9 ++ src/output/mod.rs | 305 +++++++++++++++++++++++++++++++++++++++++++- src/output/table.rs | 9 ++ src/output/yara.rs | 9 ++ 5 files changed, 334 insertions(+), 1 deletion(-) create mode 100644 src/output/json.rs create mode 100644 src/output/table.rs create mode 100644 src/output/yara.rs diff --git a/src/lib.rs b/src/lib.rs index 8dfb54b..afdfc3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,3 +84,6 @@ pub use extraction::{ AsciiExtractionConfig, BasicExtractor, CanonicalString, ExtractionConfig, StringExtractor, StringOccurrence, Utf16ExtractionConfig, deduplicate, }; + +// Re-export output infrastructure types +pub use output::{OutputFormat, OutputMetadata, format_output}; diff --git a/src/output/json.rs b/src/output/json.rs new file mode 100644 index 0000000..e183a25 --- /dev/null +++ b/src/output/json.rs @@ -0,0 +1,9 @@ +use crate::types::{FoundString, Result}; + +use super::OutputMetadata; + +/// Format strings as JSONL output, one object per line. +pub fn format_json(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { + // TODO: Implement JSON formatter in a subsequent phase. + Ok(String::new()) +} diff --git a/src/output/mod.rs b/src/output/mod.rs index 34403c4..bf97cb9 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -1 +1,304 @@ -// Output formatting +//! Output formatting infrastructure for Stringy. +//! +//! This module provides the core dispatch logic and shared metadata for output +//! formatters. Concrete formatters live in submodules and are selected via the +//! `OutputFormat` enum. +//! +//! Supported formats: +//! - Table (human-readable, TTY-friendly) +//! - JSON (JSONL, one object per line) +//! - YARA (rule template output) +//! +//! ## Example +//! 
+//! ```rust +//! use stringy::{format_output, FoundString, OutputFormat, OutputMetadata}; +//! use stringy::types::{Encoding, StringSource}; +//! +//! let strings = vec![FoundString::new( +//! "example".to_string(), +//! Encoding::Ascii, +//! 0, +//! 7, +//! StringSource::SectionData, +//! )]; +//! +//! let metadata = OutputMetadata::new( +//! "sample.bin".to_string(), +//! OutputFormat::Table, +//! strings.len(), +//! strings.len(), +//! ); +//! +//! let output = format_output(&strings, &metadata)?; +//! # Ok::<(), stringy::StringyError>(()) +//! ``` + +use crate::types::{FoundString, Result}; + +pub mod json; +pub mod table; +pub mod yara; + +pub use json::format_json; +pub use table::format_table; +pub use yara::format_yara; + +/// Output format selection for Stringy formatters. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OutputFormat { + /// Human-readable table format with TTY detection. + Table, + /// JSONL output, one JSON object per line. + Json, + /// YARA rule template output. + Yara, +} + +/// Metadata describing the output context. +/// +/// This struct is marked `#[non_exhaustive]` to allow adding new fields without +/// breaking downstream code. Use `OutputMetadata::new()` to construct instances. +#[non_exhaustive] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OutputMetadata { + /// Name of the analyzed binary file. + pub binary_name: String, + /// Output format to be used. + pub format: OutputFormat, + /// Total number of strings extracted. + pub total_strings: usize, + /// Number of strings after filtering. + pub filtered_strings: usize, +} + +impl OutputMetadata { + /// Create a new `OutputMetadata` instance. + #[must_use] + pub fn new( + binary_name: String, + format: OutputFormat, + total_strings: usize, + filtered_strings: usize, + ) -> Self { + Self { + binary_name, + format, + total_strings, + filtered_strings, + } + } +} + +/// Format output strings using the requested output format. 
+/// +/// # Arguments +/// +/// * `strings` - The extracted strings to format. +/// * `metadata` - Output context and format selection. +/// +/// # Returns +/// +/// A formatted output string on success. +pub fn format_output(strings: &[FoundString], metadata: &OutputMetadata) -> Result { + format_output_with(strings, metadata, format_table, format_json, format_yara) +} + +fn format_output_with< + FTable: Fn(&[FoundString], &OutputMetadata) -> Result, + FJson: Fn(&[FoundString], &OutputMetadata) -> Result, + FYara: Fn(&[FoundString], &OutputMetadata) -> Result, +>( + strings: &[FoundString], + metadata: &OutputMetadata, + table_formatter: FTable, + json_formatter: FJson, + yara_formatter: FYara, +) -> Result { + match metadata.format { + OutputFormat::Table => table_formatter(strings, metadata), + OutputFormat::Json => json_formatter(strings, metadata), + OutputFormat::Yara => yara_formatter(strings, metadata), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{Encoding, StringSource, StringyError}; + + fn build_found_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0, + text.len() as u32, + StringSource::SectionData, + ) + } + + #[test] + fn test_output_format_enum_properties() { + let table = OutputFormat::Table; + let json = OutputFormat::Json; + let yara = OutputFormat::Yara; + + let copied = table; + let cloned = json; + + assert_eq!(copied, OutputFormat::Table); + assert_eq!(cloned, OutputFormat::Json); + assert_ne!(table, json); + assert_ne!(json, yara); + assert_ne!(table, yara); + + let debug = format!("{:?}", OutputFormat::Yara); + assert!(!debug.is_empty(), "Debug output should not be empty"); + } + + #[test] + fn test_output_metadata_construction() { + let metadata = OutputMetadata::new("sample.bin".to_string(), OutputFormat::Table, 12, 9); + + assert_eq!(metadata.binary_name, "sample.bin"); + assert_eq!(metadata.format, OutputFormat::Table); + assert_eq!(metadata.total_strings, 12); 
+ assert_eq!(metadata.filtered_strings, 9); + + let other = OutputMetadata::new("other.exe".to_string(), OutputFormat::Json, 1, 1); + + assert_eq!(other.binary_name, "other.exe"); + assert_eq!(other.format, OutputFormat::Json); + assert_eq!(other.total_strings, 1); + assert_eq!(other.filtered_strings, 1); + } + + #[test] + fn test_dispatch_logic_for_each_format() { + let strings = vec![build_found_string("alpha")]; + let metadata = OutputMetadata::new( + "sample.bin".to_string(), + OutputFormat::Table, + strings.len(), + strings.len(), + ); + + let result = format_output_with( + &strings, + &metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Dispatch should succeed"); + + assert_eq!(result, "table"); + + let json_metadata = OutputMetadata::new( + "sample.bin".to_string(), + OutputFormat::Json, + strings.len(), + strings.len(), + ); + + let json_result = format_output_with( + &strings, + &json_metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Dispatch should succeed"); + + assert_eq!(json_result, "json"); + + let yara_metadata = OutputMetadata::new( + "sample.bin".to_string(), + OutputFormat::Yara, + strings.len(), + strings.len(), + ); + + let yara_result = format_output_with( + &strings, + &yara_metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Dispatch should succeed"); + + assert_eq!(yara_result, "yara"); + } + + #[test] + fn test_edge_cases() { + // Use injected stubs to validate dispatch on edge-case metadata without + // depending on placeholder formatter output. 
+ let empty: Vec = Vec::new(); + let metadata = OutputMetadata::new("empty.bin".to_string(), OutputFormat::Table, 0, 0); + + let output = format_output_with( + &empty, + &metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Formatting should succeed"); + assert_eq!(output, "table"); + + let single = vec![build_found_string("x")]; + let single_metadata = + OutputMetadata::new("single.bin".to_string(), OutputFormat::Json, 1, 1); + + let single_output = format_output_with( + &single, + &single_metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Formatting should succeed"); + assert_eq!(single_output, "json"); + + let long_name = "a".repeat(512); + let long_metadata = OutputMetadata::new(long_name, OutputFormat::Yara, 1, 0); + let long_output = format_output_with( + &single, + &long_metadata, + |_, _| Ok("table".to_string()), + |_, _| Ok("json".to_string()), + |_, _| Ok("yara".to_string()), + ) + .expect("Formatting should succeed"); + assert_eq!(long_output, "yara"); + } + + #[test] + fn test_error_propagation() { + let strings = vec![build_found_string("err")]; + let metadata = OutputMetadata::new( + "sample.bin".to_string(), + OutputFormat::Json, + strings.len(), + strings.len(), + ); + + let error = format_output_with( + &strings, + &metadata, + |_, _| Ok("table".to_string()), + |_, _| Err(StringyError::ConfigError("formatter failed".to_string())), + |_, _| Ok("yara".to_string()), + ) + .expect_err("Formatter errors should propagate"); + + match error { + StringyError::ConfigError(message) => { + assert_eq!(message, "formatter failed"); + } + _ => panic!("Unexpected error type"), + } + } +} diff --git a/src/output/table.rs b/src/output/table.rs new file mode 100644 index 0000000..d34c71e --- /dev/null +++ b/src/output/table.rs @@ -0,0 +1,9 @@ +use crate::types::{FoundString, Result}; + +use super::OutputMetadata; + +/// 
Format strings in a human-readable table format. +pub fn format_table(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { + // TODO: Implement table formatter in a subsequent phase. + Ok(String::new()) +} diff --git a/src/output/yara.rs b/src/output/yara.rs new file mode 100644 index 0000000..8e367b8 --- /dev/null +++ b/src/output/yara.rs @@ -0,0 +1,9 @@ +use crate::types::{FoundString, Result}; + +use super::OutputMetadata; + +/// Format strings as YARA rule templates. +pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { + // TODO: Implement YARA formatter in a subsequent phase. + Ok(String::new()) +} From 4c4c955ad568921b671eb10b688b2a04ea413786 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 02:35:07 -0500 Subject: [PATCH 02/25] Implement table output formatter with TTY and plain modes Adds a human-readable table output formatter for extracted strings, with automatic TTY detection. In TTY mode, outputs an aligned table with headers and truncated columns; in non-TTY mode, outputs plain string text, one per line. Includes dynamic column width calculation, tag formatting, Unicode-safe truncation, and comprehensive integration and unit tests with snapshot verification. 
--- src/lib.rs | 2 +- src/output/mod.rs | 2 +- src/output/table.rs | 707 +++++++++++++++++- tests/output_table_integration.rs | 396 ++++++++++ ..._integration__edge_empty_section_name.snap | 7 + ...integration__edge_many_tags_truncated.snap | 7 + ...able_integration__edge_string_sources.snap | 9 + ...e_integration__edge_very_short_string.snap | 9 + ...ut_table_integration__edge_zero_score.snap | 7 + ...able_integration__plain_empty_strings.snap | 5 + ...ion__plain_long_strings_not_truncated.snap | 5 + ...e_integration__plain_multiple_strings.snap | 7 + ...n__plain_preserves_special_characters.snap | 8 + ...able_integration__plain_single_string.snap | 5 + ...le_integration__plain_unicode_strings.snap | 7 + ..._table_integration__tty_all_tag_types.snap | 16 + ..._table_integration__tty_empty_strings.snap | 5 + ...ut_table_integration__tty_high_scores.snap | 8 + ...e_integration__tty_long_section_names.snap | 9 + ...tegration__tty_long_strings_truncated.snap | 8 + ...egration__tty_missing_optional_fields.snap | 9 + ...ble_integration__tty_multiple_strings.snap | 10 + ..._table_integration__tty_single_string.snap | 7 + ...e_integration__tty_special_characters.snap | 9 + ...ation__tty_strings_with_multiple_tags.snap | 8 + ...le_integration__tty_various_encodings.snap | 9 + 26 files changed, 1275 insertions(+), 6 deletions(-) create mode 100644 tests/output_table_integration.rs create mode 100644 tests/snapshots/output_table_integration__edge_empty_section_name.snap create mode 100644 tests/snapshots/output_table_integration__edge_many_tags_truncated.snap create mode 100644 tests/snapshots/output_table_integration__edge_string_sources.snap create mode 100644 tests/snapshots/output_table_integration__edge_very_short_string.snap create mode 100644 tests/snapshots/output_table_integration__edge_zero_score.snap create mode 100644 tests/snapshots/output_table_integration__plain_empty_strings.snap create mode 100644 
tests/snapshots/output_table_integration__plain_long_strings_not_truncated.snap create mode 100644 tests/snapshots/output_table_integration__plain_multiple_strings.snap create mode 100644 tests/snapshots/output_table_integration__plain_preserves_special_characters.snap create mode 100644 tests/snapshots/output_table_integration__plain_single_string.snap create mode 100644 tests/snapshots/output_table_integration__plain_unicode_strings.snap create mode 100644 tests/snapshots/output_table_integration__tty_all_tag_types.snap create mode 100644 tests/snapshots/output_table_integration__tty_empty_strings.snap create mode 100644 tests/snapshots/output_table_integration__tty_high_scores.snap create mode 100644 tests/snapshots/output_table_integration__tty_long_section_names.snap create mode 100644 tests/snapshots/output_table_integration__tty_long_strings_truncated.snap create mode 100644 tests/snapshots/output_table_integration__tty_missing_optional_fields.snap create mode 100644 tests/snapshots/output_table_integration__tty_multiple_strings.snap create mode 100644 tests/snapshots/output_table_integration__tty_single_string.snap create mode 100644 tests/snapshots/output_table_integration__tty_special_characters.snap create mode 100644 tests/snapshots/output_table_integration__tty_strings_with_multiple_tags.snap create mode 100644 tests/snapshots/output_table_integration__tty_various_encodings.snap diff --git a/src/lib.rs b/src/lib.rs index afdfc3b..58a931d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,4 +86,4 @@ pub use extraction::{ }; // Re-export output infrastructure types -pub use output::{OutputFormat, OutputMetadata, format_output}; +pub use output::{OutputFormat, OutputMetadata, format_output, format_table_with_mode}; diff --git a/src/output/mod.rs b/src/output/mod.rs index bf97cb9..419c129 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -41,7 +41,7 @@ pub mod table; pub mod yara; pub use json::format_json; -pub use table::format_table; +pub use 
table::{format_table, format_table_with_mode}; pub use yara::format_yara; /// Output format selection for Stringy formatters. diff --git a/src/output/table.rs b/src/output/table.rs index d34c71e..4844082 100644 --- a/src/output/table.rs +++ b/src/output/table.rs @@ -1,9 +1,708 @@ -use crate::types::{FoundString, Result}; +//! Table output formatter for Stringy. +//! +//! This module provides human-readable table output with automatic TTY detection. +//! When output is directed to a terminal (TTY), strings are displayed in an aligned +//! table with headers showing String, Tags, Score, and Section columns. When output +//! is piped or redirected (non-TTY), only the raw string text is emitted, one per line, +//! for seamless integration with other command-line tools. +//! +//! # TTY Mode Example +//! +//! ```text +//! String | Tags | Score | Section +//! -------------------------------------------------------------|--------------|-------|-------- +//! https://malware.example.com/beacon | url | 150 | .rdata +//! C:\Windows\System32\cmd.exe | filepath | 120 | .data +//! GetProcAddress | import | 80 | +//! ``` +//! +//! # Non-TTY Mode Example +//! +//! ```text +//! https://malware.example.com/beacon +//! C:\Windows\System32\cmd.exe +//! GetProcAddress +//! ``` +//! +//! # Column Layout +//! +//! - **String**: Up to 60 characters, truncated with `...` if longer +//! - **Tags**: First 2-3 tags, comma-separated, max 20 characters +//! - **Score**: Right-aligned integer score +//! - **Section**: Section name where the string was found + +use std::io::IsTerminal; + +use crate::classification::ranking::RankingConfig; +use crate::types::{FoundString, Result, Tag}; use super::OutputMetadata; +/// Maximum width for the string column before truncation. +const STRING_COLUMN_WIDTH: usize = 60; + +/// Maximum width for the tags column. +const TAGS_COLUMN_WIDTH: usize = 20; + +/// Maximum width for the score column. 
+const SCORE_COLUMN_WIDTH: usize = 6; + +/// Maximum width for the section column. +const SECTION_COLUMN_WIDTH: usize = 15; + /// Format strings in a human-readable table format. -pub fn format_table(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - // TODO: Implement table formatter in a subsequent phase. - Ok(String::new()) +/// +/// Automatically detects whether output is going to a TTY (terminal) and adjusts +/// the format accordingly. In TTY mode, outputs an aligned table with headers. +/// In non-TTY mode (piped/redirected), outputs plain strings one per line. +/// +/// # Arguments +/// +/// * `strings` - The extracted strings to format +/// * `metadata` - Output context (currently unused but reserved for future features) +/// +/// # Returns +/// +/// A formatted string ready for output. +pub fn format_table(strings: &[FoundString], metadata: &OutputMetadata) -> Result { + let is_tty = std::io::stdout().is_terminal(); + format_table_with_mode(strings, metadata, is_tty) +} + +/// Format table with explicit TTY mode specification. +/// +/// This function allows explicit control over the output mode, useful for testing +/// and programmatic control over output format. +/// +/// # Arguments +/// +/// * `strings` - The extracted strings to format +/// * `metadata` - Output context +/// * `is_tty` - Whether to use TTY mode (true) or plain mode (false) +pub fn format_table_with_mode( + strings: &[FoundString], + metadata: &OutputMetadata, + is_tty: bool, +) -> Result { + if is_tty { + format_table_tty(strings, metadata) + } else { + format_table_plain(strings) + } +} + +/// Format strings as an aligned table for TTY output. 
+/// +/// Creates a table with headers and aligned columns showing: +/// - String text (truncated if necessary) +/// - Tags (comma-separated, limited count) +/// - Score (right-aligned) +/// - Section name +fn format_table_tty(strings: &[FoundString], _metadata: &OutputMetadata) -> Result { + if strings.is_empty() { + return Ok(String::new()); + } + + let mut output = String::new(); + + // Calculate dynamic column widths based on content + let section_width = calculate_section_width(strings); + let tags_width = calculate_tags_width(strings); + + // Build header + let header = format!( + "{} | {} | {} | {}", + pad_string("String", STRING_COLUMN_WIDTH, Alignment::Left), + pad_string("Tags", tags_width, Alignment::Left), + pad_string("Score", SCORE_COLUMN_WIDTH, Alignment::Right), + pad_string("Section", section_width, Alignment::Left), + ); + output.push_str(&header); + output.push('\n'); + + // Build separator line + let separator = format!( + "{}-|-{}-|-{}-|-{}", + "-".repeat(STRING_COLUMN_WIDTH), + "-".repeat(tags_width), + "-".repeat(SCORE_COLUMN_WIDTH), + "-".repeat(section_width), + ); + output.push_str(&separator); + output.push('\n'); + + // Build rows + for found_string in strings { + let truncated_text = truncate_string(&found_string.text, STRING_COLUMN_WIDTH); + let tags_display = format_tags(&found_string.tags); + let section_display = found_string.section.as_deref().unwrap_or(""); + + let row = format!( + "{} | {} | {} | {}", + pad_string(&truncated_text, STRING_COLUMN_WIDTH, Alignment::Left), + pad_string(&tags_display, tags_width, Alignment::Left), + pad_string( + &found_string.score.to_string(), + SCORE_COLUMN_WIDTH, + Alignment::Right + ), + pad_string(section_display, section_width, Alignment::Left), + ); + output.push_str(&row); + output.push('\n'); + } + + // Remove trailing newline for consistency + if output.ends_with('\n') { + output.pop(); + } + + Ok(output) +} + +/// Format strings as plain text for non-TTY output. 
+/// +/// Outputs only the string text, one per line, suitable for piping to other tools. +fn format_table_plain(strings: &[FoundString]) -> Result { + let lines: Vec = strings + .iter() + .map(|s| sanitize_plain_text(&s.text)) + .collect(); + Ok(lines.join("\n")) +} + +/// Calculate the optimal width for the section column based on content. +fn calculate_section_width(strings: &[FoundString]) -> usize { + let max_section_len = strings + .iter() + .filter_map(|s| s.section.as_ref()) + .map(|s| s.len()) + .max() + .unwrap_or(0); + + // Minimum width is "Section" header length, maximum is SECTION_COLUMN_WIDTH + max_section_len.clamp("Section".len(), SECTION_COLUMN_WIDTH) +} + +/// Calculate the optimal width for the tags column based on content. +fn calculate_tags_width(strings: &[FoundString]) -> usize { + let max_tags_len = strings + .iter() + .map(|s| format_tags(&s.tags).len()) + .max() + .unwrap_or(0); + + // Minimum width is "Tags" header length, maximum is TAGS_COLUMN_WIDTH + max_tags_len.clamp("Tags".len(), TAGS_COLUMN_WIDTH) +} + +/// Format tags for display in the table. +/// +/// Converts tags to their display format using serde rename values where applicable. +/// Limits output to `MAX_TAGS_DISPLAY` tags to prevent overflow. +/// +/// # Arguments +/// +/// * `tags` - Slice of tags to format +/// +/// # Returns +/// +/// Comma-separated string of tag names, or empty string if no tags. 
+/// +/// # Examples +/// +/// ```ignore +/// let tags = vec![Tag::IPv4, Tag::FilePath]; +/// assert_eq!(format_tags(&tags), "ipv4, filepath"); +/// ``` +pub fn format_tags(tags: &[Tag]) -> String { + if tags.is_empty() { + return String::new(); + } + + let config = RankingConfig::default(); + let max_boost = tags + .iter() + .map(|tag| tag_boost_value(tag, &config)) + .max() + .unwrap_or(0); + + let tag_strings: Vec = tags + .iter() + .filter(|tag| tag_boost_value(tag, &config) == max_boost) + .map(tag_to_display_string) + .collect(); + + let result = tag_strings.join(", "); + + // Truncate if still too long + if result.len() > TAGS_COLUMN_WIDTH { + truncate_string(&result, TAGS_COLUMN_WIDTH) + } else { + result + } +} + +/// Sanitize plain text output so each string renders as a single line. +/// +/// Replaces CRLF, LF, and CR with escaped sequences to preserve content +/// while keeping output line-based. +fn sanitize_plain_text(text: &str) -> String { + text.replace("\r\n", "\\r\\n") + .replace('\n', "\\n") + .replace('\r', "\\r") +} + +/// Get the ranking boost value for a tag using the provided config. +fn tag_boost_value(tag: &Tag, config: &RankingConfig) -> i32 { + config.tag_boosts.get(tag).copied().unwrap_or(0) +} + +/// Convert a single tag to its display string. +/// +/// Uses the serde rename value where defined, otherwise uses lowercase Debug format. 
+fn tag_to_display_string(tag: &Tag) -> String { + match tag { + Tag::Url => "url".to_string(), + Tag::Domain => "domain".to_string(), + Tag::IPv4 => "ipv4".to_string(), + Tag::IPv6 => "ipv6".to_string(), + Tag::FilePath => "filepath".to_string(), + Tag::RegistryPath => "regpath".to_string(), + Tag::Guid => "guid".to_string(), + Tag::Email => "email".to_string(), + Tag::Base64 => "b64".to_string(), + Tag::FormatString => "fmt".to_string(), + Tag::UserAgent => "user-agent-ish".to_string(), + Tag::DemangledSymbol => "demangled".to_string(), + Tag::Import => "import".to_string(), + Tag::Export => "export".to_string(), + Tag::Version => "version".to_string(), + Tag::Manifest => "manifest".to_string(), + Tag::Resource => "resource".to_string(), + Tag::DylibPath => "dylib-path".to_string(), + Tag::Rpath => "rpath".to_string(), + Tag::RpathVariable => "rpath-var".to_string(), + Tag::FrameworkPath => "framework-path".to_string(), + } +} + +/// Truncate a string to the specified maximum length. +/// +/// If the string exceeds the maximum length, it is truncated and `...` is appended. +/// Handles Unicode correctly by truncating at character boundaries. +/// +/// # Arguments +/// +/// * `s` - The string to truncate +/// * `max_len` - Maximum length including the ellipsis +/// +/// # Returns +/// +/// The original string if it fits, or a truncated version with `...` appended. 
+/// +/// # Examples +/// +/// ```ignore +/// assert_eq!(truncate_string("hello", 10), "hello"); +/// assert_eq!(truncate_string("hello world", 8), "hello..."); +/// ``` +pub fn truncate_string(s: &str, max_len: usize) -> String { + if s.len() <= max_len { + return s.to_string(); + } + + if max_len <= 3 { + return ".".repeat(max_len); + } + + // Find a valid character boundary for truncation + let truncate_at = max_len - 3; + let mut end_index = truncate_at; + + // Ensure we don't split a multi-byte character + for (idx, _) in s.char_indices() { + if idx <= truncate_at { + end_index = idx; + } else { + break; + } + } + + // Handle case where we need to include at least one character + if end_index == 0 && !s.is_empty() { + if let Some((idx, _)) = s.char_indices().nth(1) { + end_index = idx; + } else { + end_index = s.len(); + } + } + + format!("{}...", &s[..end_index]) +} + +/// Text alignment for padding. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Alignment { + /// Left-align text (pad on right). + Left, + /// Right-align text (pad on left). + Right, +} + +/// Pad a string to a fixed width with the specified alignment. +/// +/// # Arguments +/// +/// * `s` - The string to pad +/// * `width` - Target width +/// * `alignment` - Left or right alignment +/// +/// # Returns +/// +/// The padded string. 
+pub fn pad_string(s: &str, width: usize, alignment: Alignment) -> String { + match alignment { + Alignment::Left => format!("{: format!("{:>width$}", s, width = width), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::output::OutputFormat; + use crate::types::{Encoding, StringSource}; + + fn make_test_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) + } + + fn make_metadata() -> OutputMetadata { + OutputMetadata::new("test.bin".to_string(), OutputFormat::Table, 10, 10) + } + + // Tests for format_tags + mod format_tags_tests { + use super::*; + + #[test] + fn empty_tags() { + assert_eq!(format_tags(&[]), ""); + } + + #[test] + fn single_tag() { + assert_eq!(format_tags(&[Tag::Url]), "url"); + assert_eq!(format_tags(&[Tag::IPv4]), "ipv4"); + assert_eq!(format_tags(&[Tag::FilePath]), "filepath"); + } + + #[test] + fn two_tags() { + assert_eq!(format_tags(&[Tag::Url, Tag::Domain]), "url"); + assert_eq!(format_tags(&[Tag::IPv4, Tag::FilePath]), "ipv4"); + } + + #[test] + fn three_tags() { + assert_eq!(format_tags(&[Tag::Url, Tag::Domain, Tag::IPv4]), "url"); + } + + #[test] + fn more_than_max_tags_truncated() { + let tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::FilePath, + Tag::RegistryPath, + ]; + assert_eq!(format_tags(&tags), "url"); + } + + #[test] + fn multiple_tags_same_priority() { + assert_eq!(format_tags(&[Tag::Import, Tag::Export]), "import, export"); + } + + #[test] + fn all_tag_variants_have_display() { + // Ensure all tag variants produce valid output + let all_tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::IPv6, + Tag::FilePath, + Tag::RegistryPath, + Tag::Guid, + Tag::Email, + Tag::Base64, + Tag::FormatString, + Tag::UserAgent, + Tag::DemangledSymbol, + Tag::Import, + Tag::Export, + Tag::Version, + Tag::Manifest, + Tag::Resource, + Tag::DylibPath, + Tag::Rpath, + Tag::RpathVariable, + 
Tag::FrameworkPath, + ]; + + for tag in all_tags { + let display = tag_to_display_string(&tag); + assert!(!display.is_empty(), "Tag {:?} should have display", tag); + assert!(display.is_ascii(), "Tag display should be ASCII"); + } + } + } + + // Tests for truncate_string + mod truncate_string_tests { + use super::*; + + #[test] + fn short_string_unchanged() { + assert_eq!(truncate_string("hello", 10), "hello"); + assert_eq!(truncate_string("", 10), ""); + } + + #[test] + fn exact_length_unchanged() { + assert_eq!(truncate_string("hello", 5), "hello"); + } + + #[test] + fn long_string_truncated() { + assert_eq!(truncate_string("hello world", 8), "hello..."); + } + + #[test] + fn very_short_max_length() { + assert_eq!(truncate_string("hello", 3), "..."); + assert_eq!(truncate_string("hello", 2), ".."); + assert_eq!(truncate_string("hello", 1), "."); + } + + #[test] + fn unicode_string_safe_truncation() { + // Ensure we don't split multi-byte characters + let unicode = "hello\u{1F600}world"; // emoji in the middle + let truncated = truncate_string(unicode, 8); + // Should truncate before the emoji to avoid splitting it + assert!(truncated.ends_with("...")); + assert!(truncated.len() <= 8); + } + + #[test] + fn unicode_at_boundary() { + let text = "\u{4E2D}\u{6587}\u{6D4B}\u{8BD5}"; // Chinese characters + let truncated = truncate_string(text, 6); + assert!(truncated.is_char_boundary(truncated.len() - 3)); + } + } + + // Tests for pad_string + mod pad_string_tests { + use super::*; + + #[test] + fn left_alignment() { + assert_eq!(pad_string("hi", 5, Alignment::Left), "hi "); + assert_eq!(pad_string("hello", 5, Alignment::Left), "hello"); + } + + #[test] + fn right_alignment() { + assert_eq!(pad_string("hi", 5, Alignment::Right), " hi"); + assert_eq!(pad_string("hello", 5, Alignment::Right), "hello"); + } + + #[test] + fn exact_width() { + assert_eq!(pad_string("exact", 5, Alignment::Left), "exact"); + assert_eq!(pad_string("exact", 5, Alignment::Right), "exact"); + } + 
+ #[test] + fn empty_string() { + assert_eq!(pad_string("", 5, Alignment::Left), " "); + assert_eq!(pad_string("", 5, Alignment::Right), " "); + } + } + + // Tests for format_table + mod format_table_tests { + use super::*; + + #[test] + fn empty_strings_returns_empty() { + let result = format_table_with_mode(&[], &make_metadata(), true).unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn single_string_tty_mode() { + let strings = vec![make_test_string("test string")]; + let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); + + // Should have header, separator, and one data row + let lines: Vec<&str> = result.lines().collect(); + assert_eq!(lines.len(), 3); + assert!(lines[0].contains("String")); + assert!(lines[0].contains("Tags")); + assert!(lines[0].contains("Score")); + assert!(lines[0].contains("Section")); + assert!(lines[1].contains("---")); + assert!(lines[2].contains("test string")); + } + + #[test] + fn single_string_plain_mode() { + let strings = vec![make_test_string("test string")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + assert_eq!(result, "test string"); + } + + #[test] + fn multiple_strings_plain_mode() { + let strings = vec![ + make_test_string("first"), + make_test_string("second"), + make_test_string("third"), + ]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + assert_eq!(result, "first\nsecond\nthird"); + } + + #[test] + fn string_with_tags_displayed() { + let mut found = make_test_string("http://example.com"); + found.tags = vec![Tag::Url, Tag::Domain]; + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains("url")); + } + + #[test] + fn string_with_section_displayed() { + let found = make_test_string("test").with_section(".rodata".to_string()); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains(".rodata")); + } + + 
#[test] + fn string_with_score_displayed() { + let found = make_test_string("test").with_score(150); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains("150")); + } + + #[test] + fn long_string_truncated_in_tty() { + let long_text = "a".repeat(100); + let strings = vec![make_test_string(&long_text)]; + let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); + + // Should contain truncated version with ... + assert!(result.contains("...")); + // Should not contain the full 100 character string + assert!(!result.contains(&long_text)); + } + + #[test] + fn long_string_not_truncated_in_plain() { + let long_text = "a".repeat(100); + let strings = vec![make_test_string(&long_text)]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + // Plain mode should have full string + assert_eq!(result, long_text); + } + + #[test] + fn missing_optional_fields_handled() { + // String with no section, no tags, default score + let found = make_test_string("minimal"); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + // Should not crash and should contain the string + assert!(result.contains("minimal")); + } + + #[test] + fn special_characters_in_string() { + let strings = vec![make_test_string("tab\there"), make_test_string("pipe|here")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + // Each string should be on its own line in output + let lines: Vec<&str> = result.lines().collect(); + assert_eq!(lines.len(), 2); + assert!(lines[0].contains("tab\there")); + assert!(lines[1].contains("pipe|here")); + } + + #[test] + fn string_with_embedded_newline() { + let strings = vec![make_test_string("line1\nline2")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + assert_eq!(result, "line1\\nline2"); + } + } + + // Tests for column width calculation + mod 
column_width_tests { + use super::*; + + #[test] + fn section_width_minimum() { + let strings = vec![make_test_string("test")]; + let width = calculate_section_width(&strings); + assert_eq!(width, "Section".len()); + } + + #[test] + fn section_width_from_content() { + let strings = vec![make_test_string("test").with_section(".rodata.str1.1".to_string())]; + let width = calculate_section_width(&strings); + assert_eq!(width, ".rodata.str1.1".len()); + } + + #[test] + fn section_width_capped_at_max() { + let long_section = "a".repeat(50); + let strings = vec![make_test_string("test").with_section(long_section)]; + let width = calculate_section_width(&strings); + assert_eq!(width, SECTION_COLUMN_WIDTH); + } + + #[test] + fn tags_width_minimum() { + let strings = vec![make_test_string("test")]; + let width = calculate_tags_width(&strings); + assert_eq!(width, "Tags".len()); + } + + #[test] + fn tags_width_from_content() { + let mut found = make_test_string("test"); + found.tags = vec![Tag::Url, Tag::Domain]; + let width = calculate_tags_width(&[found]); + assert_eq!(width, "Tags".len()); + } + } } diff --git a/tests/output_table_integration.rs b/tests/output_table_integration.rs new file mode 100644 index 0000000..3446464 --- /dev/null +++ b/tests/output_table_integration.rs @@ -0,0 +1,396 @@ +//! Integration tests for table output formatter. +//! +//! Uses insta snapshots to verify output format consistency. + +use insta::assert_snapshot; +use stringy::output::{OutputFormat, OutputMetadata, format_table_with_mode}; +use stringy::types::{Encoding, FoundString, StringSource, Tag}; + +/// Create a test FoundString with common defaults. +fn make_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) +} + +/// Create OutputMetadata for tests. 
+fn make_metadata(count: usize) -> OutputMetadata { + OutputMetadata::new( + "test_binary.exe".to_string(), + OutputFormat::Table, + count, + count, + ) +} + +// TTY mode tests + +#[test] +fn test_tty_empty_strings() { + let result = format_table_with_mode(&[], &make_metadata(0), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_single_string() { + let strings = vec![make_string("GetProcAddress")]; + let result = format_table_with_mode(&strings, &make_metadata(1), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_multiple_strings() { + let strings = vec![ + make_string("https://malware.example.com/beacon") + .with_tags(vec![Tag::Url]) + .with_score(150) + .with_section(".rdata".to_string()), + make_string("C:\\Windows\\System32\\cmd.exe") + .with_tags(vec![Tag::FilePath]) + .with_score(120) + .with_section(".data".to_string()), + make_string("GetProcAddress") + .with_tags(vec![Tag::Import]) + .with_score(80), + make_string("192.168.1.100") + .with_tags(vec![Tag::IPv4]) + .with_score(100) + .with_section(".rodata".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(4), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_strings_with_multiple_tags() { + let strings = vec![ + make_string("http://evil.com/download.exe") + .with_tags(vec![Tag::Url, Tag::Domain, Tag::FilePath]) + .with_score(200) + .with_section(".rdata".to_string()), + make_string("user@example.com") + .with_tags(vec![Tag::Email, Tag::Domain]) + .with_score(90) + .with_section(".data".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(2), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_long_strings_truncated() { + let long_url = format!( + "https://very-long-subdomain.malware-domain.example.com/path/to/beacon?id={}", + "x".repeat(50) + ); + let long_path = format!( + "C:\\Users\\Administrator\\AppData\\Local\\Temp\\{}.exe", + "a".repeat(60) + ); + + let strings 
= vec![ + make_string(&long_url) + .with_tags(vec![Tag::Url]) + .with_score(150) + .with_section(".rdata".to_string()), + make_string(&long_path) + .with_tags(vec![Tag::FilePath]) + .with_score(120) + .with_section(".data".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(2), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_missing_optional_fields() { + let strings = vec![ + // No section + make_string("kernel32.dll") + .with_tags(vec![Tag::Import]) + .with_score(50), + // No tags + make_string("mysterious string") + .with_score(10) + .with_section(".text".to_string()), + // No tags, no section, default score + make_string("bare minimum"), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_special_characters() { + let strings = vec![ + make_string("string with\ttab") + .with_score(10) + .with_section(".data".to_string()), + make_string("pipe|character") + .with_score(10) + .with_section(".data".to_string()), + make_string("backslash\\here") + .with_tags(vec![Tag::FilePath]) + .with_score(20) + .with_section(".rdata".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_various_encodings() { + let strings = vec![ + FoundString::new( + "ASCII string".to_string(), + Encoding::Ascii, + 0x1000, + 12, + StringSource::SectionData, + ) + .with_score(50) + .with_section(".rodata".to_string()), + FoundString::new( + "UTF-8 string".to_string(), + Encoding::Utf8, + 0x2000, + 12, + StringSource::SectionData, + ) + .with_score(50) + .with_section(".rodata".to_string()), + FoundString::new( + "UTF-16LE string".to_string(), + Encoding::Utf16Le, + 0x3000, + 30, + StringSource::SectionData, + ) + .with_score(50) + .with_section(".data".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + 
assert_snapshot!(result); +} + +#[test] +fn test_tty_high_scores() { + let strings = vec![ + make_string("critical IOC") + .with_tags(vec![Tag::Url, Tag::IPv4]) + .with_score(9999) + .with_section(".rdata".to_string()), + make_string("negative score") + .with_score(-50) + .with_section(".text".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(2), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_all_tag_types() { + // Test a variety of tag types to ensure they all display correctly + let strings = vec![ + make_string("https://example.com") + .with_tags(vec![Tag::Url]) + .with_score(100), + make_string("example.com") + .with_tags(vec![Tag::Domain]) + .with_score(80), + make_string("192.168.1.1") + .with_tags(vec![Tag::IPv4]) + .with_score(90), + make_string("::1").with_tags(vec![Tag::IPv6]).with_score(90), + make_string("/etc/passwd") + .with_tags(vec![Tag::FilePath]) + .with_score(85), + make_string("HKLM\\Software") + .with_tags(vec![Tag::RegistryPath]) + .with_score(85), + make_string("{12345678-1234-1234-1234-123456789012}") + .with_tags(vec![Tag::Guid]) + .with_score(70), + make_string("user@domain.com") + .with_tags(vec![Tag::Email]) + .with_score(75), + make_string("SGVsbG8gV29ybGQ=") + .with_tags(vec![Tag::Base64]) + .with_score(60), + make_string("%s %d %x") + .with_tags(vec![Tag::FormatString]) + .with_score(50), + ]; + let result = format_table_with_mode(&strings, &make_metadata(10), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_tty_long_section_names() { + let strings = vec![ + make_string("string one") + .with_score(10) + .with_section(".rodata.str1.1".to_string()), + make_string("string two") + .with_score(20) + .with_section(".data.rel.ro".to_string()), + make_string("string three") + .with_score(30) + .with_section(".text".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + assert_snapshot!(result); +} + +// Non-TTY (plain) 
mode tests + +#[test] +fn test_plain_empty_strings() { + let result = format_table_with_mode(&[], &make_metadata(0), false).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_plain_single_string() { + let strings = vec![make_string("GetProcAddress")]; + let result = format_table_with_mode(&strings, &make_metadata(1), false).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_plain_multiple_strings() { + let strings = vec![ + make_string("https://malware.example.com/beacon") + .with_tags(vec![Tag::Url]) + .with_score(150), + make_string("C:\\Windows\\System32\\cmd.exe") + .with_tags(vec![Tag::FilePath]) + .with_score(120), + make_string("GetProcAddress") + .with_tags(vec![Tag::Import]) + .with_score(80), + ]; + let result = format_table_with_mode(&strings, &make_metadata(4), false).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_plain_long_strings_not_truncated() { + let long_string = "a".repeat(200); + let strings = vec![make_string(&long_string)]; + let result = format_table_with_mode(&strings, &make_metadata(1), false).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_plain_preserves_special_characters() { + let strings = vec![ + make_string("tab\there"), + make_string("pipe|here"), + make_string("quote\"here"), + make_string("line1\nline2"), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), false).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_plain_unicode_strings() { + let strings = vec![ + make_string("\u{4E2D}\u{6587}\u{5B57}\u{7B26}\u{4E32}"), // Chinese characters + make_string("\u{0420}\u{0443}\u{0441}\u{0441}\u{043A}\u{0438}\u{0439}"), // Russian + make_string("\u{1F600}\u{1F601}\u{1F602}"), // Emojis + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), false).unwrap(); + assert_snapshot!(result); +} + +// Edge case tests + +#[test] +fn test_edge_many_tags_truncated() { + let strings = vec![ + make_string("multi-tagged") + .with_tags(vec![ + Tag::Url, + 
Tag::Domain, + Tag::IPv4, + Tag::FilePath, + Tag::RegistryPath, + ]) + .with_score(100) + .with_section(".data".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(1), true).unwrap(); + // Should only show first 3 tags + assert_snapshot!(result); +} + +#[test] +fn test_edge_zero_score() { + let strings = vec![ + make_string("zero score string") + .with_score(0) + .with_section(".data".to_string()), + ]; + let result = format_table_with_mode(&strings, &make_metadata(1), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_edge_empty_section_name() { + // Section explicitly set to empty string vs None + let strings = vec![make_string("with empty section").with_section(String::new())]; + let result = format_table_with_mode(&strings, &make_metadata(1), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_edge_very_short_string() { + let strings = vec![ + make_string("a").with_score(10), + make_string("ab").with_score(20), + make_string("abc").with_score(30), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + assert_snapshot!(result); +} + +#[test] +fn test_edge_string_sources() { + let strings = vec![ + FoundString::new( + "import_func".to_string(), + Encoding::Ascii, + 0x1000, + 11, + StringSource::ImportName, + ) + .with_tags(vec![Tag::Import]) + .with_score(80), + FoundString::new( + "export_func".to_string(), + Encoding::Ascii, + 0x2000, + 11, + StringSource::ExportName, + ) + .with_tags(vec![Tag::Export]) + .with_score(80), + FoundString::new( + "resource string".to_string(), + Encoding::Utf16Le, + 0x3000, + 30, + StringSource::ResourceString, + ) + .with_tags(vec![Tag::Resource]) + .with_score(60), + ]; + let result = format_table_with_mode(&strings, &make_metadata(3), true).unwrap(); + assert_snapshot!(result); +} diff --git a/tests/snapshots/output_table_integration__edge_empty_section_name.snap 
b/tests/snapshots/output_table_integration__edge_empty_section_name.snap new file mode 100644 index 0000000..be098ad --- /dev/null +++ b/tests/snapshots/output_table_integration__edge_empty_section_name.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +with empty section | | 0 | diff --git a/tests/snapshots/output_table_integration__edge_many_tags_truncated.snap b/tests/snapshots/output_table_integration__edge_many_tags_truncated.snap new file mode 100644 index 0000000..eded6bc --- /dev/null +++ b/tests/snapshots/output_table_integration__edge_many_tags_truncated.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +multi-tagged | url | 100 | .data diff --git a/tests/snapshots/output_table_integration__edge_string_sources.snap b/tests/snapshots/output_table_integration__edge_string_sources.snap new file mode 100644 index 0000000..d86c3f3 --- /dev/null +++ b/tests/snapshots/output_table_integration__edge_string_sources.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|----------|--------|-------- +import_func | import | 80 | +export_func | export | 80 | +resource string | resource | 60 | diff --git a/tests/snapshots/output_table_integration__edge_very_short_string.snap b/tests/snapshots/output_table_integration__edge_very_short_string.snap new file mode 100644 index 0000000..25d3c61 --- /dev/null +++ b/tests/snapshots/output_table_integration__edge_very_short_string.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section 
+-------------------------------------------------------------|------|--------|-------- +a | | 10 | +ab | | 20 | +abc | | 30 | diff --git a/tests/snapshots/output_table_integration__edge_zero_score.snap b/tests/snapshots/output_table_integration__edge_zero_score.snap new file mode 100644 index 0000000..3803bf2 --- /dev/null +++ b/tests/snapshots/output_table_integration__edge_zero_score.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +zero score string | | 0 | .data diff --git a/tests/snapshots/output_table_integration__plain_empty_strings.snap b/tests/snapshots/output_table_integration__plain_empty_strings.snap new file mode 100644 index 0000000..c900bf2 --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_empty_strings.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- + diff --git a/tests/snapshots/output_table_integration__plain_long_strings_not_truncated.snap b/tests/snapshots/output_table_integration__plain_long_strings_not_truncated.snap new file mode 100644 index 0000000..6372697 --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_long_strings_not_truncated.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/tests/snapshots/output_table_integration__plain_multiple_strings.snap b/tests/snapshots/output_table_integration__plain_multiple_strings.snap new file mode 100644 index 0000000..f7c8c0f --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_multiple_strings.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- 
+https://malware.example.com/beacon +C:\Windows\System32\cmd.exe +GetProcAddress diff --git a/tests/snapshots/output_table_integration__plain_preserves_special_characters.snap b/tests/snapshots/output_table_integration__plain_preserves_special_characters.snap new file mode 100644 index 0000000..d41e9ba --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_preserves_special_characters.snap @@ -0,0 +1,8 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +tab here +pipe|here +quote"here +line1\nline2 diff --git a/tests/snapshots/output_table_integration__plain_single_string.snap b/tests/snapshots/output_table_integration__plain_single_string.snap new file mode 100644 index 0000000..dbeff49 --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_single_string.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +GetProcAddress diff --git a/tests/snapshots/output_table_integration__plain_unicode_strings.snap b/tests/snapshots/output_table_integration__plain_unicode_strings.snap new file mode 100644 index 0000000..a44510c --- /dev/null +++ b/tests/snapshots/output_table_integration__plain_unicode_strings.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +中文字符串 +Русский +😀😁😂 diff --git a/tests/snapshots/output_table_integration__tty_all_tag_types.snap b/tests/snapshots/output_table_integration__tty_all_tag_types.snap new file mode 100644 index 0000000..5f6612e --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_all_tag_types.snap @@ -0,0 +1,16 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|----------|--------|-------- +https://example.com | url | 100 | +example.com | domain | 80 | +192.168.1.1 | ipv4 | 90 | +::1 | ipv6 | 90 | +/etc/passwd | filepath | 85 | +HKLM\Software | regpath | 85 | 
+{12345678-1234-1234-1234-123456789012} | guid | 70 | +user@domain.com | email | 75 | +SGVsbG8gV29ybGQ= | b64 | 60 | +%s %d %x | fmt | 50 | diff --git a/tests/snapshots/output_table_integration__tty_empty_strings.snap b/tests/snapshots/output_table_integration__tty_empty_strings.snap new file mode 100644 index 0000000..c900bf2 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_empty_strings.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- + diff --git a/tests/snapshots/output_table_integration__tty_high_scores.snap b/tests/snapshots/output_table_integration__tty_high_scores.snap new file mode 100644 index 0000000..fa3a32a --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_high_scores.snap @@ -0,0 +1,8 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +critical IOC | url | 9999 | .rdata +negative score | | -50 | .text diff --git a/tests/snapshots/output_table_integration__tty_long_section_names.snap b/tests/snapshots/output_table_integration__tty_long_section_names.snap new file mode 100644 index 0000000..8cbd810 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_long_section_names.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|--------------- +string one | | 10 | .rodata.str1.1 +string two | | 20 | .data.rel.ro +string three | | 30 | .text diff --git a/tests/snapshots/output_table_integration__tty_long_strings_truncated.snap b/tests/snapshots/output_table_integration__tty_long_strings_truncated.snap new file mode 100644 index 0000000..643d930 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_long_strings_truncated.snap @@ -0,0 +1,8 @@ +--- +source: 
tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|----------|--------|-------- +https://very-long-subdomain.malware-domain.example.com/pa... | url | 150 | .rdata +C:\Users\Administrator\AppData\Local\Temp\aaaaaaaaaaaaaaa... | filepath | 120 | .data diff --git a/tests/snapshots/output_table_integration__tty_missing_optional_fields.snap b/tests/snapshots/output_table_integration__tty_missing_optional_fields.snap new file mode 100644 index 0000000..8e6e113 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_missing_optional_fields.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|--------|--------|-------- +kernel32.dll | import | 50 | +mysterious string | | 10 | .text +bare minimum | | 0 | diff --git a/tests/snapshots/output_table_integration__tty_multiple_strings.snap b/tests/snapshots/output_table_integration__tty_multiple_strings.snap new file mode 100644 index 0000000..f7a8eca --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_multiple_strings.snap @@ -0,0 +1,10 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|----------|--------|-------- +https://malware.example.com/beacon | url | 150 | .rdata +C:\Windows\System32\cmd.exe | filepath | 120 | .data +GetProcAddress | import | 80 | +192.168.1.100 | ipv4 | 100 | .rodata diff --git a/tests/snapshots/output_table_integration__tty_single_string.snap b/tests/snapshots/output_table_integration__tty_single_string.snap new file mode 100644 index 0000000..28cbea8 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_single_string.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- 
+String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +GetProcAddress | | 0 | diff --git a/tests/snapshots/output_table_integration__tty_special_characters.snap b/tests/snapshots/output_table_integration__tty_special_characters.snap new file mode 100644 index 0000000..2ebce1e --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_special_characters.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|----------|--------|-------- +string with tab | | 10 | .data +pipe|character | | 10 | .data +backslash\here | filepath | 20 | .rdata diff --git a/tests/snapshots/output_table_integration__tty_strings_with_multiple_tags.snap b/tests/snapshots/output_table_integration__tty_strings_with_multiple_tags.snap new file mode 100644 index 0000000..8be72b8 --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_strings_with_multiple_tags.snap @@ -0,0 +1,8 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|--------|--------|-------- +http://evil.com/download.exe | url | 200 | .rdata +user@example.com | domain | 90 | .data diff --git a/tests/snapshots/output_table_integration__tty_various_encodings.snap b/tests/snapshots/output_table_integration__tty_various_encodings.snap new file mode 100644 index 0000000..eade21f --- /dev/null +++ b/tests/snapshots/output_table_integration__tty_various_encodings.snap @@ -0,0 +1,9 @@ +--- +source: tests/output_table_integration.rs +expression: result +--- +String | Tags | Score | Section +-------------------------------------------------------------|------|--------|-------- +ASCII string | | 50 | .rodata +UTF-8 string | | 50 | .rodata +UTF-16LE string | | 50 | .data From 
b8bcc8e99950dae10956e762e3ee6510c3d2dc17 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 02:55:30 -0500 Subject: [PATCH 03/25] Implement JSON and YARA output formatters Added full implementations for `format_json` and `format_yara` functions, including serialization logic, escaping, and tag/group handling. Updated documentation and examples to reflect new UTF-16 extraction API and output formatting. Added comprehensive integration and snapshot tests for both output formats to ensure correctness and stability. --- src/classification/mod.rs | 24 +- src/classification/semantic.rs | 24 +- src/classification/symbols.rs | 49 +-- src/extraction/ascii.rs | 2 +- src/extraction/mod.rs | 11 +- src/extraction/pe_resources.rs | 7 + src/lib.rs | 10 +- src/output/json.rs | 279 +++++++++++++++- src/output/yara.rs | 298 +++++++++++++++++- tests/output_json_integration.rs | 266 ++++++++++++++++ tests/output_yara_integration.rs | 196 ++++++++++++ ..._json_integration__json_all_encodings.snap | 8 + ...ntegration__json_all_fields_populated.snap | 5 + ...ut_json_integration__json_all_sources.snap | 10 + ...utput_json_integration__json_all_tags.snap | 5 + ...t_json_integration__json_debug_fields.snap | 5 + ..._json_integration__json_empty_strings.snap | 5 + ...t_json_integration__json_long_strings.snap | 5 + ...on_integration__json_multiple_strings.snap | 7 + ...ntegration__json_optional_fields_none.snap | 5 + ..._json_integration__json_original_text.snap | 5 + ..._json_integration__json_single_string.snap | 5 + ..._integration__json_special_characters.snap | 5 + ...son_integration__json_unicode_content.snap | 5 + ..._yara_integration__yara_all_tag_types.snap | 80 +++++ ...ration__yara_binary_name_sanitization.snap | 20 ++ ...a_integration__yara_edge_case_names-2.snap | 20 ++ ...ara_integration__yara_edge_case_names.snap | 20 ++ ..._yara_integration__yara_empty_strings.snap | 16 + ..._integration__yara_encoding_modifiers.snap | 23 ++ 
...ut_yara_integration__yara_high_scores.snap | 23 ++ ...ntegration__yara_long_strings_skipped.snap | 19 ++ ...ara_integration__yara_mixed_encodings.snap | 26 ++ ..._yara_multiple_strings_different_tags.snap | 26 ++ ...ation__yara_multiple_strings_same_tag.snap | 22 ++ ...output_yara_integration__yara_no_tags.snap | 22 ++ ..._yara_integration__yara_single_string.snap | 20 ++ ..._integration__yara_special_characters.snap | 20 ++ ..._integration__yara_unicode_in_strings.snap | 20 ++ 39 files changed, 1539 insertions(+), 79 deletions(-) create mode 100644 tests/output_json_integration.rs create mode 100644 tests/output_yara_integration.rs create mode 100644 tests/snapshots/output_json_integration__json_all_encodings.snap create mode 100644 tests/snapshots/output_json_integration__json_all_fields_populated.snap create mode 100644 tests/snapshots/output_json_integration__json_all_sources.snap create mode 100644 tests/snapshots/output_json_integration__json_all_tags.snap create mode 100644 tests/snapshots/output_json_integration__json_debug_fields.snap create mode 100644 tests/snapshots/output_json_integration__json_empty_strings.snap create mode 100644 tests/snapshots/output_json_integration__json_long_strings.snap create mode 100644 tests/snapshots/output_json_integration__json_multiple_strings.snap create mode 100644 tests/snapshots/output_json_integration__json_optional_fields_none.snap create mode 100644 tests/snapshots/output_json_integration__json_original_text.snap create mode 100644 tests/snapshots/output_json_integration__json_single_string.snap create mode 100644 tests/snapshots/output_json_integration__json_special_characters.snap create mode 100644 tests/snapshots/output_json_integration__json_unicode_content.snap create mode 100644 tests/snapshots/output_yara_integration__yara_all_tag_types.snap create mode 100644 tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap create mode 100644 
tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap create mode 100644 tests/snapshots/output_yara_integration__yara_edge_case_names.snap create mode 100644 tests/snapshots/output_yara_integration__yara_empty_strings.snap create mode 100644 tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap create mode 100644 tests/snapshots/output_yara_integration__yara_high_scores.snap create mode 100644 tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap create mode 100644 tests/snapshots/output_yara_integration__yara_mixed_encodings.snap create mode 100644 tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap create mode 100644 tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap create mode 100644 tests/snapshots/output_yara_integration__yara_no_tags.snap create mode 100644 tests/snapshots/output_yara_integration__yara_single_string.snap create mode 100644 tests/snapshots/output_yara_integration__yara_special_characters.snap create mode 100644 tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap diff --git a/src/classification/mod.rs b/src/classification/mod.rs index 704ac76..f425aa9 100644 --- a/src/classification/mod.rs +++ b/src/classification/mod.rs @@ -26,22 +26,14 @@ //! use stringy::types::{FoundString, Encoding, StringSource, Tag}; //! //! let classifier = SemanticClassifier::new(); -//! let found_string = FoundString { -//! text: "C:\\Windows\\System32\\cmd.exe".to_string(), -//! original_text: None, -//! encoding: Encoding::Ascii, -//! offset: 0, -//! rva: None, -//! section: None, -//! length: 27, -//! tags: Vec::new(), -//! score: 0, -//! section_weight: None, -//! semantic_boost: None, -//! noise_penalty: None, -//! source: StringSource::SectionData, -//! confidence: 1.0, -//! }; +//! let text = "C:\\Windows\\System32\\cmd.exe"; +//! let found_string = FoundString::new( +//! text.to_string(), +//! Encoding::Ascii, +//! 0, +//! 
text.len() as u32, +//! StringSource::SectionData, +//! ); //! //! let tags = classifier.classify(&found_string); //! assert!(tags.contains(&Tag::FilePath)); diff --git a/src/classification/semantic.rs b/src/classification/semantic.rs index c6df7a7..0ad913f 100644 --- a/src/classification/semantic.rs +++ b/src/classification/semantic.rs @@ -23,22 +23,14 @@ //! use stringy::types::{FoundString, Encoding, StringSource}; //! //! let classifier = SemanticClassifier::new(); -//! let found_string = FoundString { -//! text: "https://example.com/api".to_string(), -//! original_text: None, -//! encoding: Encoding::Ascii, -//! offset: 0, -//! rva: None, -//! section: None, -//! length: 24, -//! tags: Vec::new(), -//! score: 0, -//! section_weight: None, -//! semantic_boost: None, -//! noise_penalty: None, -//! source: StringSource::SectionData, -//! confidence: 1.0, -//! }; +//! let text = "https://example.com/api"; +//! let found_string = FoundString::new( +//! text.to_string(), +//! Encoding::Ascii, +//! 0, +//! text.len() as u32, +//! StringSource::SectionData, +//! ); //! //! let tags = classifier.classify(&found_string); //! assert_eq!(tags.len(), 1); diff --git a/src/classification/symbols.rs b/src/classification/symbols.rs index 27b7cd2..b69ae31 100644 --- a/src/classification/symbols.rs +++ b/src/classification/symbols.rs @@ -18,24 +18,17 @@ //! use stringy::types::{FoundString, Encoding, StringSource, Tag}; //! //! let demangler = SymbolDemangler::new(); -//! let mut found_string = FoundString { -//! text: "_ZN4core3fmt5Write9write_str17h1234567890abcdefE".to_string(), -//! original_text: None, -//! encoding: Encoding::Ascii, -//! offset: 0, -//! rva: None, -//! section: None, -//! length: 47, -//! tags: Vec::new(), -//! score: 0, -//! section_weight: None, -//! semantic_boost: None, -//! noise_penalty: None, -//! source: StringSource::ImportName, -//! confidence: 1.0, -//! }; +//! let text = "_ZN4core3fmt5Write9write_str17h1234567890abcdefE"; +//! 
let mut found_string = FoundString::new( +//! text.to_string(), +//! Encoding::Ascii, +//! 0, +//! text.len() as u32, +//! StringSource::ImportName, +//! ); //! //! demangler.demangle(&mut found_string); +//! assert!(found_string.tags.contains(&Tag::DemangledSymbol)); //! // found_string.text now contains the demangled symbol //! // found_string.original_text contains the original mangled form //! // found_string.tags contains Tag::DemangledSymbol @@ -129,22 +122,14 @@ impl SymbolDemangler { /// use stringy::types::{FoundString, Encoding, StringSource, Tag}; /// /// let demangler = SymbolDemangler::new(); - /// let mut found_string = FoundString { - /// text: "_ZN4core3fmt5Write9write_str17h1234567890abcdefE".to_string(), - /// original_text: None, - /// encoding: Encoding::Ascii, - /// offset: 0, - /// rva: None, - /// section: None, - /// length: 47, - /// tags: Vec::new(), - /// score: 0, - /// section_weight: None, - /// semantic_boost: None, - /// noise_penalty: None, - /// source: StringSource::ImportName, - /// confidence: 1.0, - /// }; + /// let text = "_ZN4core3fmt5Write9write_str17h1234567890abcdefE"; + /// let mut found_string = FoundString::new( + /// text.to_string(), + /// Encoding::Ascii, + /// 0, + /// text.len() as u32, + /// StringSource::ImportName, + /// ); /// /// demangler.demangle(&mut found_string); /// assert!(found_string.tags.contains(&Tag::DemangledSymbol)); diff --git a/src/extraction/ascii.rs b/src/extraction/ascii.rs index 9f9d82f..2025348 100644 --- a/src/extraction/ascii.rs +++ b/src/extraction/ascii.rs @@ -26,7 +26,7 @@ //! is_writable: false, //! weight: 1.0, //! }; -//! let strings = extract_from_section(§ion, data, &config); +//! let strings = extract_from_section(§ion, data, &config, None, false, 0.5); //! ``` use crate::extraction::config::NoiseFilterConfig; diff --git a/src/extraction/mod.rs b/src/extraction/mod.rs index 6b3a85f..af814f8 100644 --- a/src/extraction/mod.rs +++ b/src/extraction/mod.rs @@ -40,7 +40,7 @@ //! 
and noise filtering. It implements byte-level scanning for contiguous UTF-16LE character //! sequences, following the pattern established in the ASCII extractor. //! -//! - `extract_utf16le_strings()`: Basic byte-level UTF-16LE string scanning +//! - `extract_utf16_strings()`: Basic byte-level UTF-16 string scanning //! - `extract_from_section()`: Section-aware extraction with proper metadata population //! - `Utf16ExtractionConfig`: Configuration for minimum/maximum character count and confidence thresholds //! @@ -89,6 +89,7 @@ //! use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; //! use stringy::container::{detect_format, create_parser}; //! +//! # fn example() -> stringy::Result<()> { //! let data = std::fs::read("example.exe")?; //! let format = detect_format(&data); //! let parser = create_parser(format)?; @@ -100,7 +101,7 @@ //! //! // Format-specific extractors //! use stringy::extraction::{ -//! extract_ascii_strings, extract_utf16le_strings, extract_load_command_strings, extract_resources, +//! extract_ascii_strings, extract_utf16_strings, extract_load_command_strings, extract_resources, //! extract_resource_strings, AsciiExtractionConfig, Utf16ExtractionConfig, //! }; //! @@ -108,9 +109,9 @@ //! let ascii_config = AsciiExtractionConfig::default(); //! let ascii_strings = extract_ascii_strings(&data, &ascii_config); //! -//! // UTF-16LE extraction +//! // UTF-16 extraction //! let utf16_config = Utf16ExtractionConfig::default(); -//! let utf16le_strings = extract_utf16le_strings(&data, &utf16_config); +//! let utf16_strings = extract_utf16_strings(&data, &utf16_config); //! //! // Phase 1: Get resource metadata //! let metadata = extract_resources(&data); @@ -121,6 +122,8 @@ //! // Mach-O load command extraction //! let macho_data = std::fs::read("example.dylib")?; //! let load_command_strings = extract_load_command_strings(&macho_data); +//! # Ok(()) +//! # } //! 
``` use crate::classification::{SemanticClassifier, SymbolDemangler}; diff --git a/src/extraction/pe_resources.rs b/src/extraction/pe_resources.rs index 7938667..211b085 100644 --- a/src/extraction/pe_resources.rs +++ b/src/extraction/pe_resources.rs @@ -48,7 +48,9 @@ //! //! ```rust //! use stringy::extraction::pe_resources::extract_resources; +//! use stringy::types::ResourceType; //! +//! # fn example() -> stringy::Result<()> { //! let pe_data = std::fs::read("example.exe")?; //! let resources = extract_resources(&pe_data); //! @@ -65,6 +67,8 @@ //! _ => {} //! } //! } +//! # Ok(()) +//! # } //! ``` //! //! ## Phase 2: Resource String Extraction @@ -73,6 +77,7 @@ //! use stringy::extraction::pe_resources::extract_resource_strings; //! use stringy::types::Tag; //! +//! # fn example() -> stringy::Result<()> { //! let pe_data = std::fs::read("example.exe")?; //! let strings = extract_resource_strings(&pe_data); //! @@ -85,6 +90,8 @@ //! let ui_strings: Vec<_> = strings.iter() //! .filter(|s| s.tags.contains(&Tag::Resource) && !s.tags.contains(&Tag::Version)) //! .collect(); +//! # Ok(()) +//! # } //! ``` use crate::types::{ diff --git a/src/lib.rs b/src/lib.rs index 58a931d..d340897 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,10 +42,10 @@ //! println!("Found {} ASCII strings", ascii_strings.len()); //! //! // UTF-16LE string extraction (Windows PE binaries) -//! use stringy::extraction::{extract_utf16le_strings, Utf16ExtractionConfig}; +//! use stringy::extraction::{extract_utf16_strings, Utf16ExtractionConfig}; //! let utf16_config = Utf16ExtractionConfig::default(); -//! let utf16le_strings = extract_utf16le_strings(&data, &utf16_config); -//! println!("Found {} UTF-16LE strings", utf16le_strings.len()); +//! let utf16_strings = extract_utf16_strings(&data, &utf16_config); +//! println!("Found {} UTF-16 strings", utf16_strings.len()); //! # Ok(()) //! # } //! 
``` @@ -86,4 +86,6 @@ pub use extraction::{ }; // Re-export output infrastructure types -pub use output::{OutputFormat, OutputMetadata, format_output, format_table_with_mode}; +pub use output::{ + OutputFormat, OutputMetadata, format_json, format_output, format_table_with_mode, format_yara, +}; diff --git a/src/output/json.rs b/src/output/json.rs index e183a25..635d8aa 100644 --- a/src/output/json.rs +++ b/src/output/json.rs @@ -1,9 +1,282 @@ -use crate::types::{FoundString, Result}; +use crate::types::{FoundString, Result, StringyError}; use super::OutputMetadata; /// Format strings as JSONL output, one object per line. pub fn format_json(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - // TODO: Implement JSON formatter in a subsequent phase. - Ok(String::new()) + if _strings.is_empty() { + return Ok(String::new()); + } + + let mut lines = Vec::with_capacity(_strings.len()); + for item in _strings { + if !item.confidence.is_finite() { + return Err(StringyError::ConfigError( + "JSON serialization failed: non-finite confidence".to_string(), + )); + } + let line = serde_json::to_string(item).map_err(|err| { + StringyError::ConfigError(format!("JSON serialization failed: {}", err)) + })?; + lines.push(line); + } + + Ok(lines.join("\n")) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::output::{OutputFormat, OutputMetadata}; + use crate::types::{Encoding, FoundString, StringSource, Tag}; + use serde_json::Value; + + fn make_metadata(count: usize) -> OutputMetadata { + OutputMetadata::new("test.bin".to_string(), OutputFormat::Json, count, count) + } + + fn make_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) + } + + fn parse_line(line: &str) -> Value { + serde_json::from_str(line).expect("JSON should parse") + } + + #[test] + fn test_empty_strings_returns_empty_output() { + let output = format_json(&[], 
&make_metadata(0)).expect("Formatting should succeed"); + assert!(output.is_empty()); + } + + #[test] + fn test_single_string_serialization() { + let strings = vec![make_string("alpha")]; + let output = format_json(&strings, &make_metadata(1)).expect("Formatting should succeed"); + let value = parse_line(&output); + assert_eq!(value["text"], "alpha"); + assert_eq!(value["encoding"], "Ascii"); + } + + #[test] + fn test_multiple_strings_jsonl_format() { + let strings = vec![make_string("one"), make_string("two")]; + let output = format_json(&strings, &make_metadata(2)).expect("Formatting should succeed"); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines.len(), 2); + assert_eq!(parse_line(lines[0])["text"], "one"); + assert_eq!(parse_line(lines[1])["text"], "two"); + } + + #[test] + fn test_optional_fields_excluded_when_none() { + let strings = vec![make_string("no-optional")]; + let output = format_json(&strings, &make_metadata(1)).expect("Formatting should succeed"); + assert!(!output.contains("original_text")); + assert!(!output.contains("section_weight")); + assert!(!output.contains("semantic_boost")); + assert!(!output.contains("noise_penalty")); + } + + #[test] + fn test_optional_fields_included_when_some() { + let strings = vec![ + make_string("with-optional") + .with_original_text("orig".to_string()) + .with_section_weight(10) + .with_semantic_boost(5) + .with_noise_penalty(-2), + ]; + let output = format_json(&strings, &make_metadata(1)).expect("Formatting should succeed"); + assert!(output.contains("original_text")); + assert!(output.contains("section_weight")); + assert!(output.contains("semantic_boost")); + assert!(output.contains("noise_penalty")); + } + + #[test] + fn test_special_characters_are_escaped() { + let strings = vec![make_string("quote\" backslash\\ line\n tab\t")]; + let output = format_json(&strings, &make_metadata(1)).expect("Formatting should succeed"); + assert!(output.contains("\\\"")); + 
assert!(output.contains("\\\\")); + assert!(output.contains("\\n")); + assert!(output.contains("\\t")); + } + + #[test] + fn test_all_encodings_serialize_correctly() { + let strings = vec![ + FoundString::new( + "a".to_string(), + Encoding::Ascii, + 0, + 1, + StringSource::SectionData, + ), + FoundString::new( + "b".to_string(), + Encoding::Utf8, + 1, + 1, + StringSource::SectionData, + ), + FoundString::new( + "c".to_string(), + Encoding::Utf16Le, + 2, + 2, + StringSource::SectionData, + ), + FoundString::new( + "d".to_string(), + Encoding::Utf16Be, + 3, + 2, + StringSource::SectionData, + ), + ]; + let output = format_json(&strings, &make_metadata(4)).expect("Formatting should succeed"); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(parse_line(lines[0])["encoding"], "Ascii"); + assert_eq!(parse_line(lines[1])["encoding"], "Utf8"); + assert_eq!(parse_line(lines[2])["encoding"], "Utf16Le"); + assert_eq!(parse_line(lines[3])["encoding"], "Utf16Be"); + } + + #[test] + fn test_all_tag_types_serialize_correct_names() { + let tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::IPv6, + Tag::FilePath, + Tag::RegistryPath, + Tag::Guid, + Tag::Email, + Tag::Base64, + Tag::FormatString, + Tag::UserAgent, + Tag::DemangledSymbol, + Tag::Import, + Tag::Export, + Tag::Version, + Tag::Manifest, + Tag::Resource, + Tag::DylibPath, + Tag::Rpath, + Tag::RpathVariable, + Tag::FrameworkPath, + ]; + let strings = vec![make_string("tagged").with_tags(tags)]; + let output = format_json(&strings, &make_metadata(1)).expect("Formatting should succeed"); + let value = parse_line(&output); + let tag_values: Vec = value["tags"] + .as_array() + .expect("tags should be an array") + .iter() + .map(|item| item.as_str().expect("tag should be string").to_string()) + .collect(); + + let expected = vec![ + "Url", + "Domain", + "ipv4", + "ipv6", + "filepath", + "regpath", + "guid", + "Email", + "b64", + "fmt", + "user-agent-ish", + "demangled", + "Import", + "Export", + 
"Version", + "Manifest", + "Resource", + "dylib-path", + "rpath", + "rpath-var", + "framework-path", + ]; + + for name in expected { + assert!(tag_values.iter().any(|tag| tag == name)); + } + } + + #[test] + fn test_all_source_types_serialize_correctly() { + let strings = vec![ + FoundString::new( + "a".to_string(), + Encoding::Ascii, + 0, + 1, + StringSource::SectionData, + ), + FoundString::new( + "b".to_string(), + Encoding::Ascii, + 1, + 1, + StringSource::ImportName, + ), + FoundString::new( + "c".to_string(), + Encoding::Ascii, + 2, + 1, + StringSource::ExportName, + ), + FoundString::new( + "d".to_string(), + Encoding::Ascii, + 3, + 1, + StringSource::ResourceString, + ), + FoundString::new( + "e".to_string(), + Encoding::Ascii, + 4, + 1, + StringSource::LoadCommand, + ), + FoundString::new( + "f".to_string(), + Encoding::Ascii, + 5, + 1, + StringSource::DebugInfo, + ), + ]; + let output = format_json(&strings, &make_metadata(6)).expect("Formatting should succeed"); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(parse_line(lines[0])["source"], "SectionData"); + assert_eq!(parse_line(lines[1])["source"], "ImportName"); + assert_eq!(parse_line(lines[2])["source"], "ExportName"); + assert_eq!(parse_line(lines[3])["source"], "ResourceString"); + assert_eq!(parse_line(lines[4])["source"], "LoadCommand"); + assert_eq!(parse_line(lines[5])["source"], "DebugInfo"); + } + + #[test] + fn test_error_propagation_for_serialization_failures() { + let strings = vec![make_string("nan").with_confidence(f32::NAN)]; + let result = format_json(&strings, &make_metadata(1)); + match result { + Err(StringyError::ConfigError(_)) => {} + _ => panic!("Expected ConfigError on invalid JSON serialization"), + } + } } diff --git a/src/output/yara.rs b/src/output/yara.rs index 8e367b8..5e90d53 100644 --- a/src/output/yara.rs +++ b/src/output/yara.rs @@ -1,9 +1,301 @@ -use crate::types::{FoundString, Result}; +use crate::types::{Encoding, FoundString, Result}; use 
super::OutputMetadata; +use std::collections::{BTreeMap, HashMap}; +use std::time::{SystemTime, UNIX_EPOCH}; /// Format strings as YARA rule templates. pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - // TODO: Implement YARA formatter in a subsequent phase. - Ok(String::new()) + let timestamp = current_timestamp(); + let base_rule_name = sanitize_rule_name(&_metadata.binary_name); + let rule_name = format!("{}_strings", base_rule_name); + + let mut output = String::new(); + output.push_str("// YARA rule generated by Stringy\n"); + output.push_str(&format!("// Binary: {}\n", _metadata.binary_name)); + output.push_str(&format!("// Generated: {}\n\n", timestamp)); + + output.push_str(&format!("rule {} {{\n", rule_name)); + output.push_str(" meta:\n"); + output.push_str(&format!( + " description = \"Strings extracted from {}\"\n", + escape_yara_string(&_metadata.binary_name) + )); + output.push_str(" generated_by = \"stringy\"\n"); + output.push_str(&format!(" generated_at = \"{}\"\n", timestamp)); + + if _strings.is_empty() { + output.push_str(" condition:\n"); + output.push_str(" true\n"); + output.push_str("}\n"); + return Ok(output); + } + + let grouped = group_strings_by_tag(_strings); + let mut strings_block = String::new(); + let mut counters: HashMap = HashMap::new(); + let mut included = 0usize; + + strings_block.push_str(" strings:\n"); + for (tag, items) in grouped { + strings_block.push_str(&format!(" // tag: {}\n", tag)); + let var_tag = sanitize_identifier(&tag); + for item in items { + let char_count = item.text.chars().count(); + if char_count > 200 { + strings_block.push_str(&format!( + " // skipped (length > 200 chars): {}\n", + char_count + )); + continue; + } + + let counter = counters.entry(var_tag.clone()).or_insert(0); + *counter += 1; + let var_name = format!("${}_{}", var_tag, *counter); + let escaped = escape_yara_string(&item.text); + let modifier = get_yara_modifier(item.encoding); + + 
strings_block.push_str(&format!(" // score: {}\n", item.score)); + strings_block.push_str(&format!( + " {} = \"{}\" {}\n", + var_name, escaped, modifier + )); + included += 1; + } + } + + output.push_str(&strings_block); + output.push_str(" condition:\n"); + if included == 0 { + output.push_str(" true\n"); + } else { + output.push_str(" any of them\n"); + } + output.push_str("}\n"); + + Ok(output) +} + +fn current_timestamp() -> String { + match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(duration) => duration.as_secs().to_string(), + Err(_) => "0".to_string(), + } +} + +fn sanitize_rule_name(binary_name: &str) -> String { + let mut sanitized = String::new(); + for ch in binary_name.chars() { + if ch.is_ascii_alphanumeric() { + sanitized.push(ch); + } else { + sanitized.push('_'); + } + } + + if sanitized.is_empty() { + sanitized.push('_'); + } + + let first = sanitized.chars().next().unwrap_or('_'); + if !first.is_ascii_alphabetic() && first != '_' { + sanitized.insert(0, '_'); + } + + sanitized +} + +fn sanitize_identifier(name: &str) -> String { + let mut sanitized = String::new(); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() || ch == '_' { + sanitized.push(ch); + } else { + sanitized.push('_'); + } + } + + if sanitized.is_empty() { + "tag".to_string() + } else { + sanitized + } +} + +fn escape_yara_string(text: &str) -> String { + let mut escaped = String::new(); + for byte in text.as_bytes() { + match *byte { + b'\\' => escaped.push_str("\\\\"), + b'"' => escaped.push_str("\\\""), + b'\n' => escaped.push_str("\\n"), + b'\r' => escaped.push_str("\\r"), + b'\t' => escaped.push_str("\\t"), + 0x08 => escaped.push_str("\\b"), + 0x0b => escaped.push_str("\\x0b"), + 0x0c => escaped.push_str("\\x0c"), + 0x00..=0x1f | 0x7f..=0xff => { + escaped.push_str(&format!("\\x{:02x}", byte)); + } + _ => escaped.push(*byte as char), + } + } + escaped +} + +fn get_yara_modifier(encoding: Encoding) -> &'static str { + match encoding { + Encoding::Ascii | 
Encoding::Utf8 => "ascii", + Encoding::Utf16Le | Encoding::Utf16Be => "wide", + } +} + +fn tag_name(tag: &crate::types::Tag) -> &'static str { + match tag { + crate::types::Tag::Url => "Url", + crate::types::Tag::Domain => "Domain", + crate::types::Tag::IPv4 => "ipv4", + crate::types::Tag::IPv6 => "ipv6", + crate::types::Tag::FilePath => "filepath", + crate::types::Tag::RegistryPath => "regpath", + crate::types::Tag::Guid => "guid", + crate::types::Tag::Email => "Email", + crate::types::Tag::Base64 => "b64", + crate::types::Tag::FormatString => "fmt", + crate::types::Tag::UserAgent => "user-agent-ish", + crate::types::Tag::DemangledSymbol => "demangled", + crate::types::Tag::Import => "Import", + crate::types::Tag::Export => "Export", + crate::types::Tag::Version => "Version", + crate::types::Tag::Manifest => "Manifest", + crate::types::Tag::Resource => "Resource", + crate::types::Tag::DylibPath => "dylib-path", + crate::types::Tag::Rpath => "rpath", + crate::types::Tag::RpathVariable => "rpath-var", + crate::types::Tag::FrameworkPath => "framework-path", + } +} + +fn group_strings_by_tag(strings: &[FoundString]) -> BTreeMap> { + let mut grouped: BTreeMap> = BTreeMap::new(); + + for item in strings { + let tag = item + .tags + .first() + .map(|tag| tag_name(tag).to_string()) + .unwrap_or_else(|| "untagged".to_string()); + grouped.entry(tag).or_default().push(item); + } + + grouped +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::output::{OutputFormat, OutputMetadata}; + use crate::types::{FoundString, StringSource, Tag}; + + fn make_metadata() -> OutputMetadata { + OutputMetadata::new("sample.bin".to_string(), OutputFormat::Yara, 0, 0) + } + + fn make_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0, + text.len() as u32, + StringSource::SectionData, + ) + } + + #[test] + fn test_sanitize_rule_name() { + assert_eq!(sanitize_rule_name("sample.bin"), "sample_bin"); + 
assert_eq!(sanitize_rule_name("123name"), "_123name"); + assert_eq!(sanitize_rule_name("$weird#name"), "_weird_name"); + assert_eq!(sanitize_rule_name(""), "_"); + } + + #[test] + fn test_escape_yara_string() { + let input = "quote\" backslash\\ line\n tab\t"; + let escaped = escape_yara_string(input); + assert!(escaped.contains("\\\"")); + assert!(escaped.contains("\\\\")); + assert!(escaped.contains("\\n")); + assert!(escaped.contains("\\t")); + } + + #[test] + fn test_get_yara_modifier() { + assert_eq!(get_yara_modifier(Encoding::Ascii), "ascii"); + assert_eq!(get_yara_modifier(Encoding::Utf8), "ascii"); + assert_eq!(get_yara_modifier(Encoding::Utf16Le), "wide"); + assert_eq!(get_yara_modifier(Encoding::Utf16Be), "wide"); + } + + #[test] + fn test_group_strings_by_tag() { + let strings = vec![ + make_string("one").with_tags(vec![Tag::Url]), + make_string("two").with_tags(vec![Tag::Domain]), + make_string("three"), + ]; + let grouped = group_strings_by_tag(&strings); + assert!(grouped.contains_key("Url")); + assert!(grouped.contains_key("Domain")); + assert!(grouped.contains_key("untagged")); + } + + #[test] + fn test_empty_strings_produces_minimal_rule() { + let output = format_yara(&[], &make_metadata()).expect("Formatting should succeed"); + assert!(output.contains("condition:")); + assert!(output.contains("true")); + } + + #[test] + fn test_single_string_produces_rule() { + let strings = vec![make_string("alpha").with_tags(vec![Tag::Url])]; + let output = format_yara(&strings, &make_metadata()).expect("Formatting should succeed"); + assert!(output.contains("strings:")); + assert!(output.contains("$Url_1")); + assert!(output.contains("\"alpha\"")); + } + + #[test] + fn test_long_strings_are_skipped() { + let long_text = "a".repeat(201); + let strings = vec![make_string(&long_text).with_tags(vec![Tag::Url])]; + let output = format_yara(&strings, &make_metadata()).expect("Formatting should succeed"); + assert!(output.contains("skipped (length > 200 chars)")); + 
} + + #[test] + fn test_binary_name_sanitization_in_rule_name() { + let metadata = OutputMetadata::new("weird name.exe".to_string(), OutputFormat::Yara, 1, 1); + let strings = vec![make_string("alpha")]; + let output = format_yara(&strings, &metadata).expect("Formatting should succeed"); + assert!(output.contains("rule weird_name_exe_strings")); + } + + #[test] + fn test_encodings_apply_modifiers() { + let mut string = make_string("wide"); + string.encoding = Encoding::Utf16Le; + let output = format_yara(&[string], &make_metadata()).expect("Formatting should succeed"); + assert!(output.contains("wide")); + } + + #[test] + fn test_unicode_content_is_escaped() { + let unicode = "\u{4E2D}\u{6587}"; + let strings = vec![make_string(unicode).with_tags(vec![Tag::Domain])]; + let output = format_yara(&strings, &make_metadata()).expect("Formatting should succeed"); + assert!(output.contains("\\x")); + } } diff --git a/tests/output_json_integration.rs b/tests/output_json_integration.rs new file mode 100644 index 0000000..4590956 --- /dev/null +++ b/tests/output_json_integration.rs @@ -0,0 +1,266 @@ +//! Integration tests for JSON output formatter. +//! +//! Uses insta snapshots to verify output format consistency. 
+ +use insta::assert_snapshot; +use serde_json::Value; +use stringy::output::{OutputFormat, OutputMetadata, format_json}; +use stringy::types::{Encoding, FoundString, StringSource, Tag}; + +fn make_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) +} + +fn make_metadata(count: usize) -> OutputMetadata { + OutputMetadata::new( + "test_binary.exe".to_string(), + OutputFormat::Json, + count, + count, + ) +} + +fn parse_line(line: &str) -> Value { + serde_json::from_str(line).expect("JSON should parse") +} + +#[test] +fn test_json_empty_strings() { + let output = format_json(&[], &make_metadata(0)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_single_string() { + let strings = vec![make_string("GetProcAddress")]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_multiple_strings() { + let strings = vec![make_string("one"), make_string("two"), make_string("three")]; + let output = format_json(&strings, &make_metadata(3)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_all_fields_populated() { + let strings = vec![ + make_string("fielded") + .with_original_text("original".to_string()) + .with_section(".rdata".to_string()) + .with_rva(0x2000) + .with_tags(vec![Tag::Url]) + .with_score(150) + .with_section_weight(20) + .with_semantic_boost(30) + .with_noise_penalty(-10) + .with_confidence(0.9), + ]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_optional_fields_none() { + let strings = vec![make_string("no-optional")]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_special_characters() { + let strings = vec![make_string("quote\" backslash\\ line\n tab\t")]; + let output = format_json(&strings, 
&make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_all_encodings() { + let strings = vec![ + FoundString::new( + "ASCII".to_string(), + Encoding::Ascii, + 0, + 5, + StringSource::SectionData, + ), + FoundString::new( + "UTF8".to_string(), + Encoding::Utf8, + 1, + 4, + StringSource::SectionData, + ), + FoundString::new( + "UTF16LE".to_string(), + Encoding::Utf16Le, + 2, + 14, + StringSource::SectionData, + ), + FoundString::new( + "UTF16BE".to_string(), + Encoding::Utf16Be, + 3, + 14, + StringSource::SectionData, + ), + ]; + let output = format_json(&strings, &make_metadata(4)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_all_tags() { + let tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::IPv6, + Tag::FilePath, + Tag::RegistryPath, + Tag::Guid, + Tag::Email, + Tag::Base64, + Tag::FormatString, + Tag::UserAgent, + Tag::DemangledSymbol, + Tag::Import, + Tag::Export, + Tag::Version, + Tag::Manifest, + Tag::Resource, + Tag::DylibPath, + Tag::Rpath, + Tag::RpathVariable, + Tag::FrameworkPath, + ]; + let strings = vec![make_string("tagged").with_tags(tags)]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_all_sources() { + let strings = vec![ + FoundString::new( + "sec".to_string(), + Encoding::Ascii, + 0, + 3, + StringSource::SectionData, + ), + FoundString::new( + "imp".to_string(), + Encoding::Ascii, + 1, + 3, + StringSource::ImportName, + ), + FoundString::new( + "exp".to_string(), + Encoding::Ascii, + 2, + 3, + StringSource::ExportName, + ), + FoundString::new( + "res".to_string(), + Encoding::Ascii, + 3, + 3, + StringSource::ResourceString, + ), + FoundString::new( + "lc".to_string(), + Encoding::Ascii, + 4, + 2, + StringSource::LoadCommand, + ), + FoundString::new( + "dbg".to_string(), + Encoding::Ascii, + 5, + 3, + StringSource::DebugInfo, + ), + ]; + let output = format_json(&strings, &make_metadata(6)).unwrap(); + 
assert_snapshot!(output); +} + +#[test] +fn test_json_debug_fields() { + let strings = vec![ + make_string("debug") + .with_section_weight(10) + .with_semantic_boost(5) + .with_noise_penalty(-3), + ]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_original_text() { + let strings = vec![make_string("demangled").with_original_text("_ZN".to_string())]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_long_strings() { + let long_text = "a".repeat(300); + let strings = vec![make_string(&long_text).with_score(5)]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_unicode_content() { + let unicode = "\u{4E2D}\u{6587}\u{5B57}\u{7B26}"; + let strings = vec![make_string(unicode)]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_json_parse_roundtrip() { + let strings = vec![ + make_string("roundtrip") + .with_tags(vec![Tag::Url]) + .with_score(10), + make_string("another") + .with_tags(vec![Tag::Domain]) + .with_score(20), + ]; + let output = format_json(&strings, &make_metadata(2)).unwrap(); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines.len(), 2); + + let first: FoundString = serde_json::from_str(lines[0]).expect("should deserialize"); + let second: FoundString = serde_json::from_str(lines[1]).expect("should deserialize"); + + assert_eq!(first.text, "roundtrip"); + assert_eq!(second.text, "another"); +} + +#[test] +fn test_json_optional_fields_excluded() { + let strings = vec![make_string("no-optional")]; + let output = format_json(&strings, &make_metadata(1)).unwrap(); + let value = parse_line(&output); + assert!(value.get("original_text").is_none()); + assert!(value.get("section_weight").is_none()); + assert!(value.get("semantic_boost").is_none()); + 
assert!(value.get("noise_penalty").is_none()); +} diff --git a/tests/output_yara_integration.rs b/tests/output_yara_integration.rs new file mode 100644 index 0000000..9468a75 --- /dev/null +++ b/tests/output_yara_integration.rs @@ -0,0 +1,196 @@ +//! Integration tests for YARA output formatter. +//! +//! Uses insta snapshots to verify output format consistency. + +use insta::assert_snapshot; +use stringy::output::{OutputFormat, OutputMetadata, format_yara}; +use stringy::types::{Encoding, FoundString, StringSource, Tag}; + +fn make_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) +} + +fn make_metadata(binary_name: &str, count: usize) -> OutputMetadata { + OutputMetadata::new(binary_name.to_string(), OutputFormat::Yara, count, count) +} + +#[test] +fn test_yara_empty_strings() { + let output = format_yara(&[], &make_metadata("empty.bin", 0)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_single_string() { + let strings = vec![make_string("GetProcAddress").with_tags(vec![Tag::Import])]; + let output = format_yara(&strings, &make_metadata("single.exe", 1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_multiple_strings_same_tag() { + let strings = vec![ + make_string("alpha").with_tags(vec![Tag::Url]), + make_string("beta").with_tags(vec![Tag::Url]), + ]; + let output = format_yara(&strings, &make_metadata("same-tag.exe", 2)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_multiple_strings_different_tags() { + let strings = vec![ + make_string("https://example.com").with_tags(vec![Tag::Url]), + make_string("example.com").with_tags(vec![Tag::Domain]), + make_string("192.168.1.1").with_tags(vec![Tag::IPv4]), + ]; + let output = format_yara(&strings, &make_metadata("diff-tag.exe", 3)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_no_tags() { + let strings = vec![make_string("no-tag"), 
make_string("still-no-tag")]; + let output = format_yara(&strings, &make_metadata("untagged.exe", 2)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_long_strings_skipped() { + let long_text = "a".repeat(201); + let strings = vec![make_string(&long_text).with_tags(vec![Tag::Url])]; + let output = format_yara(&strings, &make_metadata("long.exe", 1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_special_characters() { + let strings = vec![ + make_string("quote\" backslash\\ line\n tab\t") + .with_tags(vec![Tag::FilePath]) + .with_score(10), + ]; + let output = format_yara(&strings, &make_metadata("special.exe", 1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_binary_name_sanitization() { + let strings = vec![make_string("alpha")]; + let output = format_yara(&strings, &make_metadata("weird name.exe", 1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_encoding_modifiers() { + let ascii = make_string("ascii"); + let utf16 = FoundString::new( + "wide".to_string(), + Encoding::Utf16Le, + 0x2000, + 8, + StringSource::SectionData, + ) + .with_tags(vec![Tag::Resource]); + + let output = format_yara(&[ascii, utf16], &make_metadata("enc.exe", 2)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_mixed_encodings() { + let strings = vec![ + FoundString::new( + "ascii".to_string(), + Encoding::Ascii, + 0x1000, + 5, + StringSource::SectionData, + ) + .with_tags(vec![Tag::Url]), + FoundString::new( + "utf8".to_string(), + Encoding::Utf8, + 0x2000, + 4, + StringSource::SectionData, + ) + .with_tags(vec![Tag::Domain]), + FoundString::new( + "utf16".to_string(), + Encoding::Utf16Be, + 0x3000, + 10, + StringSource::SectionData, + ) + .with_tags(vec![Tag::Resource]), + ]; + let output = format_yara(&strings, &make_metadata("mixed.exe", 3)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_high_scores() { + let strings = vec![ + make_string("critical") + 
.with_tags(vec![Tag::Url]) + .with_score(9999), + make_string("low") + .with_tags(vec![Tag::Domain]) + .with_score(-10), + ]; + let output = format_yara(&strings, &make_metadata("scores.exe", 2)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_all_tag_types() { + let strings = vec![ + make_string("url").with_tags(vec![Tag::Url]), + make_string("domain").with_tags(vec![Tag::Domain]), + make_string("ipv4").with_tags(vec![Tag::IPv4]), + make_string("ipv6").with_tags(vec![Tag::IPv6]), + make_string("path").with_tags(vec![Tag::FilePath]), + make_string("reg").with_tags(vec![Tag::RegistryPath]), + make_string("guid").with_tags(vec![Tag::Guid]), + make_string("email").with_tags(vec![Tag::Email]), + make_string("b64").with_tags(vec![Tag::Base64]), + make_string("fmt").with_tags(vec![Tag::FormatString]), + make_string("agent").with_tags(vec![Tag::UserAgent]), + make_string("demangled").with_tags(vec![Tag::DemangledSymbol]), + make_string("import").with_tags(vec![Tag::Import]), + make_string("export").with_tags(vec![Tag::Export]), + make_string("version").with_tags(vec![Tag::Version]), + make_string("manifest").with_tags(vec![Tag::Manifest]), + make_string("resource").with_tags(vec![Tag::Resource]), + make_string("dylib").with_tags(vec![Tag::DylibPath]), + make_string("rpath").with_tags(vec![Tag::Rpath]), + make_string("rpathvar").with_tags(vec![Tag::RpathVariable]), + make_string("framework").with_tags(vec![Tag::FrameworkPath]), + ]; + let output = format_yara(&strings, &make_metadata("tags.exe", strings.len())).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_unicode_in_strings() { + let unicode = "\u{4E2D}\u{6587}\u{5B57}\u{7B26}"; + let strings = vec![make_string(unicode).with_tags(vec![Tag::Domain])]; + let output = format_yara(&strings, &make_metadata("unicode.exe", 1)).unwrap(); + assert_snapshot!(output); +} + +#[test] +fn test_yara_edge_case_names() { + let strings = vec![make_string("alpha")]; + let output_numbers = 
format_yara(&strings, &make_metadata("12345", 1)).unwrap(); + let output_special = format_yara(&strings, &make_metadata("#$%", 1)).unwrap(); + assert_snapshot!(output_numbers); + assert_snapshot!(output_special); +} diff --git a/tests/snapshots/output_json_integration__json_all_encodings.snap b/tests/snapshots/output_json_integration__json_all_encodings.snap new file mode 100644 index 0000000..fac7e90 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_all_encodings.snap @@ -0,0 +1,8 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"ASCII","encoding":"Ascii","offset":0,"rva":null,"section":null,"length":5,"tags":[],"score":0,"source":"SectionData","confidence":1.0} +{"text":"UTF8","encoding":"Utf8","offset":1,"rva":null,"section":null,"length":4,"tags":[],"score":0,"source":"SectionData","confidence":1.0} +{"text":"UTF16LE","encoding":"Utf16Le","offset":2,"rva":null,"section":null,"length":14,"tags":[],"score":0,"source":"SectionData","confidence":1.0} +{"text":"UTF16BE","encoding":"Utf16Be","offset":3,"rva":null,"section":null,"length":14,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_all_fields_populated.snap b/tests/snapshots/output_json_integration__json_all_fields_populated.snap new file mode 100644 index 0000000..3593900 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_all_fields_populated.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"fielded","original_text":"original","encoding":"Ascii","offset":4096,"rva":8192,"section":".rdata","length":7,"tags":["Url"],"score":150,"section_weight":20,"semantic_boost":30,"noise_penalty":-10,"source":"SectionData","confidence":0.9} diff --git a/tests/snapshots/output_json_integration__json_all_sources.snap b/tests/snapshots/output_json_integration__json_all_sources.snap new file mode 100644 index 0000000..ab773f4 --- /dev/null +++ 
b/tests/snapshots/output_json_integration__json_all_sources.snap @@ -0,0 +1,10 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"sec","encoding":"Ascii","offset":0,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"SectionData","confidence":1.0} +{"text":"imp","encoding":"Ascii","offset":1,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"ImportName","confidence":1.0} +{"text":"exp","encoding":"Ascii","offset":2,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"ExportName","confidence":1.0} +{"text":"res","encoding":"Ascii","offset":3,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"ResourceString","confidence":1.0} +{"text":"lc","encoding":"Ascii","offset":4,"rva":null,"section":null,"length":2,"tags":[],"score":0,"source":"LoadCommand","confidence":1.0} +{"text":"dbg","encoding":"Ascii","offset":5,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"DebugInfo","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_all_tags.snap b/tests/snapshots/output_json_integration__json_all_tags.snap new file mode 100644 index 0000000..f3a0b35 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_all_tags.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"tagged","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":6,"tags":["Url","Domain","ipv4","ipv6","filepath","regpath","guid","Email","b64","fmt","user-agent-ish","demangled","Import","Export","Version","Manifest","Resource","dylib-path","rpath","rpath-var","framework-path"],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_debug_fields.snap b/tests/snapshots/output_json_integration__json_debug_fields.snap new file mode 100644 index 0000000..be79024 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_debug_fields.snap @@ -0,0 +1,5 @@ 
+--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"debug","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":5,"tags":[],"score":0,"section_weight":10,"semantic_boost":5,"noise_penalty":-3,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_empty_strings.snap b/tests/snapshots/output_json_integration__json_empty_strings.snap new file mode 100644 index 0000000..d7f4d70 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_empty_strings.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- + diff --git a/tests/snapshots/output_json_integration__json_long_strings.snap b/tests/snapshots/output_json_integration__json_long_strings.snap new file mode 100644 index 0000000..6ff94ee --- /dev/null +++ b/tests/snapshots/output_json_integration__json_long_strings.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":300,"tags":[],"score":5,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_multiple_strings.snap b/tests/snapshots/output_json_integration__json_multiple_strings.snap new file mode 100644 index 0000000..a71d29e --- /dev/null +++ b/tests/snapshots/output_json_integration__json_multiple_strings.snap @@ -0,0 +1,7 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"one","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"SectionData","confidence":1.0} 
+{"text":"two","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":3,"tags":[],"score":0,"source":"SectionData","confidence":1.0} +{"text":"three","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":5,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_optional_fields_none.snap b/tests/snapshots/output_json_integration__json_optional_fields_none.snap new file mode 100644 index 0000000..c7cc4bb --- /dev/null +++ b/tests/snapshots/output_json_integration__json_optional_fields_none.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"no-optional","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":11,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_original_text.snap b/tests/snapshots/output_json_integration__json_original_text.snap new file mode 100644 index 0000000..6e6c2b0 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_original_text.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"demangled","original_text":"_ZN","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":9,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_single_string.snap b/tests/snapshots/output_json_integration__json_single_string.snap new file mode 100644 index 0000000..2a3d52a --- /dev/null +++ b/tests/snapshots/output_json_integration__json_single_string.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"GetProcAddress","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":14,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_special_characters.snap 
b/tests/snapshots/output_json_integration__json_special_characters.snap new file mode 100644 index 0000000..75d19f2 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_special_characters.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"quote\" backslash\\ line\n tab\t","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":28,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_json_integration__json_unicode_content.snap b/tests/snapshots/output_json_integration__json_unicode_content.snap new file mode 100644 index 0000000..77c2d01 --- /dev/null +++ b/tests/snapshots/output_json_integration__json_unicode_content.snap @@ -0,0 +1,5 @@ +--- +source: tests/output_json_integration.rs +expression: output +--- +{"text":"中文字符","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":12,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_yara_integration__yara_all_tag_types.snap b/tests/snapshots/output_yara_integration__yara_all_tag_types.snap new file mode 100644 index 0000000..29c2418 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_all_tag_types.snap @@ -0,0 +1,80 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: tags.exe +// Generated: 1768722692 + +rule tags_exe_strings { + meta: + description = "Strings extracted from tags.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Domain + // score: 0 + $Domain_1 = "domain" ascii + // tag: Email + // score: 0 + $Email_1 = "email" ascii + // tag: Export + // score: 0 + $Export_1 = "export" ascii + // tag: Import + // score: 0 + $Import_1 = "import" ascii + // tag: Manifest + // score: 0 + $Manifest_1 = "manifest" ascii + // tag: Resource + // score: 0 + $Resource_1 = "resource" ascii + // tag: Url + // score: 0 + $Url_1 = "url" 
ascii + // tag: Version + // score: 0 + $Version_1 = "version" ascii + // tag: b64 + // score: 0 + $b64_1 = "b64" ascii + // tag: demangled + // score: 0 + $demangled_1 = "demangled" ascii + // tag: dylib-path + // score: 0 + $dylib_path_1 = "dylib" ascii + // tag: filepath + // score: 0 + $filepath_1 = "path" ascii + // tag: fmt + // score: 0 + $fmt_1 = "fmt" ascii + // tag: framework-path + // score: 0 + $framework_path_1 = "framework" ascii + // tag: guid + // score: 0 + $guid_1 = "guid" ascii + // tag: ipv4 + // score: 0 + $ipv4_1 = "ipv4" ascii + // tag: ipv6 + // score: 0 + $ipv6_1 = "ipv6" ascii + // tag: regpath + // score: 0 + $regpath_1 = "reg" ascii + // tag: rpath + // score: 0 + $rpath_1 = "rpath" ascii + // tag: rpath-var + // score: 0 + $rpath_var_1 = "rpathvar" ascii + // tag: user-agent-ish + // score: 0 + $user_agent_ish_1 = "agent" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap b/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap new file mode 100644 index 0000000..11e82b2 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: weird name.exe +// Generated: 1768722692 + +rule weird_name_exe_strings { + meta: + description = "Strings extracted from weird name.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: untagged + // score: 0 + $untagged_1 = "alpha" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap b/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap new file mode 100644 index 0000000..0427dfc --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs 
+expression: output_special +--- +// YARA rule generated by Stringy +// Binary: #$% +// Generated: 1768722692 + +rule ____strings { + meta: + description = "Strings extracted from #$%" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: untagged + // score: 0 + $untagged_1 = "alpha" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_edge_case_names.snap b/tests/snapshots/output_yara_integration__yara_edge_case_names.snap new file mode 100644 index 0000000..93718e1 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_edge_case_names.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs +expression: output_numbers +--- +// YARA rule generated by Stringy +// Binary: 12345 +// Generated: 1768722692 + +rule _12345_strings { + meta: + description = "Strings extracted from 12345" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: untagged + // score: 0 + $untagged_1 = "alpha" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_empty_strings.snap b/tests/snapshots/output_yara_integration__yara_empty_strings.snap new file mode 100644 index 0000000..895d38d --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_empty_strings.snap @@ -0,0 +1,16 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: empty.bin +// Generated: 1768722692 + +rule empty_bin_strings { + meta: + description = "Strings extracted from empty.bin" + generated_by = "stringy" + generated_at = "1768722692" + condition: + true +} diff --git a/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap b/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap new file mode 100644 index 0000000..b255b3f --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap @@ -0,0 +1,23 @@ +--- +source: 
tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: enc.exe +// Generated: 1768722692 + +rule enc_exe_strings { + meta: + description = "Strings extracted from enc.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Resource + // score: 0 + $Resource_1 = "wide" wide + // tag: untagged + // score: 0 + $untagged_1 = "ascii" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_high_scores.snap b/tests/snapshots/output_yara_integration__yara_high_scores.snap new file mode 100644 index 0000000..1f86286 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_high_scores.snap @@ -0,0 +1,23 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: scores.exe +// Generated: 1768722692 + +rule scores_exe_strings { + meta: + description = "Strings extracted from scores.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Domain + // score: -10 + $Domain_1 = "low" ascii + // tag: Url + // score: 9999 + $Url_1 = "critical" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap b/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap new file mode 100644 index 0000000..97f125a --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap @@ -0,0 +1,19 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: long.exe +// Generated: 1768722692 + +rule long_exe_strings { + meta: + description = "Strings extracted from long.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Url + // skipped (length > 200 chars): 201 + condition: + true +} diff --git a/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap 
b/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap new file mode 100644 index 0000000..2a54f81 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap @@ -0,0 +1,26 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: mixed.exe +// Generated: 1768722692 + +rule mixed_exe_strings { + meta: + description = "Strings extracted from mixed.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Domain + // score: 0 + $Domain_1 = "utf8" ascii + // tag: Resource + // score: 0 + $Resource_1 = "utf16" wide + // tag: Url + // score: 0 + $Url_1 = "ascii" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap b/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap new file mode 100644 index 0000000..b32a4a9 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap @@ -0,0 +1,26 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: diff-tag.exe +// Generated: 1768722692 + +rule diff_tag_exe_strings { + meta: + description = "Strings extracted from diff-tag.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Domain + // score: 0 + $Domain_1 = "example.com" ascii + // tag: Url + // score: 0 + $Url_1 = "https://example.com" ascii + // tag: ipv4 + // score: 0 + $ipv4_1 = "192.168.1.1" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap b/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap new file mode 100644 index 0000000..8bf2a3d --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap @@ -0,0 +1,22 @@ +--- +source: tests/output_yara_integration.rs 
+expression: output +--- +// YARA rule generated by Stringy +// Binary: same-tag.exe +// Generated: 1768722692 + +rule same_tag_exe_strings { + meta: + description = "Strings extracted from same-tag.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Url + // score: 0 + $Url_1 = "alpha" ascii + // score: 0 + $Url_2 = "beta" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_no_tags.snap b/tests/snapshots/output_yara_integration__yara_no_tags.snap new file mode 100644 index 0000000..1b53e20 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_no_tags.snap @@ -0,0 +1,22 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: untagged.exe +// Generated: 1768722692 + +rule untagged_exe_strings { + meta: + description = "Strings extracted from untagged.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: untagged + // score: 0 + $untagged_1 = "no-tag" ascii + // score: 0 + $untagged_2 = "still-no-tag" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_single_string.snap b/tests/snapshots/output_yara_integration__yara_single_string.snap new file mode 100644 index 0000000..bfe7b2f --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_single_string.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: single.exe +// Generated: 1768722692 + +rule single_exe_strings { + meta: + description = "Strings extracted from single.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Import + // score: 0 + $Import_1 = "GetProcAddress" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_special_characters.snap b/tests/snapshots/output_yara_integration__yara_special_characters.snap new file 
mode 100644 index 0000000..5da9535 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_special_characters.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: special.exe +// Generated: 1768722692 + +rule special_exe_strings { + meta: + description = "Strings extracted from special.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: filepath + // score: 10 + $filepath_1 = "quote\" backslash\\ line\n tab\t" ascii + condition: + any of them +} diff --git a/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap b/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap new file mode 100644 index 0000000..52b4910 --- /dev/null +++ b/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap @@ -0,0 +1,20 @@ +--- +source: tests/output_yara_integration.rs +expression: output +--- +// YARA rule generated by Stringy +// Binary: unicode.exe +// Generated: 1768722692 + +rule unicode_exe_strings { + meta: + description = "Strings extracted from unicode.exe" + generated_by = "stringy" + generated_at = "1768722692" + strings: + // tag: Domain + // score: 0 + $Domain_1 = "\xe4\xb8\xad\xe6\x96\x87\xe5\xad\x97\xe7\xac\xa6" ascii + condition: + any of them +} From d2710e87fff51e24163a991a8e5a422fe28b1c0d Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 11:43:17 -0500 Subject: [PATCH 04/25] feat(output): add generated_at timestamp to output metadata - Introduced an optional `generated_at` field in `OutputMetadata` for deterministic outputs. - Updated YARA formatter to utilize the `generated_at` timestamp instead of runtime timestamps. - Adjusted integration tests and snapshots to reflect changes in generated timestamps. 
Signed-off-by: UncleSp1d3r --- src/output/mod.rs | 12 +++ src/output/yara.rs | 75 +++++++++++++++++-- tests/output_yara_integration.rs | 1 + ..._yara_integration__yara_all_tag_types.snap | 4 +- ...ration__yara_binary_name_sanitization.snap | 4 +- ...a_integration__yara_edge_case_names-2.snap | 4 +- ...ara_integration__yara_edge_case_names.snap | 4 +- ..._yara_integration__yara_empty_strings.snap | 4 +- ..._integration__yara_encoding_modifiers.snap | 4 +- ...ut_yara_integration__yara_high_scores.snap | 4 +- ...ntegration__yara_long_strings_skipped.snap | 4 +- ...ara_integration__yara_mixed_encodings.snap | 6 +- ..._yara_multiple_strings_different_tags.snap | 4 +- ...ation__yara_multiple_strings_same_tag.snap | 4 +- ...output_yara_integration__yara_no_tags.snap | 4 +- ..._yara_integration__yara_single_string.snap | 4 +- ..._integration__yara_special_characters.snap | 4 +- ..._integration__yara_unicode_in_strings.snap | 4 +- 18 files changed, 111 insertions(+), 39 deletions(-) diff --git a/src/output/mod.rs b/src/output/mod.rs index 419c129..767d25f 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -70,6 +70,10 @@ pub struct OutputMetadata { pub total_strings: usize, /// Number of strings after filtering. pub filtered_strings: usize, + /// Optional generated-at timestamp for deterministic outputs. + /// + /// When set, formatters may use this value instead of runtime timestamps. + pub generated_at: Option, } impl OutputMetadata { @@ -86,8 +90,16 @@ impl OutputMetadata { format, total_strings, filtered_strings, + generated_at: None, } } + + /// Set an explicit generated-at timestamp for deterministic outputs. + #[must_use] + pub fn with_generated_at(mut self, generated_at: String) -> Self { + self.generated_at = Some(generated_at); + self + } } /// Format output strings using the requested output format. 
diff --git a/src/output/yara.rs b/src/output/yara.rs index 5e90d53..cd197c6 100644 --- a/src/output/yara.rs +++ b/src/output/yara.rs @@ -6,7 +6,10 @@ use std::time::{SystemTime, UNIX_EPOCH}; /// Format strings as YARA rule templates. pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - let timestamp = current_timestamp(); + let timestamp = _metadata + .generated_at + .clone() + .unwrap_or_else(current_timestamp); let base_rule_name = sanitize_rule_name(&_metadata.binary_name); let rule_name = format!("{}_strings", base_rule_name); @@ -53,14 +56,26 @@ pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Resu let counter = counters.entry(var_tag.clone()).or_insert(0); *counter += 1; let var_name = format!("${}_{}", var_tag, *counter); - let escaped = escape_yara_string(&item.text); - let modifier = get_yara_modifier(item.encoding); - strings_block.push_str(&format!(" // score: {}\n", item.score)); - strings_block.push_str(&format!( - " {} = \"{}\" {}\n", - var_name, escaped, modifier - )); + + match item.encoding { + Encoding::Utf16Be => { + let hex = utf16be_hex_string(&item.text); + strings_block.push_str(&format!(" {} = {}\n", var_name, hex)); + } + Encoding::Utf16Le => { + let escaped = escape_yara_unicode_literal(&item.text); + strings_block.push_str(&format!(" {} = \"{}\" wide\n", var_name, escaped)); + } + Encoding::Ascii | Encoding::Utf8 => { + let escaped = escape_yara_string(&item.text); + let modifier = get_yara_modifier(item.encoding); + strings_block.push_str(&format!( + " {} = \"{}\" {}\n", + var_name, escaped, modifier + )); + } + } included += 1; } } @@ -144,6 +159,50 @@ fn escape_yara_string(text: &str) -> String { escaped } +fn escape_yara_unicode_literal(text: &str) -> String { + let mut escaped = String::new(); + for ch in text.chars() { + match ch { + '\\' => escaped.push_str("\\\\"), + '"' => escaped.push_str("\\\""), + '\n' => escaped.push_str("\\n"), + '\r' => escaped.push_str("\\r"), + 
'\t' => escaped.push_str("\\t"), + _ if ch.is_control() => { + let mut buf = [0; 4]; + let encoded = ch.encode_utf8(&mut buf); + for byte in encoded.as_bytes() { + escaped.push_str(&format!("\\x{:02x}", byte)); + } + } + _ => escaped.push(ch), + } + } + escaped +} + +fn utf16be_hex_string(text: &str) -> String { + let mut bytes = Vec::new(); + for unit in text.encode_utf16() { + bytes.extend_from_slice(&unit.to_be_bytes()); + } + + if bytes.is_empty() { + return "{ }".to_string(); + } + + let mut hex = String::new(); + hex.push_str("{ "); + for (idx, byte) in bytes.iter().enumerate() { + if idx > 0 { + hex.push(' '); + } + hex.push_str(&format!("{:02x}", byte)); + } + hex.push_str(" }"); + hex +} + fn get_yara_modifier(encoding: Encoding) -> &'static str { match encoding { Encoding::Ascii | Encoding::Utf8 => "ascii", diff --git a/tests/output_yara_integration.rs b/tests/output_yara_integration.rs index 9468a75..e8cf416 100644 --- a/tests/output_yara_integration.rs +++ b/tests/output_yara_integration.rs @@ -18,6 +18,7 @@ fn make_string(text: &str) -> FoundString { fn make_metadata(binary_name: &str, count: usize) -> OutputMetadata { OutputMetadata::new(binary_name.to_string(), OutputFormat::Yara, count, count) + .with_generated_at("0".to_string()) } #[test] diff --git a/tests/snapshots/output_yara_integration__yara_all_tag_types.snap b/tests/snapshots/output_yara_integration__yara_all_tag_types.snap index 29c2418..6b5cb58 100644 --- a/tests/snapshots/output_yara_integration__yara_all_tag_types.snap +++ b/tests/snapshots/output_yara_integration__yara_all_tag_types.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: tags.exe -// Generated: 1768722692 +// Generated: 0 rule tags_exe_strings { meta: description = "Strings extracted from tags.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Domain // score: 0 diff --git 
a/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap b/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap index 11e82b2..e25f563 100644 --- a/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap +++ b/tests/snapshots/output_yara_integration__yara_binary_name_sanitization.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: weird name.exe -// Generated: 1768722692 +// Generated: 0 rule weird_name_exe_strings { meta: description = "Strings extracted from weird name.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: untagged // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap b/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap index 0427dfc..960b4c7 100644 --- a/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap +++ b/tests/snapshots/output_yara_integration__yara_edge_case_names-2.snap @@ -4,13 +4,13 @@ expression: output_special --- // YARA rule generated by Stringy // Binary: #$% -// Generated: 1768722692 +// Generated: 0 rule ____strings { meta: description = "Strings extracted from #$%" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: untagged // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_edge_case_names.snap b/tests/snapshots/output_yara_integration__yara_edge_case_names.snap index 93718e1..f0553b0 100644 --- a/tests/snapshots/output_yara_integration__yara_edge_case_names.snap +++ b/tests/snapshots/output_yara_integration__yara_edge_case_names.snap @@ -4,13 +4,13 @@ expression: output_numbers --- // YARA rule generated by Stringy // Binary: 12345 -// Generated: 1768722692 +// Generated: 0 rule _12345_strings { meta: description = "Strings extracted from 12345" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: untagged // 
score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_empty_strings.snap b/tests/snapshots/output_yara_integration__yara_empty_strings.snap index 895d38d..dc1e9cb 100644 --- a/tests/snapshots/output_yara_integration__yara_empty_strings.snap +++ b/tests/snapshots/output_yara_integration__yara_empty_strings.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: empty.bin -// Generated: 1768722692 +// Generated: 0 rule empty_bin_strings { meta: description = "Strings extracted from empty.bin" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" condition: true } diff --git a/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap b/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap index b255b3f..3ae9427 100644 --- a/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap +++ b/tests/snapshots/output_yara_integration__yara_encoding_modifiers.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: enc.exe -// Generated: 1768722692 +// Generated: 0 rule enc_exe_strings { meta: description = "Strings extracted from enc.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Resource // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_high_scores.snap b/tests/snapshots/output_yara_integration__yara_high_scores.snap index 1f86286..9ce8eb9 100644 --- a/tests/snapshots/output_yara_integration__yara_high_scores.snap +++ b/tests/snapshots/output_yara_integration__yara_high_scores.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: scores.exe -// Generated: 1768722692 +// Generated: 0 rule scores_exe_strings { meta: description = "Strings extracted from scores.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Domain // score: -10 diff --git 
a/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap b/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap index 97f125a..4841282 100644 --- a/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap +++ b/tests/snapshots/output_yara_integration__yara_long_strings_skipped.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: long.exe -// Generated: 1768722692 +// Generated: 0 rule long_exe_strings { meta: description = "Strings extracted from long.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Url // skipped (length > 200 chars): 201 diff --git a/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap b/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap index 2a54f81..1c880d3 100644 --- a/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap +++ b/tests/snapshots/output_yara_integration__yara_mixed_encodings.snap @@ -4,20 +4,20 @@ expression: output --- // YARA rule generated by Stringy // Binary: mixed.exe -// Generated: 1768722692 +// Generated: 0 rule mixed_exe_strings { meta: description = "Strings extracted from mixed.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Domain // score: 0 $Domain_1 = "utf8" ascii // tag: Resource // score: 0 - $Resource_1 = "utf16" wide + $Resource_1 = { 00 75 00 74 00 66 00 31 00 36 } // tag: Url // score: 0 $Url_1 = "ascii" ascii diff --git a/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap b/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap index b32a4a9..e5c2692 100644 --- a/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap +++ b/tests/snapshots/output_yara_integration__yara_multiple_strings_different_tags.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: 
diff-tag.exe -// Generated: 1768722692 +// Generated: 0 rule diff_tag_exe_strings { meta: description = "Strings extracted from diff-tag.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Domain // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap b/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap index 8bf2a3d..419dc58 100644 --- a/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap +++ b/tests/snapshots/output_yara_integration__yara_multiple_strings_same_tag.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: same-tag.exe -// Generated: 1768722692 +// Generated: 0 rule same_tag_exe_strings { meta: description = "Strings extracted from same-tag.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Url // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_no_tags.snap b/tests/snapshots/output_yara_integration__yara_no_tags.snap index 1b53e20..cf88b43 100644 --- a/tests/snapshots/output_yara_integration__yara_no_tags.snap +++ b/tests/snapshots/output_yara_integration__yara_no_tags.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: untagged.exe -// Generated: 1768722692 +// Generated: 0 rule untagged_exe_strings { meta: description = "Strings extracted from untagged.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: untagged // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_single_string.snap b/tests/snapshots/output_yara_integration__yara_single_string.snap index bfe7b2f..0501fb4 100644 --- a/tests/snapshots/output_yara_integration__yara_single_string.snap +++ b/tests/snapshots/output_yara_integration__yara_single_string.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: 
single.exe -// Generated: 1768722692 +// Generated: 0 rule single_exe_strings { meta: description = "Strings extracted from single.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Import // score: 0 diff --git a/tests/snapshots/output_yara_integration__yara_special_characters.snap b/tests/snapshots/output_yara_integration__yara_special_characters.snap index 5da9535..1e74269 100644 --- a/tests/snapshots/output_yara_integration__yara_special_characters.snap +++ b/tests/snapshots/output_yara_integration__yara_special_characters.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: special.exe -// Generated: 1768722692 +// Generated: 0 rule special_exe_strings { meta: description = "Strings extracted from special.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: filepath // score: 10 diff --git a/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap b/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap index 52b4910..2d33457 100644 --- a/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap +++ b/tests/snapshots/output_yara_integration__yara_unicode_in_strings.snap @@ -4,13 +4,13 @@ expression: output --- // YARA rule generated by Stringy // Binary: unicode.exe -// Generated: 1768722692 +// Generated: 0 rule unicode_exe_strings { meta: description = "Strings extracted from unicode.exe" generated_by = "stringy" - generated_at = "1768722692" + generated_at = "0" strings: // tag: Domain // score: 0 From 14c3d822f19bf952c995606a391bd7c0775507fc Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 11:51:18 -0500 Subject: [PATCH 05/25] Enable superpowers plugin in Claude settings Added 'superpowers@claude-plugins-official' to the enabledPlugins list in .claude/settings.json. 
--- .claude/settings.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.claude/settings.json b/.claude/settings.json index 37c2a2b..9faa2d4 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,5 +1,6 @@ { "enabledPlugins": { - "commit@cc-marketplace": true + "commit@cc-marketplace": true, + "superpowers@claude-plugins-official": true } } From de2e8d56b354caf59253caaeb40ef3c886b22888 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 12:03:11 -0500 Subject: [PATCH 06/25] refactor(output): improve YARA formatter code quality and test coverage Address issues identified during PR review: - Rename misleading underscore-prefixed parameters (_strings, _metadata) to strings, metadata since they are actively used - Return "CLOCK_ERROR" instead of "0" on timestamp failure to make errors clearly distinguishable from valid timestamps - Remove dead code: get_yara_modifier() function and its tests after inlining "ascii" constant for ASCII/UTF-8 encodings - Simplify utf16be_hex_string() using iterator chains with flat_map() - Add Tag import and simplify tag_name() function Add comprehensive unit tests for UTF-16 encoding functions: - escape_yara_unicode_literal: basic escapes, control chars, unicode passthrough, empty string - utf16be_hex_string: basic ASCII, empty string, non-ASCII BMP chars, surrogate pairs - escape_yara_string: additional control character coverage - with_generated_at builder method validation - Default timestamp fallback behavior Co-Authored-By: Claude Opus 4.5 --- src/output/mod.rs | 9 +++ src/output/yara.rs | 193 +++++++++++++++++++++++++++++---------------- 2 files changed, 136 insertions(+), 66 deletions(-) diff --git a/src/output/mod.rs b/src/output/mod.rs index 767d25f..9bbdb4c 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -185,6 +185,15 @@ mod tests { assert_eq!(other.filtered_strings, 1); } + #[test] + fn test_with_generated_at_builder() { + let metadata = 
OutputMetadata::new("test.bin".to_string(), OutputFormat::Yara, 0, 0); + assert!(metadata.generated_at.is_none()); + + let with_timestamp = metadata.with_generated_at("12345".to_string()); + assert_eq!(with_timestamp.generated_at, Some("12345".to_string())); + } + #[test] fn test_dispatch_logic_for_each_format() { let strings = vec![build_found_string("alpha")]; diff --git a/src/output/yara.rs b/src/output/yara.rs index cd197c6..099fcbe 100644 --- a/src/output/yara.rs +++ b/src/output/yara.rs @@ -1,40 +1,40 @@ -use crate::types::{Encoding, FoundString, Result}; +use crate::types::{Encoding, FoundString, Result, Tag}; use super::OutputMetadata; use std::collections::{BTreeMap, HashMap}; use std::time::{SystemTime, UNIX_EPOCH}; /// Format strings as YARA rule templates. -pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - let timestamp = _metadata +pub fn format_yara(strings: &[FoundString], metadata: &OutputMetadata) -> Result { + let timestamp = metadata .generated_at .clone() .unwrap_or_else(current_timestamp); - let base_rule_name = sanitize_rule_name(&_metadata.binary_name); + let base_rule_name = sanitize_rule_name(&metadata.binary_name); let rule_name = format!("{}_strings", base_rule_name); let mut output = String::new(); output.push_str("// YARA rule generated by Stringy\n"); - output.push_str(&format!("// Binary: {}\n", _metadata.binary_name)); + output.push_str(&format!("// Binary: {}\n", metadata.binary_name)); output.push_str(&format!("// Generated: {}\n\n", timestamp)); output.push_str(&format!("rule {} {{\n", rule_name)); output.push_str(" meta:\n"); output.push_str(&format!( " description = \"Strings extracted from {}\"\n", - escape_yara_string(&_metadata.binary_name) + escape_yara_string(&metadata.binary_name) )); output.push_str(" generated_by = \"stringy\"\n"); output.push_str(&format!(" generated_at = \"{}\"\n", timestamp)); - if _strings.is_empty() { + if strings.is_empty() { output.push_str(" condition:\n"); 
output.push_str(" true\n"); output.push_str("}\n"); return Ok(output); } - let grouped = group_strings_by_tag(_strings); + let grouped = group_strings_by_tag(strings); let mut strings_block = String::new(); let mut counters: HashMap = HashMap::new(); let mut included = 0usize; @@ -69,11 +69,7 @@ pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Resu } Encoding::Ascii | Encoding::Utf8 => { let escaped = escape_yara_string(&item.text); - let modifier = get_yara_modifier(item.encoding); - strings_block.push_str(&format!( - " {} = \"{}\" {}\n", - var_name, escaped, modifier - )); + strings_block.push_str(&format!(" {} = \"{}\" ascii\n", var_name, escaped)); } } included += 1; @@ -95,7 +91,9 @@ pub fn format_yara(_strings: &[FoundString], _metadata: &OutputMetadata) -> Resu fn current_timestamp() -> String { match SystemTime::now().duration_since(UNIX_EPOCH) { Ok(duration) => duration.as_secs().to_string(), - Err(_) => "0".to_string(), + // Return a clearly invalid timestamp if system clock is before Unix epoch. + // This avoids silently producing "0" which looks like a valid epoch timestamp. 
+ Err(_) => "CLOCK_ERROR".to_string(), } } @@ -182,57 +180,42 @@ fn escape_yara_unicode_literal(text: &str) -> String { } fn utf16be_hex_string(text: &str) -> String { - let mut bytes = Vec::new(); - for unit in text.encode_utf16() { - bytes.extend_from_slice(&unit.to_be_bytes()); - } + let hex_bytes: Vec = text + .encode_utf16() + .flat_map(|unit| unit.to_be_bytes()) + .map(|b| format!("{:02x}", b)) + .collect(); - if bytes.is_empty() { + if hex_bytes.is_empty() { return "{ }".to_string(); } - let mut hex = String::new(); - hex.push_str("{ "); - for (idx, byte) in bytes.iter().enumerate() { - if idx > 0 { - hex.push(' '); - } - hex.push_str(&format!("{:02x}", byte)); - } - hex.push_str(" }"); - hex -} - -fn get_yara_modifier(encoding: Encoding) -> &'static str { - match encoding { - Encoding::Ascii | Encoding::Utf8 => "ascii", - Encoding::Utf16Le | Encoding::Utf16Be => "wide", - } + format!("{{ {} }}", hex_bytes.join(" ")) } -fn tag_name(tag: &crate::types::Tag) -> &'static str { +fn tag_name(tag: &Tag) -> &'static str { match tag { - crate::types::Tag::Url => "Url", - crate::types::Tag::Domain => "Domain", - crate::types::Tag::IPv4 => "ipv4", - crate::types::Tag::IPv6 => "ipv6", - crate::types::Tag::FilePath => "filepath", - crate::types::Tag::RegistryPath => "regpath", - crate::types::Tag::Guid => "guid", - crate::types::Tag::Email => "Email", - crate::types::Tag::Base64 => "b64", - crate::types::Tag::FormatString => "fmt", - crate::types::Tag::UserAgent => "user-agent-ish", - crate::types::Tag::DemangledSymbol => "demangled", - crate::types::Tag::Import => "Import", - crate::types::Tag::Export => "Export", - crate::types::Tag::Version => "Version", - crate::types::Tag::Manifest => "Manifest", - crate::types::Tag::Resource => "Resource", - crate::types::Tag::DylibPath => "dylib-path", - crate::types::Tag::Rpath => "rpath", - crate::types::Tag::RpathVariable => "rpath-var", - crate::types::Tag::FrameworkPath => "framework-path", + Tag::Url => "Url", + Tag::Domain 
=> "Domain", + Tag::IPv4 => "ipv4", + Tag::IPv6 => "ipv6", + Tag::FilePath => "filepath", + Tag::RegistryPath => "regpath", + Tag::Guid => "guid", + Tag::Email => "Email", + Tag::Base64 => "b64", + Tag::FormatString => "fmt", + Tag::UserAgent => "user-agent-ish", + Tag::DemangledSymbol => "demangled", + Tag::Import => "Import", + Tag::Export => "Export", + Tag::Version => "Version", + Tag::Manifest => "Manifest", + Tag::Resource => "Resource", + Tag::DylibPath => "dylib-path", + Tag::Rpath => "rpath", + Tag::RpathVariable => "rpath-var", + Tag::FrameworkPath => "framework-path", } } @@ -289,14 +272,6 @@ mod tests { assert!(escaped.contains("\\t")); } - #[test] - fn test_get_yara_modifier() { - assert_eq!(get_yara_modifier(Encoding::Ascii), "ascii"); - assert_eq!(get_yara_modifier(Encoding::Utf8), "ascii"); - assert_eq!(get_yara_modifier(Encoding::Utf16Le), "wide"); - assert_eq!(get_yara_modifier(Encoding::Utf16Be), "wide"); - } - #[test] fn test_group_strings_by_tag() { let strings = vec![ @@ -357,4 +332,90 @@ mod tests { let output = format_yara(&strings, &make_metadata()).expect("Formatting should succeed"); assert!(output.contains("\\x")); } + + #[test] + fn test_escape_yara_unicode_literal_basic() { + // Basic escapes + assert_eq!(escape_yara_unicode_literal("quote\""), "quote\\\""); + assert_eq!(escape_yara_unicode_literal("back\\slash"), "back\\\\slash"); + assert_eq!(escape_yara_unicode_literal("line\nbreak"), "line\\nbreak"); + assert_eq!(escape_yara_unicode_literal("tab\there"), "tab\\there"); + assert_eq!(escape_yara_unicode_literal("return\rhere"), "return\\rhere"); + } + + #[test] + fn test_escape_yara_unicode_literal_control_chars() { + // Control characters should be hex-escaped + assert_eq!(escape_yara_unicode_literal("\x00"), "\\x00"); + assert_eq!(escape_yara_unicode_literal("\x1f"), "\\x1f"); + } + + #[test] + fn test_escape_yara_unicode_literal_unicode_passthrough() { + // Non-control Unicode should pass through unescaped + let result = 
escape_yara_unicode_literal("\u{4E2D}\u{6587}"); + assert!( + result.contains('\u{4E2D}'), + "Non-control Unicode should not be escaped" + ); + } + + #[test] + fn test_escape_yara_unicode_literal_empty() { + assert_eq!(escape_yara_unicode_literal(""), ""); + } + + #[test] + fn test_utf16be_hex_string_basic() { + // Basic ASCII - should be big-endian (00 followed by ASCII byte) + assert_eq!(utf16be_hex_string("A"), "{ 00 41 }"); + assert_eq!(utf16be_hex_string("AB"), "{ 00 41 00 42 }"); + } + + #[test] + fn test_utf16be_hex_string_empty() { + assert_eq!(utf16be_hex_string(""), "{ }"); + } + + #[test] + fn test_utf16be_hex_string_non_ascii() { + // Non-ASCII Unicode (BMP) - Chinese character U+4E2D + let chinese = utf16be_hex_string("\u{4E2D}"); + assert_eq!(chinese, "{ 4e 2d }"); + } + + #[test] + fn test_utf16be_hex_string_surrogate_pair() { + // Character requiring surrogate pair (outside BMP) - emoji U+1F600 + let emoji = utf16be_hex_string("\u{1F600}"); + // Should produce surrogate pair: D83D DE00 + assert_eq!(emoji, "{ d8 3d de 00 }"); + } + + #[test] + fn test_escape_yara_string_control_characters() { + assert_eq!(escape_yara_string("\r"), "\\r"); + assert_eq!(escape_yara_string("\x00"), "\\x00"); + assert_eq!(escape_yara_string("\x08"), "\\b"); + assert_eq!(escape_yara_string("\x0b"), "\\x0b"); + assert_eq!(escape_yara_string("\x0c"), "\\x0c"); + assert_eq!(escape_yara_string("\x7f"), "\\x7f"); + } + + #[test] + fn test_format_yara_uses_current_timestamp_when_not_set() { + // When generated_at is None, format_yara should use current_timestamp() + let metadata = OutputMetadata::new("test.bin".to_string(), OutputFormat::Yara, 0, 0); + // Note: generated_at is None + let output = format_yara(&[], &metadata).expect("Formatting should succeed"); + + // Should contain a timestamp in the generated_at field + assert!(output.contains("generated_at = \"")); + // Timestamp should be numeric (or CLOCK_ERROR in exceptional cases) + assert!( + 
output.contains("generated_at = \"1") + || output.contains("generated_at = \"CLOCK_ERROR"), + "Timestamp should be numeric or CLOCK_ERROR" + ); + } } From 6c1b531ea24bd708e6791a4f90e17f4131978e54 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 12:11:03 -0500 Subject: [PATCH 07/25] fix(docs): clarify ASCII rule for Unicode handling Signed-off-by: UncleSp1d3r --- AGENTS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index b841448..2e71a4e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ 1. **No `unsafe` code** - `#![forbid(unsafe_code)]` enforced 2. **Zero warnings** - `cargo clippy -- -D warnings` must pass -3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation +3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation (except when explicitly testing or working with Unicode strings or emojis) 4. **File size limit** - Keep files under 500 lines; split larger files 5. **No blanket `#[allow]`** - Any `allow` requires inline justification From a122d32613a66b363ffc8305f2488ef9df53650b Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 12:12:11 -0500 Subject: [PATCH 08/25] fix(reviews): clarify ASCII rule for Unicode punctuation Signed-off-by: UncleSp1d3r --- .coderabbit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coderabbit.yml b/.coderabbit.yml index f44fd03..22f80b3 100644 --- a/.coderabbit.yml +++ b/.coderabbit.yml @@ -351,7 +351,7 @@ reviews: - mode: "warning" name: "ASCII Only" instructions: | - Verify that no Unicode punctuation is introduced: + Verify that no Unicode punctuation is introduced unless explicitly required: 1. No emojis in code or documentation 2. No em-dashes - use regular hyphens 3.
No smart quotes - use straight quotes From 3b9c618ab12a15d3441fd31f5d0734a2430c8b76 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 12:40:25 -0500 Subject: [PATCH 09/25] chore(settings): remove enabled plugins from configuration Signed-off-by: UncleSp1d3r --- .claude/settings.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.claude/settings.json b/.claude/settings.json index 9faa2d4..c72c6b7 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,6 +1,3 @@ { - "enabledPlugins": { - "commit@cc-marketplace": true, - "superpowers@claude-plugins-official": true - } + "enabledPlugins": {} } From b6689ce1271eaae11a82c4469e43dd353139a5de Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 12:53:09 -0500 Subject: [PATCH 10/25] chore(contributing): add contributing guidelines document Signed-off-by: UncleSp1d3r --- CONTRIBUTING.md | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..869f001 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,86 @@ +# Contributing to Stringy + +Thanks for your interest in Stringy. This guide explains how to propose changes and what we expect for code quality. + +## Quick start + +1. Search existing issues and pull requests before filing a new one. +2. For bugs, open an issue with a clear reproduction and expected vs actual behavior. +3. For new features or larger changes, open an issue first to discuss scope. + +## Development setup + +Stringy uses Rust 2024 (MSRV 1.85+, see `rust-toolchain.toml`). We also use just for common tasks. 
+ +Recommended workflow: + +- `just setup` (to install tools) +- `just build` (compiles a debug build) +- `just test` (runs tests) +- `just lint` (runs linters) + +If you do not use just, the critical requirement is that: + +- `cargo clippy -- -D warnings` passes +- `cargo fmt` produces no changes + +## Coding standards + +These rules are enforced by CI: + +- No unsafe code +- Zero warnings (`clippy -D warnings`) +- ASCII only in code and documentation, unless explicitly working with Unicode handling +- Keep files under 500-600 lines; split when needed +- No blanket `#[allow]` on modules or files +- No async; this is a synchronous CLI tool + +Use thiserror for structured errors and include context (offsets, section names, file paths) when relevant. + +## Project-specific guidance + +Module layout: + +- `container/` handles format detection and section analysis +- `extraction/` handles string extraction, filtering, and deduplication +- `classification/` handles semantic tagging and ranking +- `output/` handles output formatters +- `types.rs` contains core data structures and error types + +Key patterns: + +- Section weights: add new section weights in `container/*.rs` using existing match patterns. Higher weight means more likely to contain useful strings. +- Semantic tags: add new Tag variants in `types.rs`, implement detection in `classification/semantic.rs`, and update any tag merging logic if needed. +- Deduplication: preserve all occurrences and merge tags across occurrences in `extraction/dedup.rs`. +- Public structs: keep public API structs non_exhaustive and provide explicit constructors. +- Imports: prefer `stringy::extraction` or `stringy::types`. Do not import locally-defined types inside `extraction/mod.rs`. + +## Tests + +- Add or update tests for behavior changes. +- Use insta snapshots for output verification when appropriate. +- Integration tests live in tests/ and fixtures in tests/fixtures/. 
+- Use insta snapshots for output verification when changing output formatters. + +Run: + +- `just test` + +## Pull requests + +- Keep PRs focused and small when possible. +- Include a clear description of the problem and the solution. +- Link related issues in the PR description. +- Update documentation when behavior changes. + +## Documentation + +Docs live under docs/ and project planning artifacts are in project_plan/. Update them when you change user-facing behavior. + +## Security + +If you believe you found a security issue, please do not open a public issue. Use GitHub Security Advisories if available, or contact the maintainers privately. + +## Questions + +If you are unsure where to start, open an issue with your question and we will point you in the right direction. From bec8192cc80acf690b5af63fc08d88c9d73b3ecb Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 13:50:16 -0500 Subject: [PATCH 11/25] refactor: address code review findings and add project documentation Immediate fixes: - Fix failing doctests in extraction/mod.rs by wrapping in fn main() - Fix rustdoc warning in patterns/ip.rs by escaping [::1]:8080 - Fix O(n^2) algorithms in dedup.rs using HashSet for unique detection New features: - Add OutputFormatter trait for extensible output formatting - Add #[non_exhaustive] to OutputFormat enum for API stability - Add Hash derive to Encoding and StringSource enums Documentation: - Create CHANGELOG.md following Keep a Changelog format - Create ROADMAP.md documenting medium/long-term improvements - Create examples/ directory with basic_extraction, network_indicators, and output_formats examples The O(n^2) fixes include: - unique_sections detection now uses HashSet instead of Vec::contains - unique_sources detection now uses HashSet instead of Vec::contains - merge_tags now uses HashSet for deduplication - Grouping key simplified from format!("{:?}", encoding) to Encoding Co-Authored-By: Claude Opus 4.5 --- CHANGELOG.md | 53 +++++++ 
CONTRIBUTING.md | 2 +- ROADMAP.md | 239 ++++++++++++++++++++++++++++++ examples/basic_extraction.rs | 75 ++++++++++ examples/network_indicators.rs | 84 +++++++++++ examples/output_formats.rs | 72 +++++++++ src/classification/patterns/ip.rs | 2 +- src/extraction/dedup.rs | 27 +--- src/extraction/mod.rs | 66 +++++---- src/lib.rs | 3 +- src/output/mod.rs | 42 ++++++ src/types.rs | 4 +- 12 files changed, 615 insertions(+), 54 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 ROADMAP.md create mode 100644 examples/basic_extraction.rs create mode 100644 examples/network_indicators.rs create mode 100644 examples/output_formats.rs diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d82fa35 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,53 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Added +- Output formatters: JSON (JSONL), table (TTY-friendly), and YARA rule templates +- `generated_at` timestamp support in output metadata for deterministic outputs +- Ranking system for prioritizing extracted strings by relevance +- Symbol demangling support for Rust mangled names +- File path classification for POSIX, Windows, and registry paths +- Semantic classification for URLs, domains, and IP addresses (IPv4/IPv6) +- String deduplication with full occurrence metadata preservation +- `CanonicalString` type for deduplicated strings with occurrence tracking +- UTF-16 string extraction with confidence scoring +- Noise filtering framework with entropy, linguistic, and repetition filters +- Mach-O load command extraction with section weight normalization +- Comprehensive PE support: section classification, import/export parsing, resource extraction +- ELF symbol extraction with type support and visibility filtering +- `#[non_exhaustive]` and builder pattern for `FoundString` public API +- Contributing guidelines document + +### Changed +- Repository renamed from StringyMcStringFace to Stringy +- Improved YARA formatter code quality and test coverage +- Clarified ASCII rule for Unicode handling in documentation + +### Fixed +- Rustdoc warning for IPv6 address example in documentation + +### Dependencies +- Updated criterion to 0.8.1 +- Updated actions/checkout to v6 +- Updated actions/download-artifact to v7 +- Updated actions/attest-build-provenance to v3 +- Updated actions/upload-artifact to v5 +- Updated github/codeql-action to v4 +- Updated EmbarkStudios/cargo-deny-action to v2 + +## [0.1.0] - TBD + +Initial release with core functionality: + +### Added +- ELF, PE, and Mach-O binary format detection and parsing +- ASCII and UTF-8 string extraction from binary sections +- Section-aware extraction with weight-based prioritization +- Basic semantic tagging infrastructure +- Command-line interface (in development) diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md index 869f001..04b90e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,7 +46,7 @@ Module layout: - `classification/` handles semantic tagging and ranking - `output/` handles output formatters - `types.rs` contains core data structures and error types - + Key patterns: - Section weights: add new section weights in `container/*.rs` using existing match patterns. Higher weight means more likely to contain useful strings. diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..30c8c58 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,239 @@ +# Stringy Development Roadmap + +This document tracks medium-term and long-term improvements identified during the comprehensive code review (2026-01-18). Issues are organized by priority and category. + +## Medium-Term Issues (Next 1-3 Releases) + +### Architecture Improvements + +#### 1. Split `extraction/mod.rs` into smaller modules +**Priority:** High +**Current state:** 1542 lines (exceeds 500-line project limit by 1042 lines) +**Files affected:** `src/extraction/mod.rs` + +Recommended split: +- `src/extraction/config.rs` - Move `ExtractionConfig` and validation logic +- `src/extraction/trait.rs` - Move `StringExtractor` trait definition +- `src/extraction/basic.rs` - Move `BasicExtractor` implementation +- `src/extraction/helpers.rs` - Move internal helper functions (`is_printable_text_byte`, `could_be_utf8_byte`, `extract_ascii_utf8_strings`) + +Other oversized files to address: +| File | Lines | Overage | +|------|-------|---------| +| `src/extraction/pe_resources.rs` | 1449 | +949 | +| `src/extraction/utf16.rs` | 1273 | +773 | +| `src/extraction/dedup.rs` | 849 | +349 | +| `src/extraction/ascii.rs` | 832 | +332 | +| `src/output/table.rs` | 708 | +208 | +| `src/extraction/filters.rs` | 702 | +202 | +| `src/container/pe.rs` | 661 | +161 | +| `src/container/elf.rs` | 627 | +127 | +| `src/container/macho.rs` | 574 | +74 | +| `src/types.rs` | 558 | +58 | + +#### 2. 
Move PE resources to container module +**Priority:** Medium +**Current state:** `src/extraction/pe_resources.rs` is in extraction but conceptually belongs in container +**Rationale:** PE resource parsing is part of container analysis, not string extraction + +#### 3. Decouple semantic enrichment from extraction +**Priority:** Medium +**Current state:** `extraction` module imports from `classification` creating bidirectional dependency +**Files affected:** `src/extraction/mod.rs:129` +**Recommendation:** Move semantic enrichment to an orchestration layer that callers control + +#### 4. Add `#[non_exhaustive]` to remaining public enums +**Priority:** Medium +**Files affected:** +- `src/types.rs:4-10` - `Encoding` enum +- `src/types.rs:130-136` - `BinaryFormat` enum + +### Error Handling + +#### 5. Add `SerializationError` variant to `StringyError` +**Priority:** Medium +**Current state:** `ConfigError` is incorrectly used for JSON serialization failures +**Files affected:** `src/output/json.rs:14-16`, `src/types.rs` + +#### 6. Add format-specific error variants +**Priority:** Low +**Recommendation:** Add `InvalidPeError`, `InvalidElfError`, `InvalidMachOError` instead of generic `ParseError(String)` + +### API Improvements + +#### 7. Add constructors to remaining public structs +**Priority:** Medium +**Files affected:** `src/types.rs` +**Structs needing constructors:** `ImportInfo`, `ExportInfo`, `SectionInfo` +**Rationale:** Required for `#[non_exhaustive]` compatibility + +#### 8. Add `#[allow]` justification comments +**Priority:** Low +**Files affected:** +- `src/extraction/utf16.rs:334` - `#[allow(clippy::result_unit_err)]` +- `src/extraction/utf16.rs:350` - `#[allow(dead_code)]` + +### Documentation + +#### 9. Update API documentation for accuracy +**Priority:** Medium +**Files affected:** `docs/src/api.md` +**Issues:** Some function signatures don't match actual implementation + +#### 10. 
Add security considerations to README +**Priority:** Medium +**Content to add:** Document malware analysis use case, safe handling of untrusted binaries + +#### 11. Document deduplication feature in user docs +**Priority:** Medium +**Files affected:** README.md, `docs/src/string-extraction.md` + +### Performance + +#### 12. Add memory mapping for large files +**Priority:** High +**Current state:** Entire file is loaded into memory +**Impact:** Processing 1GB+ binaries requires 1GB+ RAM +**Recommendation:** Use `memmap2` crate for memory-mapped file access + +```rust +// Recommended approach +use memmap2::Mmap; +use std::fs::File; + +let file = File::open(path)?; +let mmap = unsafe { Mmap::map(&file)? }; +let data: &[u8] = &mmap; +``` + +#### 13. Optimize redundant regex matching +**Priority:** Low +**Files affected:** `src/classification/patterns/network.rs:92-106` +**Issue:** URL_REGEX runs twice on URLs (in `classify_url` then `classify_domain`) + +### Testing + +#### 14. Set up code coverage metrics +**Priority:** Medium +**Tool:** `cargo-tarpaulin` +**Command:** `cargo tarpaulin --out Html` + +#### 15. Add performance benchmarks +**Priority:** Medium +**Tool:** `criterion` +**Focus areas:** Deduplication with large input sets, regex pattern matching + +#### 16. Add fuzzing for binary parsers +**Priority:** Medium +**Tool:** `cargo-fuzz` +**Targets:** `container/*.rs` parsers with malformed input + +--- + +## Long-Term Issues (Future Releases) + +### Performance Optimizations + +#### 17. Consider parallel extraction with rayon +**Priority:** Low +**Rationale:** Section-by-section extraction is embarrassingly parallel + +```rust +use rayon::prelude::*; + +let section_strings: Vec> = sections + .par_iter() + .map(|section| extractor.extract_from_section(data, section, config)) + .collect(); +``` + +#### 18. 
Consider `Cow` for hot paths +**Priority:** Low +**Files affected:** `src/types.rs:236-237` +**Benefit:** Avoid cloning when strings could be borrowed + +#### 19. Consider `SmallVec` for tags +**Priority:** Low +**Field:** `FoundString::tags` +**Rationale:** Typical 0-3 tags could use stack allocation with `SmallVec<[Tag; 4]>` + +### Dependency Management + +#### 20. Migrate to `std::sync::LazyLock` +**Priority:** Low +**Current state:** Uses `once_cell::sync::Lazy` +**Target:** `std::sync::LazyLock` (stabilized in Rust 1.80) +**Files affected:** All files in `src/classification/patterns/` + +### Feature Enhancements + +#### 21. Implement main CLI +**Priority:** High +**Current state:** `src/main.rs` is a stub with TODO +**File:** `src/main.rs:18` + +#### 22. Integrate Mach-O load command strings +**Priority:** Medium +**Current state:** Feature exists but not integrated into main pipeline +**File:** `src/container/macho.rs:198` + +#### 23. Parse all Mach-O architectures +**Priority:** Low +**Current state:** Only parses first architecture in fat binaries +**File:** `src/container/macho.rs:312` + +### Build Configuration + +#### 24. Add feature flags for output formats +**Priority:** Low +**File:** `Cargo.toml` + +```toml +[features] +default = ["json", "yara", "table"] +json = [] +yara = [] +table = [] +``` + +#### 25. 
Add `include` field to Cargo.toml +**Priority:** Low +**Purpose:** Control what gets published to crates.io + +```toml +[package] +include = ["src/**/*", "Cargo.toml", "LICENSE", "README.md"] +``` + +--- + +## Completed Items + +The following issues from the comprehensive review have been addressed: + +- [x] Fix failing doctests in `extraction/mod.rs` (2026-01-18) +- [x] Fix rustdoc warning in `patterns/ip.rs:107` (2026-01-18) +- [x] Create `CHANGELOG.md` (2026-01-18) +- [x] Fix O(n^2) algorithms in `dedup.rs` using HashSet (2026-01-18) +- [x] Add `OutputFormatter` trait for extensibility (2026-01-18) +- [x] Add `#[non_exhaustive]` to `OutputFormat` enum (2026-01-18) +- [x] Create `examples/` directory with usage examples (2026-01-18) +- [x] Add `Hash` derive to `Encoding` and `StringSource` enums (2026-01-18) + +--- + +## Review Summary + +**Overall Rating from Comprehensive Review: B+ (85/100)** + +| Dimension | Rating | +|-----------|--------| +| Code Quality | B+ | +| Architecture | B+ | +| Security | A | +| Performance | B | +| Testing | B+ | +| Documentation | B+ | +| Best Practices | A- | + +With the immediate issues addressed and medium-term improvements completed, this project would be ready for a stable 1.0 release. diff --git a/examples/basic_extraction.rs b/examples/basic_extraction.rs new file mode 100644 index 0000000..ce05ebb --- /dev/null +++ b/examples/basic_extraction.rs @@ -0,0 +1,75 @@ +//! Basic string extraction from a binary file. +//! +//! This example demonstrates the fundamental workflow for extracting strings +//! from a binary file using Stringy. +//! +//! 
Usage: cargo run --example basic_extraction <binary_file> + +use std::env; +use std::fs; +use stringy::container::{create_parser, detect_format}; +use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let args: Vec<String> = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: {} <binary_file>", args[0]); + std::process::exit(1); + } + + let path = &args[1]; + println!("Analyzing: {}", path); + + // Read the binary file + let data = fs::read(path)?; + println!("File size: {} bytes", data.len()); + + // Detect the binary format + let format = detect_format(&data); + println!("Detected format: {:?}", format); + + // Create a parser for the detected format + let parser = create_parser(format)?; + let container_info = parser.parse(&data)?; + + println!( + "Found {} sections, {} imports, {} exports", + container_info.sections.len(), + container_info.imports.len(), + container_info.exports.len() + ); + + // Extract strings using the basic extractor + let extractor = BasicExtractor::new(); + let config = ExtractionConfig::default(); + let strings = extractor.extract(&data, &container_info, &config)?; + + println!("\nExtracted {} strings\n", strings.len()); + + // Display the top 20 strings by score + let mut sorted_strings = strings.clone(); + sorted_strings.sort_by(|a, b| b.score.cmp(&a.score)); + + println!("Top strings by score:"); + println!("{:-<60}", ""); + for string in sorted_strings.iter().take(20) { + let tags: Vec<_> = string.tags.iter().map(|t| format!("{:?}", t)).collect(); + let tags_str = if tags.is_empty() { + String::new() + } else { + format!(" [{}]", tags.join(", ")) + }; + println!( + "{:4} | {:50}{}", + string.score, + if string.text.len() > 50 { + format!("{}...", &string.text[..47]) + } else { + string.text.clone() + }, + tags_str + ); + } + + Ok(()) +} diff --git a/examples/network_indicators.rs b/examples/network_indicators.rs new file mode 100644 index 0000000..f47d76d --- /dev/null +++ 
b/examples/network_indicators.rs @@ -0,0 +1,84 @@ +//! Extract network indicators (URLs, IPs, domains) from a binary. +//! +//! This example demonstrates how to extract and filter strings that contain +//! network-related indicators useful for threat intelligence. +//! +//! Usage: cargo run --example network_indicators <binary_file> + +use std::env; +use std::fs; +use stringy::container::{create_parser, detect_format}; +use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; +use stringy::types::Tag; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let args: Vec<String> = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: {} <binary_file>", args[0]); + std::process::exit(1); + } + + let path = &args[1]; + println!("Extracting network indicators from: {}\n", path); + + // Read and parse the binary + let data = fs::read(path)?; + let format = detect_format(&data); + let parser = create_parser(format)?; + let container_info = parser.parse(&data)?; + + // Extract strings with default configuration + let extractor = BasicExtractor::new(); + let config = ExtractionConfig::default(); + let strings = extractor.extract(&data, &container_info, &config)?; + + // Filter for network-related tags + let network_tags = [Tag::Url, Tag::Domain, Tag::IPv4, Tag::IPv6]; + + let network_strings: Vec<_> = strings + .iter() + .filter(|s| s.tags.iter().any(|t| network_tags.contains(t))) + .collect(); + + if network_strings.is_empty() { + println!("No network indicators found."); + return Ok(()); + } + + println!("Found {} network indicators:\n", network_strings.len()); + + // Group by tag type + println!("=== URLs ==="); + for s in network_strings + .iter() + .filter(|s| s.tags.contains(&Tag::Url)) + { + println!(" {}", s.text); + } + + println!("\n=== Domains ==="); + for s in network_strings + .iter() + .filter(|s| s.tags.contains(&Tag::Domain)) + { + println!(" {}", s.text); + } + + println!("\n=== IPv4 Addresses ==="); + for s in network_strings + .iter() + .filter(|s| 
s.tags.contains(&Tag::IPv4)) + { + println!(" {}", s.text); + } + + println!("\n=== IPv6 Addresses ==="); + for s in network_strings + .iter() + .filter(|s| s.tags.contains(&Tag::IPv6)) + { + println!(" {}", s.text); + } + + Ok(()) +} diff --git a/examples/output_formats.rs b/examples/output_formats.rs new file mode 100644 index 0000000..f52aaf0 --- /dev/null +++ b/examples/output_formats.rs @@ -0,0 +1,72 @@ +//! Demonstrate different output formats (JSON, Table, YARA). +//! +//! This example shows how to format extracted strings in different output +//! formats suitable for various use cases. +//! +//! Usage: cargo run --example output_formats <binary_file> [format] +//! +//! Formats: table (default), json, yara + +use std::env; +use std::fs; +use stringy::container::{create_parser, detect_format}; +use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; +use stringy::output::{OutputFormat, OutputMetadata, format_output}; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let args: Vec<String> = env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: {} <binary_file> [format]", args[0]); + eprintln!("Formats: table (default), json, yara"); + std::process::exit(1); + } + + let path = &args[1]; + let format_arg = args.get(2).map(|s| s.as_str()).unwrap_or("table"); + + let output_format = match format_arg.to_lowercase().as_str() { + "table" => OutputFormat::Table, + "json" => OutputFormat::Json, + "yara" => OutputFormat::Yara, + _ => { + eprintln!("Unknown format: {}. 
Use table, json, or yara.", format_arg); + std::process::exit(1); + } + }; + + // Read and parse the binary + let data = fs::read(path)?; + let format = detect_format(&data); + let parser = create_parser(format)?; + let container_info = parser.parse(&data)?; + + // Extract strings + let extractor = BasicExtractor::new(); + let config = ExtractionConfig::default(); + let strings = extractor.extract(&data, &container_info, &config)?; + + // Limit to top 50 strings for demonstration + let mut sorted_strings = strings; + sorted_strings.sort_by(|a, b| b.score.cmp(&a.score)); + let top_strings: Vec<_> = sorted_strings.into_iter().take(50).collect(); + + // Create output metadata + let binary_name = std::path::Path::new(path) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown") + .to_string(); + + let metadata = OutputMetadata::new( + binary_name, + output_format, + top_strings.len(), + top_strings.len(), + ); + + // Format and print output + let output = format_output(&top_strings, &metadata)?; + println!("{}", output); + + Ok(()) +} diff --git a/src/classification/patterns/ip.rs b/src/classification/patterns/ip.rs index bb64164..98bed5e 100644 --- a/src/classification/patterns/ip.rs +++ b/src/classification/patterns/ip.rs @@ -104,7 +104,7 @@ pub fn is_ipv4_address(text: &str) -> bool { /// Checks if the given text is a valid IPv6 address /// -/// This method handles bracketed IPv6 addresses (e.g., [::1]:8080), +/// This method handles bracketed IPv6 addresses (e.g., `[::1]:8080`), /// strips any port suffix, and validates using both regex and standard library. 
/// /// # Arguments diff --git a/src/extraction/dedup.rs b/src/extraction/dedup.rs index b25bae0..53af1f7 100644 --- a/src/extraction/dedup.rs +++ b/src/extraction/dedup.rs @@ -7,7 +7,7 @@ use crate::types::{Encoding, FoundString, StringSource, Tag}; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; /// A canonical string with all its occurrences /// @@ -90,18 +90,16 @@ pub fn deduplicate( } // Group strings by (text, encoding) key - // Use string representation of encoding as HashMap key since Encoding doesn't implement Hash - let mut groups: HashMap<(String, String), Vec> = HashMap::new(); + let mut groups: HashMap<(String, Encoding), Vec> = HashMap::new(); for string in strings { - let encoding_str = format!("{:?}", string.encoding); - let key = (string.text.clone(), encoding_str); + let key = (string.text.clone(), string.encoding); groups.entry(key).or_default().push(string); } // Convert each group to a CanonicalString let mut canonical_strings: Vec = groups .into_iter() - .map(|((text, _encoding_str), found_strings)| { + .map(|((text, _encoding), found_strings)| { // Check if group meets dedup_threshold let meets_threshold = if let Some(threshold) = dedup_threshold { found_strings.len() >= threshold @@ -180,21 +178,11 @@ fn calculate_combined_score(occurrences: &[StringOccurrence]) -> i32 { }; // Cross-section bonus: 10 points if string appears in different sections - let mut unique_sections = Vec::new(); - for occ in occurrences.iter() { - if !unique_sections.contains(&occ.section) { - unique_sections.push(occ.section.clone()); - } - } + let unique_sections: HashSet<_> = occurrences.iter().map(|occ| &occ.section).collect(); let cross_section_bonus = if unique_sections.len() > 1 { 10 } else { 0 }; // Multi-source bonus: 15 points if string appears from different sources - let mut unique_sources = Vec::new(); - for occ in occurrences.iter() { - if !unique_sources.contains(&occ.source) { - 
unique_sources.push(occ.source); - } - } + let unique_sources: HashSet<_> = occurrences.iter().map(|occ| occ.source).collect(); let multi_source_bonus = if unique_sources.len() > 1 { 15 } else { 0 }; // Confidence boost: max_confidence * 10 @@ -220,10 +208,11 @@ fn calculate_combined_score(occurrences: &[StringOccurrence]) -> i32 { /// /// Vector of unique tags (order may vary since Tag doesn't implement Ord) fn merge_tags(occurrences: &[StringOccurrence]) -> Vec { + let mut seen = HashSet::new(); let mut tags = Vec::new(); for occurrence in occurrences { for tag in &occurrence.original_tags { - if !tags.contains(tag) { + if seen.insert(tag.clone()) { tags.push(tag.clone()); } } diff --git a/src/extraction/mod.rs b/src/extraction/mod.rs index af814f8..ea11d32 100644 --- a/src/extraction/mod.rs +++ b/src/extraction/mod.rs @@ -315,18 +315,21 @@ impl ExtractionConfig { /// /// # Example /// -/// ```rust +/// ```rust,no_run /// use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; /// use stringy::container::{detect_format, create_parser}; /// -/// let data = std::fs::read("binary_file")?; -/// let format = detect_format(&data); -/// let parser = create_parser(format)?; -/// let container_info = parser.parse(&data)?; +/// fn main() -> Result<(), Box> { +/// let data = std::fs::read("binary_file")?; +/// let format = detect_format(&data); +/// let parser = create_parser(format)?; +/// let container_info = parser.parse(&data)?; /// -/// let extractor = BasicExtractor::new(); -/// let config = ExtractionConfig::default(); -/// let strings = extractor.extract(&data, &container_info, &config)?; +/// let extractor = BasicExtractor::new(); +/// let config = ExtractionConfig::default(); +/// let strings = extractor.extract(&data, &container_info, &config)?; +/// Ok(()) +/// } /// ``` pub trait StringExtractor { /// Extract strings from entire binary using container metadata @@ -409,31 +412,34 @@ pub trait StringExtractor { /// use 
stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor}; /// use stringy::types::{ContainerInfo, SectionInfo, SectionType, BinaryFormat}; /// -/// let extractor = BasicExtractor::new(); -/// let config = ExtractionConfig::default(); +/// fn main() -> Result<(), Box> { +/// let extractor = BasicExtractor::new(); +/// let config = ExtractionConfig::default(); /// -/// // Create a simple container info for testing -/// let section = SectionInfo { -/// name: ".rodata".to_string(), -/// offset: 0, -/// size: 100, -/// rva: Some(0x1000), -/// section_type: SectionType::StringData, -/// is_executable: false, -/// is_writable: false, -/// weight: 1.0, -/// }; +/// // Create a simple container info for testing +/// let section = SectionInfo { +/// name: ".rodata".to_string(), +/// offset: 0, +/// size: 100, +/// rva: Some(0x1000), +/// section_type: SectionType::StringData, +/// is_executable: false, +/// is_writable: false, +/// weight: 1.0, +/// }; /// -/// let container_info = ContainerInfo::new( -/// BinaryFormat::Elf, -/// vec![section], -/// vec![], -/// vec![], -/// None, -/// ); +/// let container_info = ContainerInfo::new( +/// BinaryFormat::Elf, +/// vec![section], +/// vec![], +/// vec![], +/// None, +/// ); /// -/// let data = b"Hello World\0Test String\0"; -/// let strings = extractor.extract(data, &container_info, &config)?; +/// let data = b"Hello World\0Test String\0"; +/// let strings = extractor.extract(data, &container_info, &config)?; +/// Ok(()) +/// } /// ``` #[derive(Debug, Clone)] pub struct BasicExtractor; diff --git a/src/lib.rs b/src/lib.rs index d340897..d5b5047 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,5 +87,6 @@ pub use extraction::{ // Re-export output infrastructure types pub use output::{ - OutputFormat, OutputMetadata, format_json, format_output, format_table_with_mode, format_yara, + OutputFormat, OutputFormatter, OutputMetadata, format_json, format_output, + format_table_with_mode, format_yara, }; diff --git 
a/src/output/mod.rs b/src/output/mod.rs index 9bbdb4c..d7e0818 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -44,7 +44,49 @@ pub use json::format_json; pub use table::{format_table, format_table_with_mode}; pub use yara::format_yara; +/// Trait for output formatters. +/// +/// Implementations of this trait provide different output formats for extracted +/// strings. This trait enables extensibility by allowing custom formatters to be +/// added without modifying the core dispatch logic. +/// +/// # Example +/// +/// ```rust +/// use stringy::output::{OutputFormatter, OutputMetadata}; +/// use stringy::types::{FoundString, Result}; +/// +/// struct CustomFormatter; +/// +/// impl OutputFormatter for CustomFormatter { +/// fn format(&self, strings: &[FoundString], metadata: &OutputMetadata) -> Result { +/// Ok(format!("Custom: {} strings from {}", strings.len(), metadata.binary_name)) +/// } +/// +/// fn name(&self) -> &'static str { +/// "custom" +/// } +/// } +/// ``` +pub trait OutputFormatter { + /// Format the extracted strings into the output representation. + /// + /// # Arguments + /// + /// * `strings` - The extracted strings to format. + /// * `metadata` - Output context including binary name and format settings. + /// + /// # Returns + /// + /// A formatted string on success, or an error if formatting fails. + fn format(&self, strings: &[FoundString], metadata: &OutputMetadata) -> Result; + + /// Returns the name of this formatter for identification purposes. + fn name(&self) -> &'static str; +} + /// Output format selection for Stringy formatters. +#[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OutputFormat { /// Human-readable table format with TTY detection. 
diff --git a/src/types.rs b/src/types.rs index 745f1c6..69e253a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; /// Represents the encoding of an extracted string -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Encoding { Ascii, Utf8, @@ -69,7 +69,7 @@ pub enum SectionType { } /// Source of a string within the binary -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum StringSource { /// String found in section data SectionData, From 0c2744e9b530528f34bef424f7878558c2f5beb5 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 14:29:08 -0500 Subject: [PATCH 12/25] chore(devcontainer): add Rust devcontainer configuration chore(dependabot): update schedules to weekly for various ecosystems Signed-off-by: UncleSp1d3r --- .devcontainer/devcontainer.json | 62 +++++++++++++++++++++++++++++++++ .github/dependabot.yml | 8 +++-- 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..a935486 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,62 @@ +{ + "name": "Rust", + "image": "mcr.microsoft.com/devcontainers/rust:2-1-trixie", + "features": { + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { + "installDockerBuildx": true, + "version": "latest", + "dockerDashComposeVersion": "v2" + }, + "ghcr.io/devcontainers/features/github-cli:1": { + "installDirectlyFromGitHubRelease": true, + "version": "latest" + }, + "ghcr.io/eitsupi/devcontainer-features/mdbook:1": { + "version": "latest" + }, + "ghcr.io/jsburckhardt/devcontainer-features/bat:1": {}, + "ghcr.io/jsburckhardt/devcontainer-features/just:1": {}, + 
"ghcr.io/lee-orr/rusty-dev-containers/cargo-audit:0": {}, + "ghcr.io/lee-orr/rusty-dev-containers/cargo-binstall:0": {}, + "ghcr.io/lee-orr/rusty-dev-containers/cargo-deny:0": {}, + "ghcr.io/lee-orr/rusty-dev-containers/cargo-llvm-cov:0": {}, + "ghcr.io/lee-orr/rusty-dev-containers/cargo-nextest:0": {}, + "ghcr.io/marcozac/devcontainer-features/goreleaser:1": { + "version": "latest" + }, + "ghcr.io/devcontainers-extra/features/claude-code:1": { + "version": "latest" + }, + "ghcr.io/devcontainers-extra/features/mise:1": { + "version": "latest" + }, + "ghcr.io/devcontainers-extra/features/pre-commit:2": { + "version": "latest" + }, + "ghcr.io/roul/devcontainer-features/mise-node:1": {}, + "ghcr.io/roul/devcontainer-features/mise-python:1": {} + }, + "customizations": { + "vscode": { + "extensions": [ + "mikestead.dotenv", + "EditorConfig.EditorConfig", + "tamasfe.even-better-toml", + "github.vscode-github-actions", + "GitHub.vscode-pull-request-github", + "skellock.just", + "yzhang.markdown-all-in-one", + "bierner.markdown-checkbox", + "bierner.markdown-footnotes", + "bierner.markdown-mermaid", + "bierner.markdown-yaml-preamble", + "DavidAnson.vscode-markdownlint", + "1YiB.rust-bundle", + "rust-lang.rust-analyzer", + "foxundermoon.shell-format", + "redhat.vscode-yaml", + "ms-vscode-remote.remote-containers" + ] + } + } +} \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4d6f904..7b04c4a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,8 +13,12 @@ updates: - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "daily" + interval: "weekly" - package-ecosystem: "rust-toolchain" directory: "/" schedule: - interval: "daily" + interval: "weekly" + - package-ecosystem: "devcontainers" + directory: "/" + schedule: + interval: "weekly" From 6510b90d4fdb7039651253d43e408515b1a4626f Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 14:49:29 -0500 Subject: [PATCH 13/25] 
refactor(output): split table.rs into module directory Split the oversized table.rs (708 lines) into a module directory with four focused files, all under the 500-line project limit: - table/mod.rs (120 lines): Public API, constants, entry points - table/tty.rs (220 lines): TTY table rendering with column alignment - table/plain.rs (96 lines): Plain text output for piping - table/formatting.rs (326 lines): Shared utilities (truncate, pad, format_tags) This addresses the ticket acceptance criterion requiring all files to be under 500 lines. Co-Authored-By: Claude Opus 4.5 --- src/output/table.rs | 708 --------------------------------- src/output/table/formatting.rs | 326 +++++++++++++++ src/output/table/mod.rs | 120 ++++++ src/output/table/plain.rs | 96 +++++ src/output/table/tty.rs | 226 +++++++++++ 5 files changed, 768 insertions(+), 708 deletions(-) delete mode 100644 src/output/table.rs create mode 100644 src/output/table/formatting.rs create mode 100644 src/output/table/mod.rs create mode 100644 src/output/table/plain.rs create mode 100644 src/output/table/tty.rs diff --git a/src/output/table.rs b/src/output/table.rs deleted file mode 100644 index 4844082..0000000 --- a/src/output/table.rs +++ /dev/null @@ -1,708 +0,0 @@ -//! Table output formatter for Stringy. -//! -//! This module provides human-readable table output with automatic TTY detection. -//! When output is directed to a terminal (TTY), strings are displayed in an aligned -//! table with headers showing String, Tags, Score, and Section columns. When output -//! is piped or redirected (non-TTY), only the raw string text is emitted, one per line, -//! for seamless integration with other command-line tools. -//! -//! # TTY Mode Example -//! -//! ```text -//! String | Tags | Score | Section -//! -------------------------------------------------------------|--------------|-------|-------- -//! https://malware.example.com/beacon | url | 150 | .rdata -//! 
C:\Windows\System32\cmd.exe | filepath | 120 | .data -//! GetProcAddress | import | 80 | -//! ``` -//! -//! # Non-TTY Mode Example -//! -//! ```text -//! https://malware.example.com/beacon -//! C:\Windows\System32\cmd.exe -//! GetProcAddress -//! ``` -//! -//! # Column Layout -//! -//! - **String**: Up to 60 characters, truncated with `...` if longer -//! - **Tags**: First 2-3 tags, comma-separated, max 20 characters -//! - **Score**: Right-aligned integer score -//! - **Section**: Section name where the string was found - -use std::io::IsTerminal; - -use crate::classification::ranking::RankingConfig; -use crate::types::{FoundString, Result, Tag}; - -use super::OutputMetadata; - -/// Maximum width for the string column before truncation. -const STRING_COLUMN_WIDTH: usize = 60; - -/// Maximum width for the tags column. -const TAGS_COLUMN_WIDTH: usize = 20; - -/// Maximum width for the score column. -const SCORE_COLUMN_WIDTH: usize = 6; - -/// Maximum width for the section column. -const SECTION_COLUMN_WIDTH: usize = 15; - -/// Format strings in a human-readable table format. -/// -/// Automatically detects whether output is going to a TTY (terminal) and adjusts -/// the format accordingly. In TTY mode, outputs an aligned table with headers. -/// In non-TTY mode (piped/redirected), outputs plain strings one per line. -/// -/// # Arguments -/// -/// * `strings` - The extracted strings to format -/// * `metadata` - Output context (currently unused but reserved for future features) -/// -/// # Returns -/// -/// A formatted string ready for output. -pub fn format_table(strings: &[FoundString], metadata: &OutputMetadata) -> Result { - let is_tty = std::io::stdout().is_terminal(); - format_table_with_mode(strings, metadata, is_tty) -} - -/// Format table with explicit TTY mode specification. -/// -/// This function allows explicit control over the output mode, useful for testing -/// and programmatic control over output format. 
-/// -/// # Arguments -/// -/// * `strings` - The extracted strings to format -/// * `metadata` - Output context -/// * `is_tty` - Whether to use TTY mode (true) or plain mode (false) -pub fn format_table_with_mode( - strings: &[FoundString], - metadata: &OutputMetadata, - is_tty: bool, -) -> Result { - if is_tty { - format_table_tty(strings, metadata) - } else { - format_table_plain(strings) - } -} - -/// Format strings as an aligned table for TTY output. -/// -/// Creates a table with headers and aligned columns showing: -/// - String text (truncated if necessary) -/// - Tags (comma-separated, limited count) -/// - Score (right-aligned) -/// - Section name -fn format_table_tty(strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - if strings.is_empty() { - return Ok(String::new()); - } - - let mut output = String::new(); - - // Calculate dynamic column widths based on content - let section_width = calculate_section_width(strings); - let tags_width = calculate_tags_width(strings); - - // Build header - let header = format!( - "{} | {} | {} | {}", - pad_string("String", STRING_COLUMN_WIDTH, Alignment::Left), - pad_string("Tags", tags_width, Alignment::Left), - pad_string("Score", SCORE_COLUMN_WIDTH, Alignment::Right), - pad_string("Section", section_width, Alignment::Left), - ); - output.push_str(&header); - output.push('\n'); - - // Build separator line - let separator = format!( - "{}-|-{}-|-{}-|-{}", - "-".repeat(STRING_COLUMN_WIDTH), - "-".repeat(tags_width), - "-".repeat(SCORE_COLUMN_WIDTH), - "-".repeat(section_width), - ); - output.push_str(&separator); - output.push('\n'); - - // Build rows - for found_string in strings { - let truncated_text = truncate_string(&found_string.text, STRING_COLUMN_WIDTH); - let tags_display = format_tags(&found_string.tags); - let section_display = found_string.section.as_deref().unwrap_or(""); - - let row = format!( - "{} | {} | {} | {}", - pad_string(&truncated_text, STRING_COLUMN_WIDTH, Alignment::Left), - 
pad_string(&tags_display, tags_width, Alignment::Left), - pad_string( - &found_string.score.to_string(), - SCORE_COLUMN_WIDTH, - Alignment::Right - ), - pad_string(section_display, section_width, Alignment::Left), - ); - output.push_str(&row); - output.push('\n'); - } - - // Remove trailing newline for consistency - if output.ends_with('\n') { - output.pop(); - } - - Ok(output) -} - -/// Format strings as plain text for non-TTY output. -/// -/// Outputs only the string text, one per line, suitable for piping to other tools. -fn format_table_plain(strings: &[FoundString]) -> Result { - let lines: Vec = strings - .iter() - .map(|s| sanitize_plain_text(&s.text)) - .collect(); - Ok(lines.join("\n")) -} - -/// Calculate the optimal width for the section column based on content. -fn calculate_section_width(strings: &[FoundString]) -> usize { - let max_section_len = strings - .iter() - .filter_map(|s| s.section.as_ref()) - .map(|s| s.len()) - .max() - .unwrap_or(0); - - // Minimum width is "Section" header length, maximum is SECTION_COLUMN_WIDTH - max_section_len.clamp("Section".len(), SECTION_COLUMN_WIDTH) -} - -/// Calculate the optimal width for the tags column based on content. -fn calculate_tags_width(strings: &[FoundString]) -> usize { - let max_tags_len = strings - .iter() - .map(|s| format_tags(&s.tags).len()) - .max() - .unwrap_or(0); - - // Minimum width is "Tags" header length, maximum is TAGS_COLUMN_WIDTH - max_tags_len.clamp("Tags".len(), TAGS_COLUMN_WIDTH) -} - -/// Format tags for display in the table. -/// -/// Converts tags to their display format using serde rename values where applicable. -/// Limits output to `MAX_TAGS_DISPLAY` tags to prevent overflow. -/// -/// # Arguments -/// -/// * `tags` - Slice of tags to format -/// -/// # Returns -/// -/// Comma-separated string of tag names, or empty string if no tags. 
-/// -/// # Examples -/// -/// ```ignore -/// let tags = vec![Tag::IPv4, Tag::FilePath]; -/// assert_eq!(format_tags(&tags), "ipv4, filepath"); -/// ``` -pub fn format_tags(tags: &[Tag]) -> String { - if tags.is_empty() { - return String::new(); - } - - let config = RankingConfig::default(); - let max_boost = tags - .iter() - .map(|tag| tag_boost_value(tag, &config)) - .max() - .unwrap_or(0); - - let tag_strings: Vec = tags - .iter() - .filter(|tag| tag_boost_value(tag, &config) == max_boost) - .map(tag_to_display_string) - .collect(); - - let result = tag_strings.join(", "); - - // Truncate if still too long - if result.len() > TAGS_COLUMN_WIDTH { - truncate_string(&result, TAGS_COLUMN_WIDTH) - } else { - result - } -} - -/// Sanitize plain text output so each string renders as a single line. -/// -/// Replaces CRLF, LF, and CR with escaped sequences to preserve content -/// while keeping output line-based. -fn sanitize_plain_text(text: &str) -> String { - text.replace("\r\n", "\\r\\n") - .replace('\n', "\\n") - .replace('\r', "\\r") -} - -/// Get the ranking boost value for a tag using the provided config. -fn tag_boost_value(tag: &Tag, config: &RankingConfig) -> i32 { - config.tag_boosts.get(tag).copied().unwrap_or(0) -} - -/// Convert a single tag to its display string. -/// -/// Uses the serde rename value where defined, otherwise uses lowercase Debug format. 
-fn tag_to_display_string(tag: &Tag) -> String { - match tag { - Tag::Url => "url".to_string(), - Tag::Domain => "domain".to_string(), - Tag::IPv4 => "ipv4".to_string(), - Tag::IPv6 => "ipv6".to_string(), - Tag::FilePath => "filepath".to_string(), - Tag::RegistryPath => "regpath".to_string(), - Tag::Guid => "guid".to_string(), - Tag::Email => "email".to_string(), - Tag::Base64 => "b64".to_string(), - Tag::FormatString => "fmt".to_string(), - Tag::UserAgent => "user-agent-ish".to_string(), - Tag::DemangledSymbol => "demangled".to_string(), - Tag::Import => "import".to_string(), - Tag::Export => "export".to_string(), - Tag::Version => "version".to_string(), - Tag::Manifest => "manifest".to_string(), - Tag::Resource => "resource".to_string(), - Tag::DylibPath => "dylib-path".to_string(), - Tag::Rpath => "rpath".to_string(), - Tag::RpathVariable => "rpath-var".to_string(), - Tag::FrameworkPath => "framework-path".to_string(), - } -} - -/// Truncate a string to the specified maximum length. -/// -/// If the string exceeds the maximum length, it is truncated and `...` is appended. -/// Handles Unicode correctly by truncating at character boundaries. -/// -/// # Arguments -/// -/// * `s` - The string to truncate -/// * `max_len` - Maximum length including the ellipsis -/// -/// # Returns -/// -/// The original string if it fits, or a truncated version with `...` appended. 
-/// -/// # Examples -/// -/// ```ignore -/// assert_eq!(truncate_string("hello", 10), "hello"); -/// assert_eq!(truncate_string("hello world", 8), "hello..."); -/// ``` -pub fn truncate_string(s: &str, max_len: usize) -> String { - if s.len() <= max_len { - return s.to_string(); - } - - if max_len <= 3 { - return ".".repeat(max_len); - } - - // Find a valid character boundary for truncation - let truncate_at = max_len - 3; - let mut end_index = truncate_at; - - // Ensure we don't split a multi-byte character - for (idx, _) in s.char_indices() { - if idx <= truncate_at { - end_index = idx; - } else { - break; - } - } - - // Handle case where we need to include at least one character - if end_index == 0 && !s.is_empty() { - if let Some((idx, _)) = s.char_indices().nth(1) { - end_index = idx; - } else { - end_index = s.len(); - } - } - - format!("{}...", &s[..end_index]) -} - -/// Text alignment for padding. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Alignment { - /// Left-align text (pad on right). - Left, - /// Right-align text (pad on left). - Right, -} - -/// Pad a string to a fixed width with the specified alignment. -/// -/// # Arguments -/// -/// * `s` - The string to pad -/// * `width` - Target width -/// * `alignment` - Left or right alignment -/// -/// # Returns -/// -/// The padded string. 
-pub fn pad_string(s: &str, width: usize, alignment: Alignment) -> String { - match alignment { - Alignment::Left => format!("{: format!("{:>width$}", s, width = width), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::output::OutputFormat; - use crate::types::{Encoding, StringSource}; - - fn make_test_string(text: &str) -> FoundString { - FoundString::new( - text.to_string(), - Encoding::Ascii, - 0x1000, - text.len() as u32, - StringSource::SectionData, - ) - } - - fn make_metadata() -> OutputMetadata { - OutputMetadata::new("test.bin".to_string(), OutputFormat::Table, 10, 10) - } - - // Tests for format_tags - mod format_tags_tests { - use super::*; - - #[test] - fn empty_tags() { - assert_eq!(format_tags(&[]), ""); - } - - #[test] - fn single_tag() { - assert_eq!(format_tags(&[Tag::Url]), "url"); - assert_eq!(format_tags(&[Tag::IPv4]), "ipv4"); - assert_eq!(format_tags(&[Tag::FilePath]), "filepath"); - } - - #[test] - fn two_tags() { - assert_eq!(format_tags(&[Tag::Url, Tag::Domain]), "url"); - assert_eq!(format_tags(&[Tag::IPv4, Tag::FilePath]), "ipv4"); - } - - #[test] - fn three_tags() { - assert_eq!(format_tags(&[Tag::Url, Tag::Domain, Tag::IPv4]), "url"); - } - - #[test] - fn more_than_max_tags_truncated() { - let tags = vec![ - Tag::Url, - Tag::Domain, - Tag::IPv4, - Tag::FilePath, - Tag::RegistryPath, - ]; - assert_eq!(format_tags(&tags), "url"); - } - - #[test] - fn multiple_tags_same_priority() { - assert_eq!(format_tags(&[Tag::Import, Tag::Export]), "import, export"); - } - - #[test] - fn all_tag_variants_have_display() { - // Ensure all tag variants produce valid output - let all_tags = vec![ - Tag::Url, - Tag::Domain, - Tag::IPv4, - Tag::IPv6, - Tag::FilePath, - Tag::RegistryPath, - Tag::Guid, - Tag::Email, - Tag::Base64, - Tag::FormatString, - Tag::UserAgent, - Tag::DemangledSymbol, - Tag::Import, - Tag::Export, - Tag::Version, - Tag::Manifest, - Tag::Resource, - Tag::DylibPath, - Tag::Rpath, - Tag::RpathVariable, - 
Tag::FrameworkPath, - ]; - - for tag in all_tags { - let display = tag_to_display_string(&tag); - assert!(!display.is_empty(), "Tag {:?} should have display", tag); - assert!(display.is_ascii(), "Tag display should be ASCII"); - } - } - } - - // Tests for truncate_string - mod truncate_string_tests { - use super::*; - - #[test] - fn short_string_unchanged() { - assert_eq!(truncate_string("hello", 10), "hello"); - assert_eq!(truncate_string("", 10), ""); - } - - #[test] - fn exact_length_unchanged() { - assert_eq!(truncate_string("hello", 5), "hello"); - } - - #[test] - fn long_string_truncated() { - assert_eq!(truncate_string("hello world", 8), "hello..."); - } - - #[test] - fn very_short_max_length() { - assert_eq!(truncate_string("hello", 3), "..."); - assert_eq!(truncate_string("hello", 2), ".."); - assert_eq!(truncate_string("hello", 1), "."); - } - - #[test] - fn unicode_string_safe_truncation() { - // Ensure we don't split multi-byte characters - let unicode = "hello\u{1F600}world"; // emoji in the middle - let truncated = truncate_string(unicode, 8); - // Should truncate before the emoji to avoid splitting it - assert!(truncated.ends_with("...")); - assert!(truncated.len() <= 8); - } - - #[test] - fn unicode_at_boundary() { - let text = "\u{4E2D}\u{6587}\u{6D4B}\u{8BD5}"; // Chinese characters - let truncated = truncate_string(text, 6); - assert!(truncated.is_char_boundary(truncated.len() - 3)); - } - } - - // Tests for pad_string - mod pad_string_tests { - use super::*; - - #[test] - fn left_alignment() { - assert_eq!(pad_string("hi", 5, Alignment::Left), "hi "); - assert_eq!(pad_string("hello", 5, Alignment::Left), "hello"); - } - - #[test] - fn right_alignment() { - assert_eq!(pad_string("hi", 5, Alignment::Right), " hi"); - assert_eq!(pad_string("hello", 5, Alignment::Right), "hello"); - } - - #[test] - fn exact_width() { - assert_eq!(pad_string("exact", 5, Alignment::Left), "exact"); - assert_eq!(pad_string("exact", 5, Alignment::Right), "exact"); - } - 
- #[test] - fn empty_string() { - assert_eq!(pad_string("", 5, Alignment::Left), " "); - assert_eq!(pad_string("", 5, Alignment::Right), " "); - } - } - - // Tests for format_table - mod format_table_tests { - use super::*; - - #[test] - fn empty_strings_returns_empty() { - let result = format_table_with_mode(&[], &make_metadata(), true).unwrap(); - assert_eq!(result, ""); - } - - #[test] - fn single_string_tty_mode() { - let strings = vec![make_test_string("test string")]; - let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); - - // Should have header, separator, and one data row - let lines: Vec<&str> = result.lines().collect(); - assert_eq!(lines.len(), 3); - assert!(lines[0].contains("String")); - assert!(lines[0].contains("Tags")); - assert!(lines[0].contains("Score")); - assert!(lines[0].contains("Section")); - assert!(lines[1].contains("---")); - assert!(lines[2].contains("test string")); - } - - #[test] - fn single_string_plain_mode() { - let strings = vec![make_test_string("test string")]; - let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); - - assert_eq!(result, "test string"); - } - - #[test] - fn multiple_strings_plain_mode() { - let strings = vec![ - make_test_string("first"), - make_test_string("second"), - make_test_string("third"), - ]; - let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); - - assert_eq!(result, "first\nsecond\nthird"); - } - - #[test] - fn string_with_tags_displayed() { - let mut found = make_test_string("http://example.com"); - found.tags = vec![Tag::Url, Tag::Domain]; - - let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); - assert!(result.contains("url")); - } - - #[test] - fn string_with_section_displayed() { - let found = make_test_string("test").with_section(".rodata".to_string()); - - let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); - assert!(result.contains(".rodata")); - } - - 
#[test] - fn string_with_score_displayed() { - let found = make_test_string("test").with_score(150); - - let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); - assert!(result.contains("150")); - } - - #[test] - fn long_string_truncated_in_tty() { - let long_text = "a".repeat(100); - let strings = vec![make_test_string(&long_text)]; - let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); - - // Should contain truncated version with ... - assert!(result.contains("...")); - // Should not contain the full 100 character string - assert!(!result.contains(&long_text)); - } - - #[test] - fn long_string_not_truncated_in_plain() { - let long_text = "a".repeat(100); - let strings = vec![make_test_string(&long_text)]; - let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); - - // Plain mode should have full string - assert_eq!(result, long_text); - } - - #[test] - fn missing_optional_fields_handled() { - // String with no section, no tags, default score - let found = make_test_string("minimal"); - - let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); - // Should not crash and should contain the string - assert!(result.contains("minimal")); - } - - #[test] - fn special_characters_in_string() { - let strings = vec![make_test_string("tab\there"), make_test_string("pipe|here")]; - let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); - - // Each string should be on its own line in output - let lines: Vec<&str> = result.lines().collect(); - assert_eq!(lines.len(), 2); - assert!(lines[0].contains("tab\there")); - assert!(lines[1].contains("pipe|here")); - } - - #[test] - fn string_with_embedded_newline() { - let strings = vec![make_test_string("line1\nline2")]; - let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); - assert_eq!(result, "line1\\nline2"); - } - } - - // Tests for column width calculation - mod 
column_width_tests { - use super::*; - - #[test] - fn section_width_minimum() { - let strings = vec![make_test_string("test")]; - let width = calculate_section_width(&strings); - assert_eq!(width, "Section".len()); - } - - #[test] - fn section_width_from_content() { - let strings = vec![make_test_string("test").with_section(".rodata.str1.1".to_string())]; - let width = calculate_section_width(&strings); - assert_eq!(width, ".rodata.str1.1".len()); - } - - #[test] - fn section_width_capped_at_max() { - let long_section = "a".repeat(50); - let strings = vec![make_test_string("test").with_section(long_section)]; - let width = calculate_section_width(&strings); - assert_eq!(width, SECTION_COLUMN_WIDTH); - } - - #[test] - fn tags_width_minimum() { - let strings = vec![make_test_string("test")]; - let width = calculate_tags_width(&strings); - assert_eq!(width, "Tags".len()); - } - - #[test] - fn tags_width_from_content() { - let mut found = make_test_string("test"); - found.tags = vec![Tag::Url, Tag::Domain]; - let width = calculate_tags_width(&[found]); - assert_eq!(width, "Tags".len()); - } - } -} diff --git a/src/output/table/formatting.rs b/src/output/table/formatting.rs new file mode 100644 index 0000000..02c8e4a --- /dev/null +++ b/src/output/table/formatting.rs @@ -0,0 +1,326 @@ +//! String formatting utilities for table output. +//! +//! This module provides shared utilities for formatting strings, tags, and +//! text alignment used by both TTY and plain output modes. + +use crate::classification::ranking::RankingConfig; +use crate::types::Tag; + +use super::TAGS_COLUMN_WIDTH; + +/// Text alignment for padding. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Alignment { + /// Left-align text (pad on right). + Left, + /// Right-align text (pad on left). + Right, +} + +/// Format tags for display in the table. +/// +/// Converts tags to their display format using serde rename values where applicable. 
+/// Shows only tags with the highest boost value to prioritize important tags. +/// +/// # Arguments +/// +/// * `tags` - Slice of tags to format +/// +/// # Returns +/// +/// Comma-separated string of tag names, or empty string if no tags. +/// +/// # Examples +/// +/// ```ignore +/// let tags = vec![Tag::IPv4, Tag::FilePath]; +/// assert_eq!(format_tags(&tags), "ipv4"); +/// ``` +pub fn format_tags(tags: &[Tag]) -> String { + if tags.is_empty() { + return String::new(); + } + + let config = RankingConfig::default(); + let max_boost = tags + .iter() + .map(|tag| tag_boost_value(tag, &config)) + .max() + .unwrap_or(0); + + let tag_strings: Vec = tags + .iter() + .filter(|tag| tag_boost_value(tag, &config) == max_boost) + .map(tag_to_display_string) + .collect(); + + let result = tag_strings.join(", "); + + // Truncate if still too long + if result.len() > TAGS_COLUMN_WIDTH { + truncate_string(&result, TAGS_COLUMN_WIDTH) + } else { + result + } +} + +/// Get the ranking boost value for a tag using the provided config. +fn tag_boost_value(tag: &Tag, config: &RankingConfig) -> i32 { + config.tag_boosts.get(tag).copied().unwrap_or(0) +} + +/// Convert a single tag to its display string. +/// +/// Uses the serde rename value where defined, otherwise uses lowercase Debug format. 
+pub(crate) fn tag_to_display_string(tag: &Tag) -> String { + match tag { + Tag::Url => "url".to_string(), + Tag::Domain => "domain".to_string(), + Tag::IPv4 => "ipv4".to_string(), + Tag::IPv6 => "ipv6".to_string(), + Tag::FilePath => "filepath".to_string(), + Tag::RegistryPath => "regpath".to_string(), + Tag::Guid => "guid".to_string(), + Tag::Email => "email".to_string(), + Tag::Base64 => "b64".to_string(), + Tag::FormatString => "fmt".to_string(), + Tag::UserAgent => "user-agent-ish".to_string(), + Tag::DemangledSymbol => "demangled".to_string(), + Tag::Import => "import".to_string(), + Tag::Export => "export".to_string(), + Tag::Version => "version".to_string(), + Tag::Manifest => "manifest".to_string(), + Tag::Resource => "resource".to_string(), + Tag::DylibPath => "dylib-path".to_string(), + Tag::Rpath => "rpath".to_string(), + Tag::RpathVariable => "rpath-var".to_string(), + Tag::FrameworkPath => "framework-path".to_string(), + } +} + +/// Truncate a string to the specified maximum length. +/// +/// If the string exceeds the maximum length, it is truncated and `...` is appended. +/// Handles Unicode correctly by truncating at character boundaries. +/// +/// # Arguments +/// +/// * `s` - The string to truncate +/// * `max_len` - Maximum length including the ellipsis +/// +/// # Returns +/// +/// The original string if it fits, or a truncated version with `...` appended. 
+/// +/// # Examples +/// +/// ```ignore +/// assert_eq!(truncate_string("hello", 10), "hello"); +/// assert_eq!(truncate_string("hello world", 8), "hello..."); +/// ``` +pub fn truncate_string(s: &str, max_len: usize) -> String { + if s.len() <= max_len { + return s.to_string(); + } + + if max_len <= 3 { + return ".".repeat(max_len); + } + + // Find a valid character boundary for truncation + let truncate_at = max_len - 3; + let mut end_index = truncate_at; + + // Ensure we don't split a multi-byte character + for (idx, _) in s.char_indices() { + if idx <= truncate_at { + end_index = idx; + } else { + break; + } + } + + // Handle case where we need to include at least one character + if end_index == 0 && !s.is_empty() { + if let Some((idx, _)) = s.char_indices().nth(1) { + end_index = idx; + } else { + end_index = s.len(); + } + } + + format!("{}...", &s[..end_index]) +} + +/// Pad a string to a fixed width with the specified alignment. +/// +/// # Arguments +/// +/// * `s` - The string to pad +/// * `width` - Target width +/// * `alignment` - Left or right alignment +/// +/// # Returns +/// +/// The padded string. 
+pub fn pad_string(s: &str, width: usize, alignment: Alignment) -> String { + match alignment { + Alignment::Left => format!("{: format!("{:>width$}", s, width = width), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod format_tags_tests { + use super::*; + + #[test] + fn empty_tags() { + assert_eq!(format_tags(&[]), ""); + } + + #[test] + fn single_tag() { + assert_eq!(format_tags(&[Tag::Url]), "url"); + assert_eq!(format_tags(&[Tag::IPv4]), "ipv4"); + assert_eq!(format_tags(&[Tag::FilePath]), "filepath"); + } + + #[test] + fn two_tags() { + assert_eq!(format_tags(&[Tag::Url, Tag::Domain]), "url"); + assert_eq!(format_tags(&[Tag::IPv4, Tag::FilePath]), "ipv4"); + } + + #[test] + fn three_tags() { + assert_eq!(format_tags(&[Tag::Url, Tag::Domain, Tag::IPv4]), "url"); + } + + #[test] + fn more_than_max_tags_truncated() { + let tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::FilePath, + Tag::RegistryPath, + ]; + assert_eq!(format_tags(&tags), "url"); + } + + #[test] + fn multiple_tags_same_priority() { + assert_eq!(format_tags(&[Tag::Import, Tag::Export]), "import, export"); + } + + #[test] + fn all_tag_variants_have_display() { + // Ensure all tag variants produce valid output + let all_tags = vec![ + Tag::Url, + Tag::Domain, + Tag::IPv4, + Tag::IPv6, + Tag::FilePath, + Tag::RegistryPath, + Tag::Guid, + Tag::Email, + Tag::Base64, + Tag::FormatString, + Tag::UserAgent, + Tag::DemangledSymbol, + Tag::Import, + Tag::Export, + Tag::Version, + Tag::Manifest, + Tag::Resource, + Tag::DylibPath, + Tag::Rpath, + Tag::RpathVariable, + Tag::FrameworkPath, + ]; + + for tag in all_tags { + let display = tag_to_display_string(&tag); + assert!(!display.is_empty(), "Tag {:?} should have display", tag); + assert!(display.is_ascii(), "Tag display should be ASCII"); + } + } + } + + mod truncate_string_tests { + use super::*; + + #[test] + fn short_string_unchanged() { + assert_eq!(truncate_string("hello", 10), "hello"); + assert_eq!(truncate_string("", 10), ""); 
+ } + + #[test] + fn exact_length_unchanged() { + assert_eq!(truncate_string("hello", 5), "hello"); + } + + #[test] + fn long_string_truncated() { + assert_eq!(truncate_string("hello world", 8), "hello..."); + } + + #[test] + fn very_short_max_length() { + assert_eq!(truncate_string("hello", 3), "..."); + assert_eq!(truncate_string("hello", 2), ".."); + assert_eq!(truncate_string("hello", 1), "."); + } + + #[test] + fn unicode_string_safe_truncation() { + // Ensure we don't split multi-byte characters + let unicode = "hello\u{1F600}world"; // emoji in the middle + let truncated = truncate_string(unicode, 8); + // Should truncate before the emoji to avoid splitting it + assert!(truncated.ends_with("...")); + assert!(truncated.len() <= 8); + } + + #[test] + fn unicode_at_boundary() { + let text = "\u{4E2D}\u{6587}\u{6D4B}\u{8BD5}"; // Chinese characters + let truncated = truncate_string(text, 6); + assert!(truncated.is_char_boundary(truncated.len() - 3)); + } + } + + mod pad_string_tests { + use super::*; + + #[test] + fn left_alignment() { + assert_eq!(pad_string("hi", 5, Alignment::Left), "hi "); + assert_eq!(pad_string("hello", 5, Alignment::Left), "hello"); + } + + #[test] + fn right_alignment() { + assert_eq!(pad_string("hi", 5, Alignment::Right), " hi"); + assert_eq!(pad_string("hello", 5, Alignment::Right), "hello"); + } + + #[test] + fn exact_width() { + assert_eq!(pad_string("exact", 5, Alignment::Left), "exact"); + assert_eq!(pad_string("exact", 5, Alignment::Right), "exact"); + } + + #[test] + fn empty_string() { + assert_eq!(pad_string("", 5, Alignment::Left), " "); + assert_eq!(pad_string("", 5, Alignment::Right), " "); + } + } +} diff --git a/src/output/table/mod.rs b/src/output/table/mod.rs new file mode 100644 index 0000000..9bcb95a --- /dev/null +++ b/src/output/table/mod.rs @@ -0,0 +1,120 @@ +//! Table output formatter for Stringy. +//! +//! This module provides human-readable table output with automatic TTY detection. +//! 
When output is directed to a terminal (TTY), strings are displayed in an aligned +//! table with headers showing String, Tags, Score, and Section columns. When output +//! is piped or redirected (non-TTY), only the raw string text is emitted, one per line, +//! for seamless integration with other command-line tools. +//! +//! # TTY Mode Example +//! +//! ```text +//! String | Tags | Score | Section +//! -------------------------------------------------------------|--------------|-------|-------- +//! https://malware.example.com/beacon | url | 150 | .rdata +//! C:\Windows\System32\cmd.exe | filepath | 120 | .data +//! GetProcAddress | import | 80 | +//! ``` +//! +//! # Non-TTY Mode Example +//! +//! ```text +//! https://malware.example.com/beacon +//! C:\Windows\System32\cmd.exe +//! GetProcAddress +//! ``` +//! +//! # Column Layout +//! +//! - **String**: Up to 60 characters, truncated with `...` if longer +//! - **Tags**: First 2-3 tags, comma-separated, max 20 characters +//! - **Score**: Right-aligned integer score +//! - **Section**: Section name where the string was found + +mod formatting; +mod plain; +mod tty; + +use std::io::IsTerminal; + +use crate::types::{FoundString, Result}; + +use super::OutputMetadata; + +// Re-export public items from submodules +pub use formatting::{Alignment, format_tags, pad_string, truncate_string}; + +/// Maximum width for the string column before truncation. +pub(crate) const STRING_COLUMN_WIDTH: usize = 60; + +/// Maximum width for the tags column. +pub(crate) const TAGS_COLUMN_WIDTH: usize = 20; + +/// Maximum width for the score column. +pub(crate) const SCORE_COLUMN_WIDTH: usize = 6; + +/// Maximum width for the section column. +pub(crate) const SECTION_COLUMN_WIDTH: usize = 15; + +/// Format strings in a human-readable table format. +/// +/// Automatically detects whether output is going to a TTY (terminal) and adjusts +/// the format accordingly. In TTY mode, outputs an aligned table with headers. 
+/// In non-TTY mode (piped/redirected), outputs plain strings one per line. +/// +/// # Arguments +/// +/// * `strings` - The extracted strings to format +/// * `metadata` - Output context (currently unused but reserved for future features) +/// +/// # Returns +/// +/// A formatted string ready for output. +pub fn format_table(strings: &[FoundString], metadata: &OutputMetadata) -> Result { + let is_tty = std::io::stdout().is_terminal(); + format_table_with_mode(strings, metadata, is_tty) +} + +/// Format table with explicit TTY mode specification. +/// +/// This function allows explicit control over the output mode, useful for testing +/// and programmatic control over output format. +/// +/// # Arguments +/// +/// * `strings` - The extracted strings to format +/// * `metadata` - Output context +/// * `is_tty` - Whether to use TTY mode (true) or plain mode (false) +pub fn format_table_with_mode( + strings: &[FoundString], + metadata: &OutputMetadata, + is_tty: bool, +) -> Result { + if is_tty { + tty::format_table_tty(strings, metadata) + } else { + plain::format_table_plain(strings) + } +} + +#[cfg(test)] +pub(crate) mod test_helpers { + use crate::output::OutputFormat; + use crate::types::{Encoding, FoundString, StringSource}; + + use super::OutputMetadata; + + pub fn make_test_string(text: &str) -> FoundString { + FoundString::new( + text.to_string(), + Encoding::Ascii, + 0x1000, + text.len() as u32, + StringSource::SectionData, + ) + } + + pub fn make_metadata() -> OutputMetadata { + OutputMetadata::new("test.bin".to_string(), OutputFormat::Table, 10, 10) + } +} diff --git a/src/output/table/plain.rs b/src/output/table/plain.rs new file mode 100644 index 0000000..edab83b --- /dev/null +++ b/src/output/table/plain.rs @@ -0,0 +1,96 @@ +//! Plain text output for non-TTY environments. +//! +//! This module provides simple one-string-per-line output suitable for piping +//! to other command-line tools like grep, awk, or sed. 
+ +use crate::types::{FoundString, Result}; + +/// Format strings as plain text for non-TTY output. +/// +/// Outputs only the string text, one per line, suitable for piping to other tools. +pub(super) fn format_table_plain(strings: &[FoundString]) -> Result { + let lines: Vec = strings + .iter() + .map(|s| sanitize_plain_text(&s.text)) + .collect(); + Ok(lines.join("\n")) +} + +/// Sanitize plain text output so each string renders as a single line. +/// +/// Replaces CRLF, LF, and CR with escaped sequences to preserve content +/// while keeping output line-based. +fn sanitize_plain_text(text: &str) -> String { + text.replace("\r\n", "\\r\\n") + .replace('\n', "\\n") + .replace('\r', "\\r") +} + +#[cfg(test)] +mod tests { + use crate::output::table::format_table_with_mode; + use crate::output::table::test_helpers::{make_metadata, make_test_string}; + + #[test] + fn single_string_plain_mode() { + let strings = vec![make_test_string("test string")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + assert_eq!(result, "test string"); + } + + #[test] + fn multiple_strings_plain_mode() { + let strings = vec![ + make_test_string("first"), + make_test_string("second"), + make_test_string("third"), + ]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + assert_eq!(result, "first\nsecond\nthird"); + } + + #[test] + fn long_string_not_truncated_in_plain() { + let long_text = "a".repeat(100); + let strings = vec![make_test_string(&long_text)]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + // Plain mode should have full string + assert_eq!(result, long_text); + } + + #[test] + fn special_characters_in_string() { + let strings = vec![make_test_string("tab\there"), make_test_string("pipe|here")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + + // Each string should be on its own line in output + let lines: Vec<&str> = 
result.lines().collect(); + assert_eq!(lines.len(), 2); + assert!(lines[0].contains("tab\there")); + assert!(lines[1].contains("pipe|here")); + } + + #[test] + fn string_with_embedded_newline() { + let strings = vec![make_test_string("line1\nline2")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + assert_eq!(result, "line1\\nline2"); + } + + #[test] + fn string_with_crlf() { + let strings = vec![make_test_string("line1\r\nline2")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + assert_eq!(result, "line1\\r\\nline2"); + } + + #[test] + fn string_with_cr() { + let strings = vec![make_test_string("line1\rline2")]; + let result = format_table_with_mode(&strings, &make_metadata(), false).unwrap(); + assert_eq!(result, "line1\\rline2"); + } +} diff --git a/src/output/table/tty.rs b/src/output/table/tty.rs new file mode 100644 index 0000000..38ed658 --- /dev/null +++ b/src/output/table/tty.rs @@ -0,0 +1,226 @@ +//! TTY mode table output for Stringy. +//! +//! This module provides formatted table output with aligned columns for terminal display. + +use crate::types::{FoundString, Result}; + +use super::formatting::{Alignment, format_tags, pad_string, truncate_string}; +use super::{ + OutputMetadata, SCORE_COLUMN_WIDTH, SECTION_COLUMN_WIDTH, STRING_COLUMN_WIDTH, + TAGS_COLUMN_WIDTH, +}; + +/// Format strings as an aligned table for TTY output. 
+/// +/// Creates a table with headers and aligned columns showing: +/// - String text (truncated if necessary) +/// - Tags (comma-separated, limited count) +/// - Score (right-aligned) +/// - Section name +pub(super) fn format_table_tty( + strings: &[FoundString], + _metadata: &OutputMetadata, +) -> Result { + if strings.is_empty() { + return Ok(String::new()); + } + + let mut output = String::new(); + + // Calculate dynamic column widths based on content + let section_width = calculate_section_width(strings); + let tags_width = calculate_tags_width(strings); + + // Build header + let header = format!( + "{} | {} | {} | {}", + pad_string("String", STRING_COLUMN_WIDTH, Alignment::Left), + pad_string("Tags", tags_width, Alignment::Left), + pad_string("Score", SCORE_COLUMN_WIDTH, Alignment::Right), + pad_string("Section", section_width, Alignment::Left), + ); + output.push_str(&header); + output.push('\n'); + + // Build separator line + let separator = format!( + "{}-|-{}-|-{}-|-{}", + "-".repeat(STRING_COLUMN_WIDTH), + "-".repeat(tags_width), + "-".repeat(SCORE_COLUMN_WIDTH), + "-".repeat(section_width), + ); + output.push_str(&separator); + output.push('\n'); + + // Build rows + for found_string in strings { + let truncated_text = truncate_string(&found_string.text, STRING_COLUMN_WIDTH); + let tags_display = format_tags(&found_string.tags); + let section_display = found_string.section.as_deref().unwrap_or(""); + + let row = format!( + "{} | {} | {} | {}", + pad_string(&truncated_text, STRING_COLUMN_WIDTH, Alignment::Left), + pad_string(&tags_display, tags_width, Alignment::Left), + pad_string( + &found_string.score.to_string(), + SCORE_COLUMN_WIDTH, + Alignment::Right + ), + pad_string(section_display, section_width, Alignment::Left), + ); + output.push_str(&row); + output.push('\n'); + } + + // Remove trailing newline for consistency + if output.ends_with('\n') { + output.pop(); + } + + Ok(output) +} + +/// Calculate the optimal width for the section column based 
on content. +fn calculate_section_width(strings: &[FoundString]) -> usize { + let max_section_len = strings + .iter() + .filter_map(|s| s.section.as_ref()) + .map(|s| s.len()) + .max() + .unwrap_or(0); + + // Minimum width is "Section" header length, maximum is SECTION_COLUMN_WIDTH + max_section_len.clamp("Section".len(), SECTION_COLUMN_WIDTH) +} + +/// Calculate the optimal width for the tags column based on content. +fn calculate_tags_width(strings: &[FoundString]) -> usize { + let max_tags_len = strings + .iter() + .map(|s| format_tags(&s.tags).len()) + .max() + .unwrap_or(0); + + // Minimum width is "Tags" header length, maximum is TAGS_COLUMN_WIDTH + max_tags_len.clamp("Tags".len(), TAGS_COLUMN_WIDTH) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::output::table::format_table_with_mode; + use crate::output::table::test_helpers::{make_metadata, make_test_string}; + use crate::types::Tag; + + #[test] + fn empty_strings_returns_empty() { + let result = format_table_with_mode(&[], &make_metadata(), true).unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn single_string_tty_mode() { + let strings = vec![make_test_string("test string")]; + let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); + + // Should have header, separator, and one data row + let lines: Vec<&str> = result.lines().collect(); + assert_eq!(lines.len(), 3); + assert!(lines[0].contains("String")); + assert!(lines[0].contains("Tags")); + assert!(lines[0].contains("Score")); + assert!(lines[0].contains("Section")); + assert!(lines[1].contains("---")); + assert!(lines[2].contains("test string")); + } + + #[test] + fn string_with_tags_displayed() { + let mut found = make_test_string("http://example.com"); + found.tags = vec![Tag::Url, Tag::Domain]; + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains("url")); + } + + #[test] + fn string_with_section_displayed() { + let found = 
make_test_string("test").with_section(".rodata".to_string()); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains(".rodata")); + } + + #[test] + fn string_with_score_displayed() { + let found = make_test_string("test").with_score(150); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + assert!(result.contains("150")); + } + + #[test] + fn long_string_truncated_in_tty() { + let long_text = "a".repeat(100); + let strings = vec![make_test_string(&long_text)]; + let result = format_table_with_mode(&strings, &make_metadata(), true).unwrap(); + + // Should contain truncated version with ... + assert!(result.contains("...")); + // Should not contain the full 100 character string + assert!(!result.contains(&long_text)); + } + + #[test] + fn missing_optional_fields_handled() { + // String with no section, no tags, default score + let found = make_test_string("minimal"); + + let result = format_table_with_mode(&[found], &make_metadata(), true).unwrap(); + // Should not crash and should contain the string + assert!(result.contains("minimal")); + } + + mod column_width_tests { + use super::*; + + #[test] + fn section_width_minimum() { + let strings = vec![make_test_string("test")]; + let width = calculate_section_width(&strings); + assert_eq!(width, "Section".len()); + } + + #[test] + fn section_width_from_content() { + let strings = vec![make_test_string("test").with_section(".rodata.str1.1".to_string())]; + let width = calculate_section_width(&strings); + assert_eq!(width, ".rodata.str1.1".len()); + } + + #[test] + fn section_width_capped_at_max() { + let long_section = "a".repeat(50); + let strings = vec![make_test_string("test").with_section(long_section)]; + let width = calculate_section_width(&strings); + assert_eq!(width, SECTION_COLUMN_WIDTH); + } + + #[test] + fn tags_width_minimum() { + let strings = vec![make_test_string("test")]; + let width = 
calculate_tags_width(&strings); + assert_eq!(width, "Tags".len()); + } + + #[test] + fn tags_width_from_content() { + let mut found = make_test_string("test"); + found.tags = vec![Tag::Url, Tag::Domain]; + let width = calculate_tags_width(&[found]); + assert_eq!(width, "Tags".len()); + } + } +} From 5c53d91d4d711e1f9e3d9785fc95136216a12073 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 15:14:26 -0500 Subject: [PATCH 14/25] fix(yara): correct UTF-16LE encoding and prevent injection attacks Fix two issues in YARA output formatter: 1. UTF-16LE non-ASCII handling: The `wide` modifier only works for ASCII characters. Non-ASCII UTF-16LE strings now use hex encoding instead of the incorrect `wide` modifier approach. - Add `utf16le_hex_string` helper (mirrors `utf16be_hex_string`) - Branch on `is_ascii()` to choose appropriate encoding 2. Injection prevention: Escape `binary_name` and `timestamp` in comment headers and meta fields to prevent newline/quote injection attacks. Add 8 new tests covering: - UTF-16LE hex string encoding (basic, empty, non-ASCII, surrogate pairs) - ASCII UTF-16LE still uses wide modifier - Non-ASCII UTF-16LE uses hex encoding - Binary name injection escaping - Timestamp injection escaping Co-Authored-By: Claude Opus 4.5 --- ROADMAP.md | 167 ++++++++++++++++++++------------------------- src/output/yara.rs | 125 +++++++++++++++++++++++++++++++-- 2 files changed, 195 insertions(+), 97 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 30c8c58..bc4676c 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -7,94 +7,88 @@ This document tracks medium-term and long-term improvements identified during th ### Architecture Improvements #### 1. 
Split `extraction/mod.rs` into smaller modules -**Priority:** High -**Current state:** 1542 lines (exceeds 500-line project limit by 1042 lines) -**Files affected:** `src/extraction/mod.rs` + +**Priority:** High **Current state:** 1542 lines (exceeds 500-line project limit by 1042 lines) **Files affected:** `src/extraction/mod.rs` Recommended split: + - `src/extraction/config.rs` - Move `ExtractionConfig` and validation logic - `src/extraction/trait.rs` - Move `StringExtractor` trait definition - `src/extraction/basic.rs` - Move `BasicExtractor` implementation - `src/extraction/helpers.rs` - Move internal helper functions (`is_printable_text_byte`, `could_be_utf8_byte`, `extract_ascii_utf8_strings`) Other oversized files to address: -| File | Lines | Overage | -|------|-------|---------| -| `src/extraction/pe_resources.rs` | 1449 | +949 | -| `src/extraction/utf16.rs` | 1273 | +773 | -| `src/extraction/dedup.rs` | 849 | +349 | -| `src/extraction/ascii.rs` | 832 | +332 | -| `src/output/table.rs` | 708 | +208 | -| `src/extraction/filters.rs` | 702 | +202 | -| `src/container/pe.rs` | 661 | +161 | -| `src/container/elf.rs` | 627 | +127 | -| `src/container/macho.rs` | 574 | +74 | -| `src/types.rs` | 558 | +58 | + +| File | Lines | Overage | +| -------------------------------- | ----- | ------- | +| `src/extraction/pe_resources.rs` | 1449 | +949 | +| `src/extraction/utf16.rs` | 1273 | +773 | +| `src/extraction/dedup.rs` | 849 | +349 | +| `src/extraction/ascii.rs` | 832 | +332 | +| `src/output/table.rs` | 708 | +208 | +| `src/extraction/filters.rs` | 702 | +202 | +| `src/container/pe.rs` | 661 | +161 | +| `src/container/elf.rs` | 627 | +127 | +| `src/container/macho.rs` | 574 | +74 | +| `src/types.rs` | 558 | +58 | #### 2. 
Move PE resources to container module -**Priority:** Medium -**Current state:** `src/extraction/pe_resources.rs` is in extraction but conceptually belongs in container -**Rationale:** PE resource parsing is part of container analysis, not string extraction + +**Priority:** Medium **Current state:** `src/extraction/pe_resources.rs` is in extraction but conceptually belongs in container **Rationale:** PE resource parsing is part of container analysis, not string extraction #### 3. Decouple semantic enrichment from extraction -**Priority:** Medium -**Current state:** `extraction` module imports from `classification` creating bidirectional dependency -**Files affected:** `src/extraction/mod.rs:129` -**Recommendation:** Move semantic enrichment to an orchestration layer that callers control + +**Priority:** Medium **Current state:** `extraction` module imports from `classification` creating bidirectional dependency **Files affected:** `src/extraction/mod.rs:129` **Recommendation:** Move semantic enrichment to an orchestration layer that callers control #### 4. Add `#[non_exhaustive]` to remaining public enums -**Priority:** Medium -**Files affected:** + +**Priority:** Medium **Files affected:** + - `src/types.rs:4-10` - `Encoding` enum - `src/types.rs:130-136` - `BinaryFormat` enum ### Error Handling #### 5. Add `SerializationError` variant to `StringyError` -**Priority:** Medium -**Current state:** `ConfigError` is incorrectly used for JSON serialization failures -**Files affected:** `src/output/json.rs:14-16`, `src/types.rs` + +**Priority:** Medium **Current state:** `ConfigError` is incorrectly used for JSON serialization failures **Files affected:** `src/output/json.rs:14-16`, `src/types.rs` #### 6. 
Add format-specific error variants -**Priority:** Low -**Recommendation:** Add `InvalidPeError`, `InvalidElfError`, `InvalidMachOError` instead of generic `ParseError(String)` + +**Priority:** Low **Recommendation:** Add `InvalidPeError`, `InvalidElfError`, `InvalidMachOError` instead of generic `ParseError(String)` ### API Improvements #### 7. Add constructors to remaining public structs -**Priority:** Medium -**Files affected:** `src/types.rs` -**Structs needing constructors:** `ImportInfo`, `ExportInfo`, `SectionInfo` -**Rationale:** Required for `#[non_exhaustive]` compatibility + +**Priority:** Medium **Files affected:** `src/types.rs` **Structs needing constructors:** `ImportInfo`, `ExportInfo`, `SectionInfo` **Rationale:** Required for `#[non_exhaustive]` compatibility #### 8. Add `#[allow]` justification comments -**Priority:** Low -**Files affected:** + +**Priority:** Low **Files affected:** + - `src/extraction/utf16.rs:334` - `#[allow(clippy::result_unit_err)]` - `src/extraction/utf16.rs:350` - `#[allow(dead_code)]` ### Documentation #### 9. Update API documentation for accuracy -**Priority:** Medium -**Files affected:** `docs/src/api.md` -**Issues:** Some function signatures don't match actual implementation + +**Priority:** Medium **Files affected:** `docs/src/api.md` **Issues:** Some function signatures don't match actual implementation #### 10. Add security considerations to README -**Priority:** Medium -**Content to add:** Document malware analysis use case, safe handling of untrusted binaries + +**Priority:** Medium **Content to add:** Document malware analysis use case, safe handling of untrusted binaries #### 11. Document deduplication feature in user docs -**Priority:** Medium -**Files affected:** README.md, `docs/src/string-extraction.md` + +**Priority:** Medium **Files affected:** README.md, `docs/src/string-extraction.md` ### Performance #### 12. 
Add memory mapping for large files -**Priority:** High -**Current state:** Entire file is loaded into memory -**Impact:** Processing 1GB+ binaries requires 1GB+ RAM -**Recommendation:** Use `memmap2` crate for memory-mapped file access + +**Priority:** High **Current state:** Entire file is loaded into memory **Impact:** Processing 1GB+ binaries requires 1GB+ RAM **Recommendation:** Use `memmap2` crate for memory-mapped file access ```rust // Recommended approach @@ -107,26 +101,22 @@ let data: &[u8] = &mmap; ``` #### 13. Optimize redundant regex matching -**Priority:** Low -**Files affected:** `src/classification/patterns/network.rs:92-106` -**Issue:** URL_REGEX runs twice on URLs (in `classify_url` then `classify_domain`) + +**Priority:** Low **Files affected:** `src/classification/patterns/network.rs:92-106` **Issue:** URL_REGEX runs twice on URLs (in `classify_url` then `classify_domain`) ### Testing #### 14. Set up code coverage metrics -**Priority:** Medium -**Tool:** `cargo-tarpaulin` -**Command:** `cargo tarpaulin --out Html` + +**Priority:** Medium **Tool:** `cargo-tarpaulin` **Command:** `cargo tarpaulin --out Html` #### 15. Add performance benchmarks -**Priority:** Medium -**Tool:** `criterion` -**Focus areas:** Deduplication with large input sets, regex pattern matching + +**Priority:** Medium **Tool:** `criterion` **Focus areas:** Deduplication with large input sets, regex pattern matching #### 16. Add fuzzing for binary parsers -**Priority:** Medium -**Tool:** `cargo-fuzz` -**Targets:** `container/*.rs` parsers with malformed input + +**Priority:** Medium **Tool:** `cargo-fuzz` **Targets:** `container/*.rs` parsers with malformed input --- @@ -135,8 +125,8 @@ let data: &[u8] = &mmap; ### Performance Optimizations #### 17. 
Consider parallel extraction with rayon -**Priority:** Low -**Rationale:** Section-by-section extraction is embarrassingly parallel + +**Priority:** Low **Rationale:** Section-by-section extraction is embarrassingly parallel ```rust use rayon::prelude::*; @@ -148,45 +138,38 @@ let section_strings: Vec> = sections ``` #### 18. Consider `Cow` for hot paths -**Priority:** Low -**Files affected:** `src/types.rs:236-237` -**Benefit:** Avoid cloning when strings could be borrowed + +**Priority:** Low **Files affected:** `src/types.rs:236-237` **Benefit:** Avoid cloning when strings could be borrowed #### 19. Consider `SmallVec` for tags -**Priority:** Low -**Field:** `FoundString::tags` -**Rationale:** Typical 0-3 tags could use stack allocation with `SmallVec<[Tag; 4]>` + +**Priority:** Low **Field:** `FoundString::tags` **Rationale:** Typical 0-3 tags could use stack allocation with `SmallVec<[Tag; 4]>` ### Dependency Management #### 20. Migrate to `std::sync::LazyLock` -**Priority:** Low -**Current state:** Uses `once_cell::sync::Lazy` -**Target:** `std::sync::LazyLock` (stabilized in Rust 1.80) -**Files affected:** All files in `src/classification/patterns/` + +**Priority:** Low **Current state:** Uses `once_cell::sync::Lazy` **Target:** `std::sync::LazyLock` (stabilized in Rust 1.80) **Files affected:** All files in `src/classification/patterns/` ### Feature Enhancements #### 21. Implement main CLI -**Priority:** High -**Current state:** `src/main.rs` is a stub with TODO -**File:** `src/main.rs:18` + +**Priority:** High **Current state:** `src/main.rs` is a stub with TODO **File:** `src/main.rs:18` #### 22. Integrate Mach-O load command strings -**Priority:** Medium -**Current state:** Feature exists but not integrated into main pipeline -**File:** `src/container/macho.rs:198` + +**Priority:** Medium **Current state:** Feature exists but not integrated into main pipeline **File:** `src/container/macho.rs:198` #### 23. 
Parse all Mach-O architectures -**Priority:** Low -**Current state:** Only parses first architecture in fat binaries -**File:** `src/container/macho.rs:312` + +**Priority:** Low **Current state:** Only parses first architecture in fat binaries **File:** `src/container/macho.rs:312` ### Build Configuration #### 24. Add feature flags for output formats -**Priority:** Low -**File:** `Cargo.toml` + +**Priority:** Low **File:** `Cargo.toml` ```toml [features] @@ -197,8 +180,8 @@ table = [] ``` #### 25. Add `include` field to Cargo.toml -**Priority:** Low -**Purpose:** Control what gets published to crates.io + +**Priority:** Low **Purpose:** Control what gets published to crates.io ```toml [package] @@ -226,14 +209,14 @@ The following issues from the comprehensive review have been addressed: **Overall Rating from Comprehensive Review: B+ (85/100)** -| Dimension | Rating | -|-----------|--------| -| Code Quality | B+ | -| Architecture | B+ | -| Security | A | -| Performance | B | -| Testing | B+ | -| Documentation | B+ | -| Best Practices | A- | +| Dimension | Rating | +| -------------- | ------ | +| Code Quality | B+ | +| Architecture | B+ | +| Security | A | +| Performance | B | +| Testing | B+ | +| Documentation | B+ | +| Best Practices | A- | With the immediate issues addressed and medium-term improvements completed, this project would be ready for a stable 1.0 release. 
diff --git a/src/output/yara.rs b/src/output/yara.rs index 099fcbe..ca2bfe7 100644 --- a/src/output/yara.rs +++ b/src/output/yara.rs @@ -15,8 +15,14 @@ pub fn format_yara(strings: &[FoundString], metadata: &OutputMetadata) -> Result let mut output = String::new(); output.push_str("// YARA rule generated by Stringy\n"); - output.push_str(&format!("// Binary: {}\n", metadata.binary_name)); - output.push_str(&format!("// Generated: {}\n\n", timestamp)); + output.push_str(&format!( + "// Binary: {}\n", + escape_yara_string(&metadata.binary_name) + )); + output.push_str(&format!( + "// Generated: {}\n\n", + escape_yara_string(×tamp) + )); output.push_str(&format!("rule {} {{\n", rule_name)); output.push_str(" meta:\n"); @@ -25,7 +31,10 @@ pub fn format_yara(strings: &[FoundString], metadata: &OutputMetadata) -> Result escape_yara_string(&metadata.binary_name) )); output.push_str(" generated_by = \"stringy\"\n"); - output.push_str(&format!(" generated_at = \"{}\"\n", timestamp)); + output.push_str(&format!( + " generated_at = \"{}\"\n", + escape_yara_string(×tamp) + )); if strings.is_empty() { output.push_str(" condition:\n"); @@ -64,8 +73,14 @@ pub fn format_yara(strings: &[FoundString], metadata: &OutputMetadata) -> Result strings_block.push_str(&format!(" {} = {}\n", var_name, hex)); } Encoding::Utf16Le => { - let escaped = escape_yara_unicode_literal(&item.text); - strings_block.push_str(&format!(" {} = \"{}\" wide\n", var_name, escaped)); + if item.text.is_ascii() { + let escaped = escape_yara_unicode_literal(&item.text); + strings_block + .push_str(&format!(" {} = \"{}\" wide\n", var_name, escaped)); + } else { + let hex = utf16le_hex_string(&item.text); + strings_block.push_str(&format!(" {} = {}\n", var_name, hex)); + } } Encoding::Ascii | Encoding::Utf8 => { let escaped = escape_yara_string(&item.text); @@ -193,6 +208,20 @@ fn utf16be_hex_string(text: &str) -> String { format!("{{ {} }}", hex_bytes.join(" ")) } +fn utf16le_hex_string(text: &str) -> String { + 
let hex_bytes: Vec = text + .encode_utf16() + .flat_map(|unit| unit.to_le_bytes()) + .map(|b| format!("{:02x}", b)) + .collect(); + + if hex_bytes.is_empty() { + return "{ }".to_string(); + } + + format!("{{ {} }}", hex_bytes.join(" ")) +} + fn tag_name(tag: &Tag) -> &'static str { match tag { Tag::Url => "Url", @@ -418,4 +447,90 @@ mod tests { "Timestamp should be numeric or CLOCK_ERROR" ); } + + #[test] + fn test_utf16le_hex_string_basic() { + // Basic ASCII - should be little-endian (ASCII byte followed by 00) + assert_eq!(utf16le_hex_string("A"), "{ 41 00 }"); + assert_eq!(utf16le_hex_string("AB"), "{ 41 00 42 00 }"); + } + + #[test] + fn test_utf16le_hex_string_empty() { + assert_eq!(utf16le_hex_string(""), "{ }"); + } + + #[test] + fn test_utf16le_hex_string_non_ascii() { + // Non-ASCII Unicode (BMP) - Chinese character U+4E2D + let chinese = utf16le_hex_string("\u{4E2D}"); + assert_eq!(chinese, "{ 2d 4e }"); + } + + #[test] + fn test_utf16le_hex_string_surrogate_pair() { + // Character requiring surrogate pair (outside BMP) - emoji U+1F600 + let emoji = utf16le_hex_string("\u{1F600}"); + // Should produce surrogate pair: 3D D8 00 DE (little-endian) + assert_eq!(emoji, "{ 3d d8 00 de }"); + } + + #[test] + fn test_utf16le_ascii_uses_wide_modifier() { + // ASCII UTF-16LE should use "wide" modifier + let mut string = make_string("test"); + string.encoding = Encoding::Utf16Le; + let output = format_yara(&[string], &make_metadata()).expect("Formatting should succeed"); + assert!( + output.contains("wide"), + "ASCII UTF-16LE should use wide modifier" + ); + assert!(output.contains("\"test\"")); + } + + #[test] + fn test_utf16le_non_ascii_uses_hex() { + // Non-ASCII UTF-16LE should use hex string, not wide modifier + let mut string = make_string("\u{4E2D}\u{6587}"); + string.encoding = Encoding::Utf16Le; + let output = format_yara(&[string], &make_metadata()).expect("Formatting should succeed"); + assert!( + !output.contains("wide"), + "Non-ASCII UTF-16LE should 
not use wide modifier" + ); + assert!( + output.contains("{ 2d 4e 87 65 }"), + "Non-ASCII UTF-16LE should use hex encoding" + ); + } + + #[test] + fn test_binary_name_injection_escaped_in_comments() { + // Binary name with newlines should be escaped in comments + let mut metadata = make_metadata(); + metadata.binary_name = "evil\nname".to_string(); + let output = format_yara(&[], &metadata).expect("Formatting should succeed"); + // Should contain escaped newline, not literal + assert!( + output.contains("evil\\nname"), + "Newlines in binary_name should be escaped" + ); + assert!( + !output.contains("evil\nname"), + "Literal newlines should not appear" + ); + } + + #[test] + fn test_timestamp_injection_escaped_in_meta() { + // Timestamp with special characters should be escaped + let mut metadata = make_metadata(); + metadata.generated_at = Some("2024\"\n//attack".to_string()); + let output = format_yara(&[], &metadata).expect("Formatting should succeed"); + // Should contain escaped characters + assert!( + output.contains("2024\\\"\\n//attack"), + "Special chars in timestamp should be escaped" + ); + } } From f4388bebf2f33fe92f9cc55c95a71960344f2cdb Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sun, 18 Jan 2026 15:32:02 -0500 Subject: [PATCH 15/25] chore(tests): add comprehensive testing strategy analysis Signed-off-by: UncleSp1d3r --- TESTING_ANALYSIS.md | 499 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 499 insertions(+) create mode 100644 TESTING_ANALYSIS.md diff --git a/TESTING_ANALYSIS.md b/TESTING_ANALYSIS.md new file mode 100644 index 0000000..dee19ca --- /dev/null +++ b/TESTING_ANALYSIS.md @@ -0,0 +1,499 @@ +# Stringy Testing Strategy Analysis + +## Executive Summary + +### Overall Test Health: STRONG with Minor Gaps + +- **Total Tests**: 535 tests (280 unit + 219 integration + 36 ignored/doctest) +- **Test Pass Rate**: 98.9% (529 passed, 6 failed/ignored) +- **Test Coverage**: 6,106 test lines vs 14,138 source lines (43% ratio) +- **Test 
Modules**: 24 modules with unit tests +- **Fixtures**: 5 binary fixtures (ELF, Mach-O, PE with/without resources) + +## Test Distribution Analysis + +### Unit Tests (280 tests, 24 modules) + +**Coverage by Module**: + +- `classification/` - 70 tests (patterns, ranking, symbols, semantic) +- `container/` - 42 tests (ELF, PE, Mach-O parsers) +- `extraction/` - 95 tests (ASCII, UTF-16, dedup, filters, resources) +- `output/` - 51 tests (JSON, YARA, table formatters) +- `types.rs` - 4 tests (serialization/deserialization) + +### Integration Tests (219 tests, 13 test files) + +**Test Files**: + +1. `integration_elf.rs` (10 tests) - ELF parsing and extraction +2. `integration_extraction.rs` (9 tests) - End-to-end extraction +3. `integration_macho.rs` (15 tests) - Mach-O parsing and load commands +4. `integration_pe.rs` (22 tests) - PE parsing and resource extraction +5. `test_ascii_extraction.rs` (14 tests) - ASCII extraction scenarios +6. `test_ascii_integration.rs` (14 tests) - ASCII integration tests +7. `test_deduplication.rs` (5 tests) - Deduplication workflows +8. `test_noise_filters.rs` (9 tests) - Noise filtering heuristics +9. `test_utf16_extraction.rs` (5 tests) - UTF-16 extraction +10. `classification_integration.rs` (27 tests) - Semantic classification +11. `output_json_integration.rs` (41 tests) - JSON output format +12. `output_table_integration.rs` (27 tests) - Table output format +13. 
`output_yara_integration.rs` (41 tests) - YARA rule generation + +### Test Infrastructure + +**Snapshot Testing**: Using `insta` for output validation + +- JSON output snapshots +- YARA rule snapshots +- Table format snapshots + +**Test Fixtures**: Well-organized in `tests/fixtures/` + +- Source code (`test_binary.c`) +- ELF binary (`test_binary_elf`) +- Mach-O binary (`test_binary_macho`) +- PE binary (`test_binary_pe.exe`) +- PE with resources (`test_binary_with_resources.exe`) +- Resource definition files (`.rc`, `.res`) +- Comprehensive README with rebuild instructions + +## Critical Findings + +### 1. Doctest Failures (2 failures) + +**Issue**: Two doctests failing due to missing error handling in example code + +```text +src\extraction\mod.rs - extraction::StringExtractor (line 318) +src\extraction\mod.rs - extraction::BasicExtractor (line 408) +``` + +**Problem**: Doctests use `?` operator without proper return type: + +```rust +fn main() { // Should be: fn main() -> Result<(), Box> { + let data = std::fs::read("binary_file")?; // Error: can't use ? in fn returning () + ... +} +``` + +**Severity**: MEDIUM - Documentation examples don't compile, misleading users + +**Fix Required**: Add proper return types to doctest main functions + +### 2. 
Performance/Large Input Tests Missing + +**Critical Gap**: No tests for O(n^2) algorithms identified in previous phase + +**Affected Code**: + +- `src/extraction/dedup.rs:183-188` - Cross-section deduplication (vector contains) +- `src/extraction/dedup.rs:222-231` - Tag merging (vector contains) + +**Current Dedup Tests**: + +- `test_deduplication_with_basic_extractor` - Small input (6 strings) +- `test_deduplication_metadata_preservation` - Small input (2 strings) +- `test_deduplication_with_real_fixture` - Uses test fixture (unknown size) +- `test_deduplication_score_bonuses` - 2 strings +- `test_extract_canonical_preserves_occurrences` - Small input + +**Missing Coverage**: + +- No tests with 1,000+ duplicate strings +- No performance regression tests +- No benchmark for deduplication scalability + +**Severity**: HIGH - Performance bottlenecks not validated + +**Recommendation**: Add performance tests for large inputs + +### 3. Main Binary Untested + +**Issue**: `src/main.rs` has no tests (stub implementation) + +```rust +fn main() -> Result<(), Box> { + let _cli = Cli::parse(); + + // TODO: Implement main extraction pipeline + println!("Stringy - Binary string extraction tool"); + println!("Implementation coming soon..."); + + Ok(()) +} +``` + +**Severity**: LOW - Main is a stub, library is well-tested + +**Impact**: End-to-end CLI testing not possible until main is implemented + +### 4. Bounds Checking Coverage + +**Question from Previous Phase**: Are bounds checks in `extraction/mod.rs:688-699` tested? 
+ +```rust +if section_offset >= data.len() { + return Ok(Vec::new()); +} + +let end_offset = section_offset + .checked_add(section_size) + .unwrap_or(data.len()) + .min(data.len()); +``` + +**Test Coverage Analysis**: + +- `test_string_at_section_boundary` in `test_ascii_extraction.rs:76-100` - Tests section boundary extraction +- `test_extract_from_section_basic` in integration tests - Tests basic section extraction +- `integration_extraction.rs` - Multiple boundary tests + +**Verdict**: PARTIALLY COVERED + +- Boundary conditions tested +- Edge case: Section offset beyond data length - NEEDS EXPLICIT TEST +- Edge case: Section size overflow - NEEDS EXPLICIT TEST + +**Missing Test Cases**: + +```rust +#[test] +fn test_section_beyond_file_boundary() { + // Section offset > data.len() +} + +#[test] +fn test_section_size_overflow() { + // section_offset + section_size overflows +} +``` + +**Severity**: MEDIUM - Edge cases not explicitly validated + +## Test Quality Metrics + +### 1. Assertion Density + +**Good Examples**: + +- `classification/patterns/` - High density (multiple assertions per test) +- `output/yara.rs` tests - Comprehensive validation of output format +- `extraction/dedup.rs` tests - Multiple assertions for score calculation + +**Average Tests per Module**: + +- Classification: 2.9 tests per function +- Extraction: 2.1 tests per function +- Output: 3.5 tests per function + +**Verdict**: GOOD - Adequate test coverage per module + +### 2. 
Edge Case Coverage + +**Well-Tested Edge Cases**: + +- Empty input (`test_empty_input`, `test_empty_strings_produces_minimal_rule`) +- Null/zero values (`test_no_valid_strings`) +- Boundary conditions (`test_string_at_section_boundary`, `test_boundary_conditions`) +- Unicode edge cases (`test_truncate_string_unicode_at_boundary`, `test_escape_yara_unicode_literal_empty`) +- Threshold boundaries (`test_entropy_filter_edge_cases`) + +**Missing Edge Cases**: + +- Large input (1,000+ strings) - NO TESTS +- Malformed binaries - LIMITED TESTS +- Section size overflow - NO EXPLICIT TEST +- Memory exhaustion scenarios - NO TESTS + +**Verdict**: GOOD for typical cases, WEAK for extreme cases + +### 3. Test Isolation + +**Positive Findings**: + +- Each test creates its own test data +- No shared mutable state +- Fixtures are read-only +- Tests can run in parallel (proven by test suite execution) + +**Verdict**: EXCELLENT - Tests are properly isolated + +### 4. Regression Protection + +**Snapshot Testing**: + +- `insta` used for output format validation +- JSON, YARA, table outputs have snapshot tests +- Changes to output format require explicit snapshot updates + +**Verdict**: EXCELLENT - Good regression protection via snapshots + +## Coverage Gaps by Priority + +### HIGH Priority Gaps + +1. **Performance Tests for Deduplication** + - Test with 10,000+ duplicate strings + - Validate O(n^2) algorithms don't cause timeout + - File: `tests/test_deduplication_performance.rs` (MISSING) + +2. **Doctest Fixes** + - Fix `extraction::StringExtractor` doctest (line 318) + - Fix `extraction::BasicExtractor` doctest (line 408) + - Files: `src/extraction/mod.rs` + +3. **Bounds Checking Edge Cases** + - Section offset beyond file boundary + - Section size causing integer overflow + - File: `tests/test_extraction_edge_cases.rs` (MISSING) + +### MEDIUM Priority Gaps + +1. 
**Malformed Binary Handling** + - Truncated ELF headers + - Invalid PE signatures + - Corrupted Mach-O load commands + - File: `tests/test_malformed_binaries.rs` (MISSING) + +2. **Regex Pattern Edge Cases** + - URL regex with edge cases (IPv6 in URLs, Unicode domains) + - Email regex with uncommon formats + - Path regex with UNC paths edge cases + - Files: Pattern test modules (PARTIAL) + +3. **Resource Extraction Error Paths** + - PE resource directory corruption + - Version info parsing failures + - String table malformed data + - File: `src/extraction/pe_resources.rs` tests (PARTIAL) + +### LOW Priority Gaps + +1. **Main Binary CLI Testing** + - Integration tests for CLI argument parsing + - File: `tests/cli_integration.rs` (MISSING, but main is stub) + +2. **Memory Leak Tests** + - Large file processing without memory growth + - File: Performance test suite (MISSING) + +3. **Concurrency Tests** + - Parallel extraction from multiple files + - Thread safety validation + - File: Concurrency test suite (MISSING) + +## Test Infrastructure Assessment + +### Strengths + +1. **Excellent Fixture Management** + - Well-documented rebuild process + - Multiple binary formats covered + - Source code available for reproduction + +2. **Comprehensive Integration Tests** + - 219 integration tests covering end-to-end scenarios + - Real binary fixtures used + - All output formats tested + +3. **Snapshot Testing** + - `insta` framework well-utilized + - Output format changes tracked + - Easy to review snapshot diffs + +4. **Test Organization** + - Clear separation: unit vs integration + - Logical grouping by functionality + - Consistent naming conventions + +### Weaknesses + +1. **No Performance Benchmarks** + - No `criterion` benchmarks + - No performance regression detection + - Large input scenarios untested + +2. **No Fuzzing Tests** + - No `cargo-fuzz` integration + - Binary parsing not fuzz-tested + - String extraction not fuzz-tested + +3. 
**No Code Coverage Metrics** + - `cargo-tarpaulin` not installed + - No coverage reports in CI + - Unknown actual code coverage percentage + +4. **Limited Error Injection** + - Few tests for error paths + - Missing tests for resource failures + - I/O error handling not tested + +## Recommendations + +### Immediate Actions (Week 1) + +1. **Fix Doctest Failures** + + ```rust + // In src/extraction/mod.rs (line 318 and 408) + // Change: fn main() { + // To: fn main() -> Result<(), Box> { + // Add: Ok(()) at end of function + ``` + +2. **Add Performance Tests** + + ```rust + // tests/test_deduplication_performance.rs + #[test] + #[ignore] // Marked as ignored for normal runs + fn test_deduplication_large_input() { + // Test with 10,000 duplicate strings + } + ``` + +3. **Add Bounds Checking Tests** + + ```rust + // tests/test_extraction_edge_cases.rs + #[test] + fn test_section_beyond_boundary() { + // Section offset > data.len() + } + ``` + +### Short-term Improvements (Month 1) + +1. **Add Fuzzing** + - Install `cargo-fuzz` + - Fuzz container parsers (ELF, PE, Mach-O) + - Fuzz string extractors (ASCII, UTF-16) + +2. **Enable Code Coverage** + - Install `cargo-tarpaulin` + - Add coverage to CI pipeline + - Set coverage threshold (80% target) + +3. **Add Malformed Binary Tests** + - Create corrupted fixtures + - Test graceful error handling + - Verify no panics on invalid input + +### Long-term Enhancements (Quarter 1) + +1. **Performance Benchmarks** + - Add `criterion` benchmarks + - Track deduplication performance + - Track classification performance + - Add to CI for regression detection + +2. **Property-Based Testing** + - Add `proptest` or `quickcheck` + - Generate random binaries + - Verify invariants (no panics, valid output) + +3. **CLI Integration Tests** + - Implement main binary + - Add end-to-end CLI tests + - Test output redirection, error handling + +4. 
**Concurrency Tests** + - Test thread safety + - Test parallel file processing + - Validate no data races + +## Test Quality Score + +### Category Scores (0-10) + +- **Coverage Breadth**: 8/10 - Most code paths tested, some edge cases missing +- **Coverage Depth**: 7/10 - Good assertions, but performance/stress tests lacking +- **Test Isolation**: 10/10 - Excellent isolation, no shared state +- **Edge Case Coverage**: 6/10 - Common cases covered, extreme cases missing +- **Regression Protection**: 9/10 - Snapshot tests provide strong protection +- **Performance Testing**: 2/10 - No performance tests, benchmarks missing +- **Error Path Testing**: 6/10 - Some error paths tested, but incomplete +- **Documentation**: 7/10 - Good fixture docs, some doctests broken + +### Overall Score: 6.9/10 (GOOD) + +**Strengths**: + +- Strong unit and integration test coverage +- Excellent test isolation and organization +- Good snapshot testing for output formats +- Comprehensive fixture management + +**Critical Weaknesses**: + +- No performance/stress testing +- Missing large input validation +- No fuzzing or property-based testing +- Code coverage metrics unavailable + +## Comparison to Industry Standards + +### TDD Compliance + +**Current State**: PARTIAL TDD + +- Tests exist for all major features +- Good test-first evidence in git history +- Some features lack comprehensive edge case tests + +**TDD Cycle Metrics** (Not tracked): + +- Red-green-refactor cycle time: UNKNOWN +- Test-first compliance: ESTIMATED 60-70% +- Test growth rate: Not measured + +**Recommendation**: Add TDD metrics tracking + +### Test Pyramid Balance + +**Current Distribution**: + +- Unit Tests: 52% (280/535) - GOOD +- Integration Tests: 41% (219/535) - GOOD +- End-to-End Tests: 7% (36/535) - LOW (but main is stub) + +**Verdict**: BALANCED - Good unit/integration ratio + +### Industry Benchmarks + +- **Test-to-Code Ratio**: 43% (6,106 test lines / 14,138 src lines) - ACCEPTABLE (industry: 30-50%) +- **Test 
Count**: 535 tests for 14k LOC - GOOD (industry: ~1 test per 30 LOC) +- **Test Pass Rate**: 98.9% - EXCELLENT (industry: >95%) + +## Test Execution Performance + +**Test Suite Speed**: FAST + +- Unit tests: 0.04s (258 tests) +- Integration tests: ~1.5s (219 tests) +- Total execution: <20s including doctests + +**Verdict**: EXCELLENT - Fast feedback loop + +## Conclusion + +The Stringy project demonstrates **strong testing practices** with comprehensive unit and integration test coverage. The test suite provides good regression protection through snapshot testing and maintains excellent test isolation. + +**Key Strengths**: + +1. High test count (535 tests) +2. Well-organized test structure +3. Excellent fixture management +4. Fast test execution + +**Critical Improvements Needed**: + +1. Fix failing doctests (IMMEDIATE) +2. Add performance/stress tests (HIGH PRIORITY) +3. Add bounds checking edge case tests (MEDIUM PRIORITY) +4. Enable code coverage metrics (MEDIUM PRIORITY) +5. Add fuzzing for binary parsers (LONG-TERM) + +**Recommendation**: The test infrastructure is solid, but adding performance tests and fixing doctests should be immediate priorities before production release. 
From 7306f48295948290246868a64d79fd97e1b04322 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 19 Jan 2026 21:58:26 +0000 Subject: [PATCH 16/25] chore(devcontainer): update Docker features and remove unused ones - Added 'moby' configuration to Docker feature - Removed unused features from the devcontainer configuration - Added mise.toml for tool dependencies Signed-off-by: UncleSp1d3r --- .devcontainer/devcontainer.json | 20 +++----------------- mise.toml | 13 +++++++++++++ 2 files changed, 16 insertions(+), 17 deletions(-) create mode 100644 mise.toml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a935486..b145780 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,7 +5,8 @@ "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { "installDockerBuildx": true, "version": "latest", - "dockerDashComposeVersion": "v2" + "dockerDashComposeVersion": "v2", + "moby": false }, "ghcr.io/devcontainers/features/github-cli:1": { "installDirectlyFromGitHubRelease": true, @@ -14,27 +15,12 @@ "ghcr.io/eitsupi/devcontainer-features/mdbook:1": { "version": "latest" }, - "ghcr.io/jsburckhardt/devcontainer-features/bat:1": {}, - "ghcr.io/jsburckhardt/devcontainer-features/just:1": {}, - "ghcr.io/lee-orr/rusty-dev-containers/cargo-audit:0": {}, - "ghcr.io/lee-orr/rusty-dev-containers/cargo-binstall:0": {}, - "ghcr.io/lee-orr/rusty-dev-containers/cargo-deny:0": {}, - "ghcr.io/lee-orr/rusty-dev-containers/cargo-llvm-cov:0": {}, - "ghcr.io/lee-orr/rusty-dev-containers/cargo-nextest:0": {}, - "ghcr.io/marcozac/devcontainer-features/goreleaser:1": { - "version": "latest" - }, "ghcr.io/devcontainers-extra/features/claude-code:1": { "version": "latest" }, "ghcr.io/devcontainers-extra/features/mise:1": { "version": "latest" - }, - "ghcr.io/devcontainers-extra/features/pre-commit:2": { - "version": "latest" - }, - "ghcr.io/roul/devcontainer-features/mise-node:1": {}, - 
"ghcr.io/roul/devcontainer-features/mise-python:1": {} + } }, "customizations": { "vscode": { diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000..58e2eee --- /dev/null +++ b/mise.toml @@ -0,0 +1,13 @@ +[tools] +actionlint = "1.7.10" +cargo-binstall = "1.16.7" +cargo-insta = "1.46.1" +claude = "latest" +goreleaser = "2.13.3" +just = "1.46.0" +markdownlint-cli2 = "0.20.0" +mdbook = "0.5.2" +pre-commit = "4.5.1" +prettier = "3.8.0" +python = "3.14.2" +rust = "1.92.0" From d52047a5ae73a3912b9c68506d382faa0143b8fe Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 19 Jan 2026 22:20:49 +0000 Subject: [PATCH 17/25] chore(setup): update setup commands and add mise installation Signed-off-by: UncleSp1d3r --- justfile | 41 +++++++++++++++++++++++++---------------- mise.toml | 2 ++ 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/justfile b/justfile index 96c4802..34a4d7a 100644 --- a/justfile +++ b/justfile @@ -53,37 +53,52 @@ rmrf path: # Development setup [windows] setup: - Set-Location "{{ root }}" + mise trust + mise install rustup component add rustfmt clippy llvm-tools-preview - cargo install cargo-binstall --locked @just mdformat-install Write-Host "Note: You may need to restart your shell for pipx PATH changes to take effect" [unix] setup: - cd "{{ root }}" + mise trust + mise install rustup component add rustfmt clippy llvm-tools-preview - cargo install cargo-binstall --locked @just mdformat-install echo "Note: You may need to restart your shell for pipx PATH changes to take effect" -# Install development tools (extended setup) +# Install tool versions defined in mise.toml +[windows] +mise-install: + mise trust + mise install + +[unix] +mise-install: + mise trust + mise install + +# Install development tools not managed by mise [windows] install-tools: + @just mise-install cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked [unix] 
install-tools: + @just mise-install cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked -# Install mdBook and plugins for documentation +# Install mdBook plugins for documentation [windows] docs-install: - cargo binstall mdbook mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers + @just mise-install + cargo binstall mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers [unix] docs-install: - cargo binstall mdbook mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers + @just mise-install + cargo binstall mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers # Install pipx for Python tool management [windows] @@ -132,7 +147,7 @@ format: fmt format-json-yaml format-docs fmt-justfile # Individual format recipes format-json-yaml: - npx prettier --write "**/*.{json,yaml,yml}" + prettier --write "**/*.{json,yaml,yml}" [windows] format-docs: @@ -140,7 +155,6 @@ format-docs: [unix] format-docs: - cd "{{ root }}" @if command -v mdformat >/dev/null 2>&1; then find . -type f -name "*.md" -not -path "./target/*" -not -path "./node_modules/*" -exec mdformat {} + ; else echo "mdformat not found. 
Run 'just mdformat-install' first."; fi fmt: @@ -191,10 +205,9 @@ pre-commit-run: # Format a single file (for pre-commit hooks) format-files +FILES: - npx prettier --write --config .prettierrc.json {{ FILES }} + prettier --write --config .prettierrc.json {{ FILES }} megalinter: - cd "{{ root }}" npx mega-linter-runner --flavor rust # ============================================================================= @@ -213,26 +226,22 @@ test: # Test justfile cross-platform functionality [windows] test-justfile: - Set-Location "{{ root }}" $p = (Get-Location).Path; Write-Host "Current directory: $p"; Write-Host "Expected directory: {{ root }}" [unix] test-justfile: - cd "{{ root }}" /bin/echo "Current directory: $(pwd -P)" /bin/echo "Expected directory: {{ root }}" # Test cross-platform file system helpers [windows] test-fs: - Set-Location "{{ root }}" @just rmrf tmp/xfstest @just ensure-dir tmp/xfstest/sub @just rmrf tmp/xfstest [unix] test-fs: - cd "{{ root }}" @just rmrf tmp/xfstest @just ensure-dir tmp/xfstest/sub @just rmrf tmp/xfstest diff --git a/mise.toml b/mise.toml index 58e2eee..94ebd4d 100644 --- a/mise.toml +++ b/mise.toml @@ -3,6 +3,8 @@ actionlint = "1.7.10" cargo-binstall = "1.16.7" cargo-insta = "1.46.1" claude = "latest" +cyclonedx = "0.29.2" +git-cliff = "2.11.0" goreleaser = "2.13.3" just = "1.46.0" markdownlint-cli2 = "0.20.0" From 1cb3744c38b784d50c5c4590273cef2dac3fe910 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 19 Jan 2026 22:24:13 +0000 Subject: [PATCH 18/25] chore(cleanup): remove megalinter configurations and references Signed-off-by: UncleSp1d3r --- .gitignore | 2 -- .mdformat.toml | 4 +--- .mega-linter.yml | 48 ---------------------------------------------- cspell.config.yaml | 1 - justfile | 3 --- 5 files changed, 1 insertion(+), 57 deletions(-) delete mode 100644 .mega-linter.yml diff --git a/.gitignore b/.gitignore index 4b8b60f..98b1e83 100644 --- a/.gitignore +++ b/.gitignore @@ -121,8 +121,6 @@ docs/book/ .envrc .direnv/ 
-megalinter-reports/ - # Override global gitignore !bin/ # Added by goreleaser init: diff --git a/.mdformat.toml b/.mdformat.toml index 8f1e01d..57f1a18 100644 --- a/.mdformat.toml +++ b/.mdformat.toml @@ -7,7 +7,6 @@ exclude = [ "**/*.tpl.md", "**/CHANGELOG.md", "target/**", - "megalinter-reports/**", ] validate = true number = true @@ -26,5 +25,4 @@ extensions = [ [plugin.mkdocs] align_semantic_breaks_in_lists = true -ignore_missing_references = true - +ignore_missing_references = true diff --git a/.mega-linter.yml b/.mega-linter.yml deleted file mode 100644 index 37a81da..0000000 --- a/.mega-linter.yml +++ /dev/null @@ -1,48 +0,0 @@ ---- -# MegaLinter configuration for Stringy -# This configuration minimizes false positives while maintaining code quality - -# Apply linter fixes where safe -APPLY_FIXES: all - -# File/directory exclusions -EXCLUDED_DIRECTORIES: - - target - - dist - - build - - node_modules - - .git - - .cache - - coverage - - docs/book - - docs/build - -# All linters now properly configured - -# ActionLint configuration - suppress shellcheck issues in generated cargo-dist file -ACTION_ACTIONLINT_ARGUMENTS: - - --ignore=SC2086:info - - --ignore=SC2129:style - - --ignore=SC2001:style - -# File-specific exclusions for generated content -FILTER_REGEX_EXCLUDE: | - \.github/workflows/release\.yml - -# Lychee configuration for link checking -SPELL_LYCHEE_ARGUMENTS: - - --no-progress - - --exclude-loopback - - --exclude-private - - --exclude-mail - - --timeout=10 - -# Markdown table formatting exclusions -MARKDOWN_MARKDOWN_TABLE_FORMATTER_FILTER_REGEX_EXCLUDE: | - README\.md - -# Prettier configuration - respect .prettierignore -JSON_PRETTIER_ARGUMENTS: - - --check -YAML_PRETTIER_ARGUMENTS: - - --check diff --git a/cspell.config.yaml b/cspell.config.yaml index 89200af..dad0288 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -100,7 +100,6 @@ words: - mdformat - actionlint - lychee - - megalinter - cspell - justfile diff --git a/justfile 
b/justfile index 34a4d7a..359c785 100644 --- a/justfile +++ b/justfile @@ -207,9 +207,6 @@ pre-commit-run: format-files +FILES: prettier --write --config .prettierrc.json {{ FILES }} -megalinter: - npx mega-linter-runner --flavor rust - # ============================================================================= # BUILDING AND TESTING # ============================================================================= From 3b821e5cc7cc89279c22989428786638bbc41e39 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 19 Jan 2026 23:05:25 +0000 Subject: [PATCH 19/25] chore(mise): add node version to tools configuration Signed-off-by: UncleSp1d3r --- mise.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/mise.toml b/mise.toml index 94ebd4d..c162dd4 100644 --- a/mise.toml +++ b/mise.toml @@ -9,6 +9,7 @@ goreleaser = "2.13.3" just = "1.46.0" markdownlint-cli2 = "0.20.0" mdbook = "0.5.2" +node = "25.4.0" pre-commit = "4.5.1" prettier = "3.8.0" python = "3.14.2" From 704e7c5bfd3ceaf87acfe35cbad610fb09c134f7 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Mon, 19 Jan 2026 18:10:10 -0500 Subject: [PATCH 20/25] refactor(yara): split module to stay under 500-line limit Extract YARA string escaping utilities to separate escaping.rs module: - escape_yara_string, escape_yara_unicode_literal - utf16be_hex_string, utf16le_hex_string - All associated tests The original yara.rs was 536 lines, now split into: - yara/mod.rs (357 lines) - main formatting logic - yara/escaping.rs (204 lines) - escaping utilities Co-Authored-By: Claude Opus 4.5 --- src/output/yara/escaping.rs | 204 ++++++++++++++++++++++++++++ src/output/{yara.rs => yara/mod.rs} | 197 ++------------------------- 2 files changed, 214 insertions(+), 187 deletions(-) create mode 100644 src/output/yara/escaping.rs rename src/output/{yara.rs => yara/mod.rs} (64%) diff --git a/src/output/yara/escaping.rs b/src/output/yara/escaping.rs new file mode 100644 index 0000000..ad30421 --- /dev/null +++ 
b/src/output/yara/escaping.rs @@ -0,0 +1,204 @@ +//! YARA string escaping and encoding utilities. +//! +//! Provides functions for escaping strings and encoding them to hex formats +//! suitable for YARA rule strings. + +/// Escape a string for use in YARA string literals (ASCII/UTF-8). +/// +/// Handles control characters, backslashes, quotes, and non-printable bytes. +pub fn escape_yara_string(text: &str) -> String { + let mut escaped = String::new(); + for byte in text.as_bytes() { + match *byte { + b'\\' => escaped.push_str("\\\\"), + b'"' => escaped.push_str("\\\""), + b'\n' => escaped.push_str("\\n"), + b'\r' => escaped.push_str("\\r"), + b'\t' => escaped.push_str("\\t"), + 0x08 => escaped.push_str("\\b"), + 0x0b => escaped.push_str("\\x0b"), + 0x0c => escaped.push_str("\\x0c"), + 0x00..=0x1f | 0x7f..=0xff => { + escaped.push_str(&format!("\\x{:02x}", byte)); + } + _ => escaped.push(*byte as char), + } + } + escaped +} + +/// Escape a Unicode string for use with YARA's `wide` modifier. +/// +/// This preserves non-control Unicode characters while escaping control characters +/// and special YARA syntax characters. +pub fn escape_yara_unicode_literal(text: &str) -> String { + let mut escaped = String::new(); + for ch in text.chars() { + match ch { + '\\' => escaped.push_str("\\\\"), + '"' => escaped.push_str("\\\""), + '\n' => escaped.push_str("\\n"), + '\r' => escaped.push_str("\\r"), + '\t' => escaped.push_str("\\t"), + _ if ch.is_control() => { + let mut buf = [0; 4]; + let encoded = ch.encode_utf8(&mut buf); + for byte in encoded.as_bytes() { + escaped.push_str(&format!("\\x{:02x}", byte)); + } + } + _ => escaped.push(ch), + } + } + escaped +} + +/// Convert a string to UTF-16 big-endian hex format for YARA. +/// +/// Returns a hex string like `{ 00 41 00 42 }` for "AB". 
+pub fn utf16be_hex_string(text: &str) -> String { + let hex_bytes: Vec = text + .encode_utf16() + .flat_map(|unit| unit.to_be_bytes()) + .map(|b| format!("{:02x}", b)) + .collect(); + + if hex_bytes.is_empty() { + return "{ }".to_string(); + } + + format!("{{ {} }}", hex_bytes.join(" ")) +} + +/// Convert a string to UTF-16 little-endian hex format for YARA. +/// +/// Returns a hex string like `{ 41 00 42 00 }` for "AB". +pub fn utf16le_hex_string(text: &str) -> String { + let hex_bytes: Vec = text + .encode_utf16() + .flat_map(|unit| unit.to_le_bytes()) + .map(|b| format!("{:02x}", b)) + .collect(); + + if hex_bytes.is_empty() { + return "{ }".to_string(); + } + + format!("{{ {} }}", hex_bytes.join(" ")) +} + +#[cfg(test)] +mod tests { + use super::*; + + mod escape_yara_string_tests { + use super::*; + + #[test] + fn basic_escapes() { + let input = "quote\" backslash\\ line\n tab\t"; + let escaped = escape_yara_string(input); + assert!(escaped.contains("\\\"")); + assert!(escaped.contains("\\\\")); + assert!(escaped.contains("\\n")); + assert!(escaped.contains("\\t")); + } + + #[test] + fn control_characters() { + assert_eq!(escape_yara_string("\r"), "\\r"); + assert_eq!(escape_yara_string("\x00"), "\\x00"); + assert_eq!(escape_yara_string("\x08"), "\\b"); + assert_eq!(escape_yara_string("\x0b"), "\\x0b"); + assert_eq!(escape_yara_string("\x0c"), "\\x0c"); + assert_eq!(escape_yara_string("\x7f"), "\\x7f"); + } + } + + mod escape_yara_unicode_literal_tests { + use super::*; + + #[test] + fn basic_escapes() { + assert_eq!(escape_yara_unicode_literal("quote\""), "quote\\\""); + assert_eq!(escape_yara_unicode_literal("back\\slash"), "back\\\\slash"); + assert_eq!(escape_yara_unicode_literal("line\nbreak"), "line\\nbreak"); + assert_eq!(escape_yara_unicode_literal("tab\there"), "tab\\there"); + assert_eq!(escape_yara_unicode_literal("return\rhere"), "return\\rhere"); + } + + #[test] + fn control_chars_hex_escaped() { + assert_eq!(escape_yara_unicode_literal("\x00"), 
"\\x00"); + assert_eq!(escape_yara_unicode_literal("\x1f"), "\\x1f"); + } + + #[test] + fn unicode_passthrough() { + let result = escape_yara_unicode_literal("\u{4E2D}\u{6587}"); + assert!( + result.contains('\u{4E2D}'), + "Non-control Unicode should not be escaped" + ); + } + + #[test] + fn empty_string() { + assert_eq!(escape_yara_unicode_literal(""), ""); + } + } + + mod utf16be_hex_string_tests { + use super::*; + + #[test] + fn basic_ascii() { + assert_eq!(utf16be_hex_string("A"), "{ 00 41 }"); + assert_eq!(utf16be_hex_string("AB"), "{ 00 41 00 42 }"); + } + + #[test] + fn empty_string() { + assert_eq!(utf16be_hex_string(""), "{ }"); + } + + #[test] + fn non_ascii_unicode() { + let chinese = utf16be_hex_string("\u{4E2D}"); + assert_eq!(chinese, "{ 4e 2d }"); + } + + #[test] + fn surrogate_pair() { + let emoji = utf16be_hex_string("\u{1F600}"); + assert_eq!(emoji, "{ d8 3d de 00 }"); + } + } + + mod utf16le_hex_string_tests { + use super::*; + + #[test] + fn basic_ascii() { + assert_eq!(utf16le_hex_string("A"), "{ 41 00 }"); + assert_eq!(utf16le_hex_string("AB"), "{ 41 00 42 00 }"); + } + + #[test] + fn empty_string() { + assert_eq!(utf16le_hex_string(""), "{ }"); + } + + #[test] + fn non_ascii_unicode() { + let chinese = utf16le_hex_string("\u{4E2D}"); + assert_eq!(chinese, "{ 2d 4e }"); + } + + #[test] + fn surrogate_pair() { + let emoji = utf16le_hex_string("\u{1F600}"); + assert_eq!(emoji, "{ 3d d8 00 de }"); + } + } +} diff --git a/src/output/yara.rs b/src/output/yara/mod.rs similarity index 64% rename from src/output/yara.rs rename to src/output/yara/mod.rs index ca2bfe7..5043278 100644 --- a/src/output/yara.rs +++ b/src/output/yara/mod.rs @@ -1,4 +1,14 @@ +//! YARA rule generation from extracted strings. +//! +//! Generates YARA rule templates suitable for malware analysis and detection. +//! Strings are grouped by tag and formatted with appropriate encoding modifiers. 
+ +mod escaping; + use crate::types::{Encoding, FoundString, Result, Tag}; +use escaping::{ + escape_yara_string, escape_yara_unicode_literal, utf16be_hex_string, utf16le_hex_string, +}; use super::OutputMetadata; use std::collections::{BTreeMap, HashMap}; @@ -151,77 +161,6 @@ fn sanitize_identifier(name: &str) -> String { } } -fn escape_yara_string(text: &str) -> String { - let mut escaped = String::new(); - for byte in text.as_bytes() { - match *byte { - b'\\' => escaped.push_str("\\\\"), - b'"' => escaped.push_str("\\\""), - b'\n' => escaped.push_str("\\n"), - b'\r' => escaped.push_str("\\r"), - b'\t' => escaped.push_str("\\t"), - 0x08 => escaped.push_str("\\b"), - 0x0b => escaped.push_str("\\x0b"), - 0x0c => escaped.push_str("\\x0c"), - 0x00..=0x1f | 0x7f..=0xff => { - escaped.push_str(&format!("\\x{:02x}", byte)); - } - _ => escaped.push(*byte as char), - } - } - escaped -} - -fn escape_yara_unicode_literal(text: &str) -> String { - let mut escaped = String::new(); - for ch in text.chars() { - match ch { - '\\' => escaped.push_str("\\\\"), - '"' => escaped.push_str("\\\""), - '\n' => escaped.push_str("\\n"), - '\r' => escaped.push_str("\\r"), - '\t' => escaped.push_str("\\t"), - _ if ch.is_control() => { - let mut buf = [0; 4]; - let encoded = ch.encode_utf8(&mut buf); - for byte in encoded.as_bytes() { - escaped.push_str(&format!("\\x{:02x}", byte)); - } - } - _ => escaped.push(ch), - } - } - escaped -} - -fn utf16be_hex_string(text: &str) -> String { - let hex_bytes: Vec = text - .encode_utf16() - .flat_map(|unit| unit.to_be_bytes()) - .map(|b| format!("{:02x}", b)) - .collect(); - - if hex_bytes.is_empty() { - return "{ }".to_string(); - } - - format!("{{ {} }}", hex_bytes.join(" ")) -} - -fn utf16le_hex_string(text: &str) -> String { - let hex_bytes: Vec = text - .encode_utf16() - .flat_map(|unit| unit.to_le_bytes()) - .map(|b| format!("{:02x}", b)) - .collect(); - - if hex_bytes.is_empty() { - return "{ }".to_string(); - } - - format!("{{ {} }}", 
hex_bytes.join(" ")) -} - fn tag_name(tag: &Tag) -> &'static str { match tag { Tag::Url => "Url", @@ -291,16 +230,6 @@ mod tests { assert_eq!(sanitize_rule_name(""), "_"); } - #[test] - fn test_escape_yara_string() { - let input = "quote\" backslash\\ line\n tab\t"; - let escaped = escape_yara_string(input); - assert!(escaped.contains("\\\"")); - assert!(escaped.contains("\\\\")); - assert!(escaped.contains("\\n")); - assert!(escaped.contains("\\t")); - } - #[test] fn test_group_strings_by_tag() { let strings = vec![ @@ -362,85 +291,12 @@ mod tests { assert!(output.contains("\\x")); } - #[test] - fn test_escape_yara_unicode_literal_basic() { - // Basic escapes - assert_eq!(escape_yara_unicode_literal("quote\""), "quote\\\""); - assert_eq!(escape_yara_unicode_literal("back\\slash"), "back\\\\slash"); - assert_eq!(escape_yara_unicode_literal("line\nbreak"), "line\\nbreak"); - assert_eq!(escape_yara_unicode_literal("tab\there"), "tab\\there"); - assert_eq!(escape_yara_unicode_literal("return\rhere"), "return\\rhere"); - } - - #[test] - fn test_escape_yara_unicode_literal_control_chars() { - // Control characters should be hex-escaped - assert_eq!(escape_yara_unicode_literal("\x00"), "\\x00"); - assert_eq!(escape_yara_unicode_literal("\x1f"), "\\x1f"); - } - - #[test] - fn test_escape_yara_unicode_literal_unicode_passthrough() { - // Non-control Unicode should pass through unescaped - let result = escape_yara_unicode_literal("\u{4E2D}\u{6587}"); - assert!( - result.contains('\u{4E2D}'), - "Non-control Unicode should not be escaped" - ); - } - - #[test] - fn test_escape_yara_unicode_literal_empty() { - assert_eq!(escape_yara_unicode_literal(""), ""); - } - - #[test] - fn test_utf16be_hex_string_basic() { - // Basic ASCII - should be big-endian (00 followed by ASCII byte) - assert_eq!(utf16be_hex_string("A"), "{ 00 41 }"); - assert_eq!(utf16be_hex_string("AB"), "{ 00 41 00 42 }"); - } - - #[test] - fn test_utf16be_hex_string_empty() { - assert_eq!(utf16be_hex_string(""), 
"{ }"); - } - - #[test] - fn test_utf16be_hex_string_non_ascii() { - // Non-ASCII Unicode (BMP) - Chinese character U+4E2D - let chinese = utf16be_hex_string("\u{4E2D}"); - assert_eq!(chinese, "{ 4e 2d }"); - } - - #[test] - fn test_utf16be_hex_string_surrogate_pair() { - // Character requiring surrogate pair (outside BMP) - emoji U+1F600 - let emoji = utf16be_hex_string("\u{1F600}"); - // Should produce surrogate pair: D83D DE00 - assert_eq!(emoji, "{ d8 3d de 00 }"); - } - - #[test] - fn test_escape_yara_string_control_characters() { - assert_eq!(escape_yara_string("\r"), "\\r"); - assert_eq!(escape_yara_string("\x00"), "\\x00"); - assert_eq!(escape_yara_string("\x08"), "\\b"); - assert_eq!(escape_yara_string("\x0b"), "\\x0b"); - assert_eq!(escape_yara_string("\x0c"), "\\x0c"); - assert_eq!(escape_yara_string("\x7f"), "\\x7f"); - } - #[test] fn test_format_yara_uses_current_timestamp_when_not_set() { - // When generated_at is None, format_yara should use current_timestamp() let metadata = OutputMetadata::new("test.bin".to_string(), OutputFormat::Yara, 0, 0); - // Note: generated_at is None let output = format_yara(&[], &metadata).expect("Formatting should succeed"); - // Should contain a timestamp in the generated_at field assert!(output.contains("generated_at = \"")); - // Timestamp should be numeric (or CLOCK_ERROR in exceptional cases) assert!( output.contains("generated_at = \"1") || output.contains("generated_at = \"CLOCK_ERROR"), @@ -448,36 +304,8 @@ mod tests { ); } - #[test] - fn test_utf16le_hex_string_basic() { - // Basic ASCII - should be little-endian (ASCII byte followed by 00) - assert_eq!(utf16le_hex_string("A"), "{ 41 00 }"); - assert_eq!(utf16le_hex_string("AB"), "{ 41 00 42 00 }"); - } - - #[test] - fn test_utf16le_hex_string_empty() { - assert_eq!(utf16le_hex_string(""), "{ }"); - } - - #[test] - fn test_utf16le_hex_string_non_ascii() { - // Non-ASCII Unicode (BMP) - Chinese character U+4E2D - let chinese = utf16le_hex_string("\u{4E2D}"); - 
assert_eq!(chinese, "{ 2d 4e }"); - } - - #[test] - fn test_utf16le_hex_string_surrogate_pair() { - // Character requiring surrogate pair (outside BMP) - emoji U+1F600 - let emoji = utf16le_hex_string("\u{1F600}"); - // Should produce surrogate pair: 3D D8 00 DE (little-endian) - assert_eq!(emoji, "{ 3d d8 00 de }"); - } - #[test] fn test_utf16le_ascii_uses_wide_modifier() { - // ASCII UTF-16LE should use "wide" modifier let mut string = make_string("test"); string.encoding = Encoding::Utf16Le; let output = format_yara(&[string], &make_metadata()).expect("Formatting should succeed"); @@ -490,7 +318,6 @@ mod tests { #[test] fn test_utf16le_non_ascii_uses_hex() { - // Non-ASCII UTF-16LE should use hex string, not wide modifier let mut string = make_string("\u{4E2D}\u{6587}"); string.encoding = Encoding::Utf16Le; let output = format_yara(&[string], &make_metadata()).expect("Formatting should succeed"); @@ -506,11 +333,9 @@ mod tests { #[test] fn test_binary_name_injection_escaped_in_comments() { - // Binary name with newlines should be escaped in comments let mut metadata = make_metadata(); metadata.binary_name = "evil\nname".to_string(); let output = format_yara(&[], &metadata).expect("Formatting should succeed"); - // Should contain escaped newline, not literal assert!( output.contains("evil\\nname"), "Newlines in binary_name should be escaped" @@ -523,11 +348,9 @@ mod tests { #[test] fn test_timestamp_injection_escaped_in_meta() { - // Timestamp with special characters should be escaped let mut metadata = make_metadata(); metadata.generated_at = Some("2024\"\n//attack".to_string()); let output = format_yara(&[], &metadata).expect("Formatting should succeed"); - // Should contain escaped characters assert!( output.contains("2024\\\"\\n//attack"), "Special chars in timestamp should be escaped" From c4ec73be8be98817df69f7e6c574a7769a1a2c4b Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Tue, 20 Jan 2026 00:23:20 +0000 Subject: [PATCH 21/25] chore(agents): fix 
formatting in critical rules section Signed-off-by: UncleSp1d3r --- AGENTS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2e71a4e..2dec2aa 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,8 +6,7 @@ 1. **No `unsafe` code** - `#![forbid(unsafe_code)]` enforced 2. **Zero warnings** - `cargo clippy -- -D warnings` must pass -3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation (except when explicity testing or working with Unicode strings or emjois) -4. **File size limit** - Keep files under 500 lines; split larger files +3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation (except when explicity testing or working with Unicode strings or emjois)4. **File size limit** - Keep files under 500 lines; split larger files 5. **No blanket `#[allow]`** - Any `allow` requires inline justification ## Project Summary From 74c71bc8945d1089f7f68fef5aba21e957039c7a Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sat, 24 Jan 2026 13:31:00 -0500 Subject: [PATCH 22/25] docs(AGENTS): improve AI agent guidelines with fixes and additions Fix formatting bug where rules 3 and 4 were merged on one line, correct typos, add Rust version requirements, expand development commands, clarify module structure, and document key dependencies. 
Co-Authored-By: Claude Opus 4.5 --- .gitignore | 6 ++++++ .repomixignore | 4 ++++ .vscode/settings.json | 12 ++++++++++++ AGENTS.md | 21 ++++++++++++++++++--- 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 .repomixignore create mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 98b1e83..6657408 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,9 @@ docs/book/ !bin/ # Added by goreleaser init: .intentionally-empty-file.o + + +megalinter-reports/* +target/* +stringy-output/* +tests/fixtures/* diff --git a/.repomixignore b/.repomixignore new file mode 100644 index 0000000..cff354a --- /dev/null +++ b/.repomixignore @@ -0,0 +1,4 @@ +megalinter-reports/* +target/* +stringy-output/* +tests/fixtures/* diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..868f790 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,12 @@ +{ + "ruff.path": [ + "${workspaceFolder}/.vscode/mise-tools/ruff" + ], + "ruff.interpreter": [ + "${workspaceFolder}/.vscode/mise-tools/python" + ], + "python.defaultInterpreterPath": "${workspaceFolder}/.vscode/mise-tools/python", + "debug.javascript.defaultRuntimeExecutable": { + "pwa-node": "${workspaceFolder}/.vscode/mise-tools/node" + } +} \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 2dec2aa..e18adf4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,13 +6,16 @@ 1. **No `unsafe` code** - `#![forbid(unsafe_code)]` enforced 2. **Zero warnings** - `cargo clippy -- -D warnings` must pass -3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation (except when explicity testing or working with Unicode strings or emjois)4. **File size limit** - Keep files under 500 lines; split larger files +3. **ASCII only** - No emojis, em-dashes, smart quotes, or Unicode punctuation (except when explicitly testing or working with Unicode strings or emojis) +4. **File size limit** - Keep files under 500 lines; split larger files 5. 
**No blanket `#[allow]`** - Any `allow` requires inline justification ## Project Summary Stringy extracts meaningful strings from ELF, PE, and Mach-O binaries using format-specific knowledge and semantic classification. Unlike standard `strings`, it is section-aware and semantically intelligent. +**Rust**: Edition 2024, MSRV 1.91 + **Data flow**: Binary -> Format Detection -> Container Parsing -> String Extraction -> Deduplication -> Classification -> Ranking -> Output ## Module Structure @@ -21,8 +24,8 @@ Stringy extracts meaningful strings from ELF, PE, and Mach-O binaries using form | ----------------- | ---------------------------------------------------------------- | | `container/` | Format detection, section analysis, imports/exports via `goblin` | | `extraction/` | ASCII/UTF-8/UTF-16 extraction, deduplication, PE resources | -| `classification/` | Semantic tagging (URLs, IPs, domains, paths, GUIDs) | -| `output/` | Formatters (JSON, human-readable, YARA-friendly) | +| `classification/` | Semantic tagging (URLs, IPs, domains, paths, GUIDs), ranking | +| `output/` | Formatters: `json/`, `table/` (tty/plain), `yara/` | | `types/` | Core data structures, error handling with `thiserror` | ## Key Patterns @@ -47,6 +50,10 @@ just test # Run tests with nextest just lint # Full lint suite just fix # Auto-fix clippy warnings just ci-check # Full CI suite locally +just build # Debug build +just run # Run stringy with arguments +just bench # Run benchmarks +just format # Format all (Rust, JSON, YAML, Markdown, Justfile) ``` ## Testing @@ -59,6 +66,14 @@ just ci-check # Full CI suite locally Import from `stringy::extraction` or `stringy::types`, not deeply nested paths. Re-exports are in `lib.rs`. 
+## Key Dependencies + +- `goblin` - Binary format parsing (ELF, PE, Mach-O) +- `pelite` - PE resource extraction +- `thiserror` - Error type definitions +- `insta` - Snapshot testing (dev) +- `criterion` - Benchmarking (dev) + ## Adding Features **New semantic tag**: Add variant to `Tag` enum in `types.rs`, implement pattern in `classification/semantic.rs` From 96635563536ef92e61883545ba504af5fdfa9ecb Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sat, 24 Jan 2026 18:14:51 -0500 Subject: [PATCH 23/25] fix: address PR review comments for output formatters - Fix test metadata count mismatches in output_table_integration.rs - Remove underscore prefix from used parameters in json.rs - Use crate re-export for RankingConfig import - Fix truncate_string edge case for wide multibyte characters - Update table docs to reflect actual tag selection behavior - Add control character sanitization for TTY output - Use UTF-8 encoding for Unicode test content - Add Claude Code AI assistance section to CONTRIBUTING.md - Update snapshots for encoding and sanitization changes Co-Authored-By: Claude Opus 4.5 --- CONTRIBUTING.md | 4 + TESTING_ANALYSIS.md | 192 ++++++++++-------- src/output/json.rs | 8 +- src/output/table/formatting.rs | 16 +- src/output/table/mod.rs | 2 +- src/output/table/tty.rs | 24 ++- tests/output_json_integration.rs | 9 +- tests/output_table_integration.rs | 4 +- ...son_integration__json_unicode_content.snap | 3 +- ...e_integration__tty_special_characters.snap | 3 +- 10 files changed, 160 insertions(+), 105 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 04b90e9..64a7366 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -81,6 +81,10 @@ Docs live under docs/ and project planning artifacts are in project_plan/. Updat If you believe you found a security issue, please do not open a public issue. Use GitHub Security Advisories if available, or contact the maintainers privately. 
+## AI-assisted development + +This project includes Claude Code configuration in `.claude/settings.json`. These settings enable plugins that help maintain code quality and follow project conventions. If you use Claude Code, the configuration will be applied automatically. + ## Questions If you are unsure where to start, open an issue with your question and we will point you in the right direction. diff --git a/TESTING_ANALYSIS.md b/TESTING_ANALYSIS.md index dee19ca..30b54bc 100644 --- a/TESTING_ANALYSIS.md +++ b/TESTING_ANALYSIS.md @@ -26,15 +26,15 @@ **Test Files**: -1. `integration_elf.rs` (10 tests) - ELF parsing and extraction -2. `integration_extraction.rs` (9 tests) - End-to-end extraction -3. `integration_macho.rs` (15 tests) - Mach-O parsing and load commands -4. `integration_pe.rs` (22 tests) - PE parsing and resource extraction -5. `test_ascii_extraction.rs` (14 tests) - ASCII extraction scenarios -6. `test_ascii_integration.rs` (14 tests) - ASCII integration tests -7. `test_deduplication.rs` (5 tests) - Deduplication workflows -8. `test_noise_filters.rs` (9 tests) - Noise filtering heuristics -9. `test_utf16_extraction.rs` (5 tests) - UTF-16 extraction +01. `integration_elf.rs` (10 tests) - ELF parsing and extraction +02. `integration_extraction.rs` (9 tests) - End-to-end extraction +03. `integration_macho.rs` (15 tests) - Mach-O parsing and load commands +04. `integration_pe.rs` (22 tests) - PE parsing and resource extraction +05. `test_ascii_extraction.rs` (14 tests) - ASCII extraction scenarios +06. `test_ascii_integration.rs` (14 tests) - ASCII integration tests +07. `test_deduplication.rs` (5 tests) - Deduplication workflows +08. `test_noise_filters.rs` (9 tests) - Noise filtering heuristics +09. `test_utf16_extraction.rs` (5 tests) - UTF-16 extraction 10. `classification_integration.rs` (27 tests) - Semantic classification 11. `output_json_integration.rs` (41 tests) - JSON output format 12. 
`output_table_integration.rs` (27 tests) - Table output format @@ -235,100 +235,117 @@ fn test_section_size_overflow() { ### HIGH Priority Gaps 1. **Performance Tests for Deduplication** - - Test with 10,000+ duplicate strings - - Validate O(n^2) algorithms don't cause timeout - - File: `tests/test_deduplication_performance.rs` (MISSING) + + - Test with 10,000+ duplicate strings + - Validate O(n^2) algorithms don't cause timeout + - File: `tests/test_deduplication_performance.rs` (MISSING) 2. **Doctest Fixes** - - Fix `extraction::StringExtractor` doctest (line 318) - - Fix `extraction::BasicExtractor` doctest (line 408) - - Files: `src/extraction/mod.rs` + + - Fix `extraction::StringExtractor` doctest (line 318) + - Fix `extraction::BasicExtractor` doctest (line 408) + - Files: `src/extraction/mod.rs` 3. **Bounds Checking Edge Cases** - - Section offset beyond file boundary - - Section size causing integer overflow - - File: `tests/test_extraction_edge_cases.rs` (MISSING) + + - Section offset beyond file boundary + - Section size causing integer overflow + - File: `tests/test_extraction_edge_cases.rs` (MISSING) ### MEDIUM Priority Gaps 1. **Malformed Binary Handling** - - Truncated ELF headers - - Invalid PE signatures - - Corrupted Mach-O load commands - - File: `tests/test_malformed_binaries.rs` (MISSING) + + - Truncated ELF headers + - Invalid PE signatures + - Corrupted Mach-O load commands + - File: `tests/test_malformed_binaries.rs` (MISSING) 2. **Regex Pattern Edge Cases** - - URL regex with edge cases (IPv6 in URLs, Unicode domains) - - Email regex with uncommon formats - - Path regex with UNC paths edge cases - - Files: Pattern test modules (PARTIAL) + + - URL regex with edge cases (IPv6 in URLs, Unicode domains) + - Email regex with uncommon formats + - Path regex with UNC paths edge cases + - Files: Pattern test modules (PARTIAL) 3. 
**Resource Extraction Error Paths** - - PE resource directory corruption - - Version info parsing failures - - String table malformed data - - File: `src/extraction/pe_resources.rs` tests (PARTIAL) + + - PE resource directory corruption + - Version info parsing failures + - String table malformed data + - File: `src/extraction/pe_resources.rs` tests (PARTIAL) ### LOW Priority Gaps 1. **Main Binary CLI Testing** - - Integration tests for CLI argument parsing - - File: `tests/cli_integration.rs` (MISSING, but main is stub) + + - Integration tests for CLI argument parsing + - File: `tests/cli_integration.rs` (MISSING, but main is stub) 2. **Memory Leak Tests** - - Large file processing without memory growth - - File: Performance test suite (MISSING) + + - Large file processing without memory growth + - File: Performance test suite (MISSING) 3. **Concurrency Tests** - - Parallel extraction from multiple files - - Thread safety validation - - File: Concurrency test suite (MISSING) + + - Parallel extraction from multiple files + - Thread safety validation + - File: Concurrency test suite (MISSING) ## Test Infrastructure Assessment ### Strengths 1. **Excellent Fixture Management** - - Well-documented rebuild process - - Multiple binary formats covered - - Source code available for reproduction + + - Well-documented rebuild process + - Multiple binary formats covered + - Source code available for reproduction 2. **Comprehensive Integration Tests** - - 219 integration tests covering end-to-end scenarios - - Real binary fixtures used - - All output formats tested + + - 219 integration tests covering end-to-end scenarios + - Real binary fixtures used + - All output formats tested 3. **Snapshot Testing** - - `insta` framework well-utilized - - Output format changes tracked - - Easy to review snapshot diffs + + - `insta` framework well-utilized + - Output format changes tracked + - Easy to review snapshot diffs 4. 
**Test Organization** - - Clear separation: unit vs integration - - Logical grouping by functionality - - Consistent naming conventions + + - Clear separation: unit vs integration + - Logical grouping by functionality + - Consistent naming conventions ### Weaknesses 1. **No Performance Benchmarks** - - No `criterion` benchmarks - - No performance regression detection - - Large input scenarios untested + + - No `criterion` benchmarks + - No performance regression detection + - Large input scenarios untested 2. **No Fuzzing Tests** - - No `cargo-fuzz` integration - - Binary parsing not fuzz-tested - - String extraction not fuzz-tested + + - No `cargo-fuzz` integration + - Binary parsing not fuzz-tested + - String extraction not fuzz-tested 3. **No Code Coverage Metrics** - - `cargo-tarpaulin` not installed - - No coverage reports in CI - - Unknown actual code coverage percentage + + - `cargo-tarpaulin` not installed + - No coverage reports in CI + - Unknown actual code coverage percentage 4. **Limited Error Injection** - - Few tests for error paths - - Missing tests for resource failures - - I/O error handling not tested + + - Few tests for error paths + - Missing tests for resource failures + - I/O error handling not tested ## Recommendations @@ -367,42 +384,49 @@ fn test_section_size_overflow() { ### Short-term Improvements (Month 1) 1. **Add Fuzzing** - - Install `cargo-fuzz` - - Fuzz container parsers (ELF, PE, Mach-O) - - Fuzz string extractors (ASCII, UTF-16) + + - Install `cargo-fuzz` + - Fuzz container parsers (ELF, PE, Mach-O) + - Fuzz string extractors (ASCII, UTF-16) 2. **Enable Code Coverage** - - Install `cargo-tarpaulin` - - Add coverage to CI pipeline - - Set coverage threshold (80% target) + + - Install `cargo-tarpaulin` + - Add coverage to CI pipeline + - Set coverage threshold (80% target) 3. 
**Add Malformed Binary Tests** - - Create corrupted fixtures - - Test graceful error handling - - Verify no panics on invalid input + + - Create corrupted fixtures + - Test graceful error handling + - Verify no panics on invalid input ### Long-term Enhancements (Quarter 1) 1. **Performance Benchmarks** - - Add `criterion` benchmarks - - Track deduplication performance - - Track classification performance - - Add to CI for regression detection + + - Add `criterion` benchmarks + - Track deduplication performance + - Track classification performance + - Add to CI for regression detection 2. **Property-Based Testing** - - Add `proptest` or `quickcheck` - - Generate random binaries - - Verify invariants (no panics, valid output) + + - Add `proptest` or `quickcheck` + - Generate random binaries + - Verify invariants (no panics, valid output) 3. **CLI Integration Tests** - - Implement main binary - - Add end-to-end CLI tests - - Test output redirection, error handling + + - Implement main binary + - Add end-to-end CLI tests + - Test output redirection, error handling 4. **Concurrency Tests** - - Test thread safety - - Test parallel file processing - - Validate no data races + + - Test thread safety + - Test parallel file processing + - Validate no data races ## Test Quality Score @@ -473,7 +497,7 @@ fn test_section_size_overflow() { - Unit tests: 0.04s (258 tests) - Integration tests: ~1.5s (219 tests) -- Total execution: <20s including doctests +- Total execution: \<20s including doctests **Verdict**: EXCELLENT - Fast feedback loop diff --git a/src/output/json.rs b/src/output/json.rs index 635d8aa..ce5c986 100644 --- a/src/output/json.rs +++ b/src/output/json.rs @@ -3,13 +3,13 @@ use crate::types::{FoundString, Result, StringyError}; use super::OutputMetadata; /// Format strings as JSONL output, one object per line. 
-pub fn format_json(_strings: &[FoundString], _metadata: &OutputMetadata) -> Result { - if _strings.is_empty() { +pub fn format_json(strings: &[FoundString], _metadata: &OutputMetadata) -> Result { + if strings.is_empty() { return Ok(String::new()); } - let mut lines = Vec::with_capacity(_strings.len()); - for item in _strings { + let mut lines = Vec::with_capacity(strings.len()); + for item in strings { if !item.confidence.is_finite() { return Err(StringyError::ConfigError( "JSON serialization failed: non-finite confidence".to_string(), diff --git a/src/output/table/formatting.rs b/src/output/table/formatting.rs index 02c8e4a..06fcbea 100644 --- a/src/output/table/formatting.rs +++ b/src/output/table/formatting.rs @@ -3,7 +3,7 @@ //! This module provides shared utilities for formatting strings, tags, and //! text alignment used by both TTY and plain output modes. -use crate::classification::ranking::RankingConfig; +use crate::classification::RankingConfig; use crate::types::Tag; use super::TAGS_COLUMN_WIDTH; @@ -129,9 +129,9 @@ pub fn truncate_string(s: &str, max_len: usize) -> String { // Find a valid character boundary for truncation let truncate_at = max_len - 3; - let mut end_index = truncate_at; + let mut end_index = 0; - // Ensure we don't split a multi-byte character + // Find the last char boundary that fits within truncate_at bytes for (idx, _) in s.char_indices() { if idx <= truncate_at { end_index = idx; @@ -140,13 +140,9 @@ pub fn truncate_string(s: &str, max_len: usize) -> String { } } - // Handle case where we need to include at least one character - if end_index == 0 && !s.is_empty() { - if let Some((idx, _)) = s.char_indices().nth(1) { - end_index = idx; - } else { - end_index = s.len(); - } + // If the first character is too wide to fit with "...", just return dots + if end_index == 0 { + return ".".repeat(max_len.min(3)); } format!("{}...", &s[..end_index]) diff --git a/src/output/table/mod.rs b/src/output/table/mod.rs index 9bcb95a..8c79829 
100644 --- a/src/output/table/mod.rs +++ b/src/output/table/mod.rs @@ -27,7 +27,7 @@ //! # Column Layout //! //! - **String**: Up to 60 characters, truncated with `...` if longer -//! - **Tags**: First 2-3 tags, comma-separated, max 20 characters +//! - **Tags**: Tags with highest boost value shown, max 20 characters //! - **Score**: Right-aligned integer score //! - **Section**: Section name where the string was found diff --git a/src/output/table/tty.rs b/src/output/table/tty.rs index 38ed658..918186a 100644 --- a/src/output/table/tty.rs +++ b/src/output/table/tty.rs @@ -5,6 +5,27 @@ use crate::types::{FoundString, Result}; use super::formatting::{Alignment, format_tags, pad_string, truncate_string}; + +/// Sanitize a string for TTY display by replacing control characters. +/// +/// Replaces newlines, tabs, and other control characters with visible escape sequences +/// to prevent broken table layout. +fn sanitize_for_display(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '\n' => result.push_str("\\n"), + '\r' => result.push_str("\\r"), + '\t' => result.push_str("\\t"), + '\x00'..='\x1f' | '\x7f' => { + // Other control characters shown as \xNN + result.push_str(&format!("\\x{:02x}", c as u8)); + } + _ => result.push(c), + } + } + result +} use super::{ OutputMetadata, SCORE_COLUMN_WIDTH, SECTION_COLUMN_WIDTH, STRING_COLUMN_WIDTH, TAGS_COLUMN_WIDTH, @@ -55,7 +76,8 @@ pub(super) fn format_table_tty( // Build rows for found_string in strings { - let truncated_text = truncate_string(&found_string.text, STRING_COLUMN_WIDTH); + let sanitized_text = sanitize_for_display(&found_string.text); + let truncated_text = truncate_string(&sanitized_text, STRING_COLUMN_WIDTH); let tags_display = format_tags(&found_string.tags); let section_display = found_string.section.as_deref().unwrap_or(""); diff --git a/tests/output_json_integration.rs b/tests/output_json_integration.rs index 4590956..7e74d88 100644 --- 
a/tests/output_json_integration.rs +++ b/tests/output_json_integration.rs @@ -227,8 +227,15 @@ fn test_json_long_strings() { #[test] fn test_json_unicode_content() { + // Use UTF-8 encoding for non-ASCII content let unicode = "\u{4E2D}\u{6587}\u{5B57}\u{7B26}"; - let strings = vec![make_string(unicode)]; + let strings = vec![FoundString::new( + unicode.to_string(), + Encoding::Utf8, + 0x1000, + unicode.len() as u32, + StringSource::SectionData, + )]; let output = format_json(&strings, &make_metadata(1)).unwrap(); assert_snapshot!(output); } diff --git a/tests/output_table_integration.rs b/tests/output_table_integration.rs index 3446464..e63a45e 100644 --- a/tests/output_table_integration.rs +++ b/tests/output_table_integration.rs @@ -274,7 +274,7 @@ fn test_plain_multiple_strings() { .with_tags(vec![Tag::Import]) .with_score(80), ]; - let result = format_table_with_mode(&strings, &make_metadata(4), false).unwrap(); + let result = format_table_with_mode(&strings, &make_metadata(3), false).unwrap(); assert_snapshot!(result); } @@ -294,7 +294,7 @@ fn test_plain_preserves_special_characters() { make_string("quote\"here"), make_string("line1\nline2"), ]; - let result = format_table_with_mode(&strings, &make_metadata(3), false).unwrap(); + let result = format_table_with_mode(&strings, &make_metadata(4), false).unwrap(); assert_snapshot!(result); } diff --git a/tests/snapshots/output_json_integration__json_unicode_content.snap b/tests/snapshots/output_json_integration__json_unicode_content.snap index 77c2d01..6f94b92 100644 --- a/tests/snapshots/output_json_integration__json_unicode_content.snap +++ b/tests/snapshots/output_json_integration__json_unicode_content.snap @@ -1,5 +1,6 @@ --- source: tests/output_json_integration.rs +assertion_line: 240 expression: output --- -{"text":"中文字符","encoding":"Ascii","offset":4096,"rva":null,"section":null,"length":12,"tags":[],"score":0,"source":"SectionData","confidence":1.0} 
+{"text":"中文字符","encoding":"Utf8","offset":4096,"rva":null,"section":null,"length":12,"tags":[],"score":0,"source":"SectionData","confidence":1.0} diff --git a/tests/snapshots/output_table_integration__tty_special_characters.snap b/tests/snapshots/output_table_integration__tty_special_characters.snap index 2ebce1e..2718e43 100644 --- a/tests/snapshots/output_table_integration__tty_special_characters.snap +++ b/tests/snapshots/output_table_integration__tty_special_characters.snap @@ -1,9 +1,10 @@ --- source: tests/output_table_integration.rs +assertion_line: 142 expression: result --- String | Tags | Score | Section -------------------------------------------------------------|----------|--------|-------- -string with tab | | 10 | .data +string with\ttab | | 10 | .data pipe|character | | 10 | .data backslash\here | filepath | 20 | .rdata From 5ccbff1e1f61fa9c87de3af82575f81628762287 Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sat, 24 Jan 2026 18:21:37 -0500 Subject: [PATCH 24/25] chore: cleanup devcontainer, justfile, and AGENTS.md - Remove duplicate rust-bundle extension, keep official rust-analyzer - Refactor setup recipes to reuse mise-install recipe - Fix markdown lint: convert emphasis-as-heading to list items Co-Authored-By: Claude Opus 4.5 --- .devcontainer/devcontainer.json | 1 - AGENTS.md | 5 ++--- justfile | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b145780..92c4a66 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -37,7 +37,6 @@ "bierner.markdown-mermaid", "bierner.markdown-yaml-preamble", "DavidAnson.vscode-markdownlint", - "1YiB.rust-bundle", "rust-lang.rust-analyzer", "foxundermoon.shell-format", "redhat.vscode-yaml", diff --git a/AGENTS.md b/AGENTS.md index e18adf4..c7baa6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,9 +14,8 @@ Stringy extracts meaningful strings from ELF, PE, and Mach-O binaries using 
format-specific knowledge and semantic classification. Unlike standard `strings`, it is section-aware and semantically intelligent. -**Rust**: Edition 2024, MSRV 1.91 - -**Data flow**: Binary -> Format Detection -> Container Parsing -> String Extraction -> Deduplication -> Classification -> Ranking -> Output +- **Rust**: Edition 2024, MSRV 1.91 +- **Data flow**: Binary -> Format Detection -> Container Parsing -> String Extraction -> Deduplication -> Classification -> Ranking -> Output ## Module Structure diff --git a/justfile b/justfile index 359c785..e2279f8 100644 --- a/justfile +++ b/justfile @@ -53,16 +53,14 @@ rmrf path: # Development setup [windows] setup: - mise trust - mise install + @just mise-install rustup component add rustfmt clippy llvm-tools-preview @just mdformat-install Write-Host "Note: You may need to restart your shell for pipx PATH changes to take effect" [unix] setup: - mise trust - mise install + @just mise-install rustup component add rustfmt clippy llvm-tools-preview @just mdformat-install echo "Note: You may need to restart your shell for pipx PATH changes to take effect" From e4d1e15628669d48adcf55ac09824354792043dc Mon Sep 17 00:00:00 2001 From: UncleSp1d3r Date: Sat, 24 Jan 2026 18:28:13 -0500 Subject: [PATCH 25/25] refactor(justfile): use mise exec for all tool commands Add mise_exec variable and prefix all tool commands (cargo, prettier, pre-commit, actionlint, cspell, markdownlint, lychee, dist, mdbook, goreleaser) with mise exec to ensure correct tool versions are used regardless of shell activation state. Also adds dotenv-load and ignore-comments settings for consistency with other projects. 
Co-Authored-By: Claude Opus 4.5 --- justfile | 104 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/justfile b/justfile index e2279f8..278d9bc 100644 --- a/justfile +++ b/justfile @@ -1,9 +1,15 @@ # Cross-platform justfile using OS annotations # Windows uses PowerShell, Unix uses bash -set shell := ["bash", "-c"] +set shell := ["bash", "-cu"] set windows-shell := ["powershell", "-NoProfile", "-Command"] +set dotenv-load := true +set ignore-comments := true +# Use mise to manage all dev tools (cargo, node, pre-commit, etc.) +# See mise.toml for tool versions + +mise_exec := "mise exec --" root := justfile_dir() # ============================================================================= @@ -80,23 +86,23 @@ mise-install: [windows] install-tools: @just mise-install - cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked + @{{ mise_exec }} cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked [unix] install-tools: @just mise-install - cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked + @{{ mise_exec }} cargo binstall --disable-telemetry cargo-llvm-cov cargo-audit cargo-deny cargo-dist cargo-release cargo-cyclonedx cargo-auditable cargo-nextest --locked # Install mdBook plugins for documentation [windows] docs-install: @just mise-install - cargo binstall mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers + @{{ mise_exec }} cargo binstall mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers [unix] docs-install: @just mise-install - cargo binstall mdbook-admonish mdbook-mermaid 
mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers + @{{ mise_exec }} cargo binstall mdbook-admonish mdbook-mermaid mdbook-linkcheck mdbook-toc mdbook-open-on-gh mdbook-tabs mdbook-i18n-helpers # Install pipx for Python tool management [windows] @@ -145,7 +151,7 @@ format: fmt format-json-yaml format-docs fmt-justfile # Individual format recipes format-json-yaml: - prettier --write "**/*.{json,yaml,yml}" + @{{ mise_exec }} prettier --write "**/*.{json,yaml,yml}" [windows] format-docs: @@ -156,16 +162,16 @@ format-docs: @if command -v mdformat >/dev/null 2>&1; then find . -type f -name "*.md" -not -path "./target/*" -not -path "./node_modules/*" -exec mdformat {} + ; else echo "mdformat not found. Run 'just mdformat-install' first."; fi fmt: - @cargo fmt --all + @{{ mise_exec }} cargo fmt --all fmt-check: - @cargo fmt --all --check + @{{ mise_exec }} cargo fmt --all --check lint-rust: fmt-check - @cargo clippy --workspace --all-targets --all-features -- -D warnings + @{{ mise_exec }} cargo clippy --workspace --all-targets --all-features -- -D warnings lint-rust-min: - @cargo clippy --workspace --all-targets --no-default-features -- -D warnings + @{{ mise_exec }} cargo clippy --workspace --all-targets --no-default-features -- -D warnings # Format justfile fmt-justfile: @@ -180,43 +186,43 @@ lint: lint-rust lint-actions lint-spell lint-docs lint-justfile # Individual lint recipes lint-actions: - actionlint .github/workflows/*.yml + @{{ mise_exec }} actionlint .github/workflows/*.yml lint-spell: - cspell "**" --config cspell.config.yaml + @{{ mise_exec }} cspell "**" --config cspell.config.yaml lint-docs: - markdownlint docs/**/*.md README.md - lychee docs/**/*.md README.md + @{{ mise_exec }} markdownlint docs/**/*.md README.md + @{{ mise_exec }} lychee docs/**/*.md README.md alias lint-just := lint-justfile # Run clippy with fixes fix: - cargo clippy --fix --allow-dirty --allow-staged + @{{ mise_exec }} cargo clippy --fix --allow-dirty 
--allow-staged # Quick development check check: pre-commit-run lint pre-commit-run: - pre-commit run -a + @{{ mise_exec }} pre-commit run -a # Format a single file (for pre-commit hooks) format-files +FILES: - prettier --write --config .prettierrc.json {{ FILES }} + @{{ mise_exec }} prettier --write --config .prettierrc.json {{ FILES }} # ============================================================================= # BUILDING AND TESTING # ============================================================================= build: - @cargo build --workspace + @{{ mise_exec }} cargo build --workspace build-release: - @cargo build --workspace --release + @{{ mise_exec }} cargo build --workspace --release test: - @cargo nextest run --workspace --no-capture + @{{ mise_exec }} cargo nextest run --workspace --no-capture # Test justfile cross-platform functionality [windows] @@ -242,11 +248,11 @@ test-fs: @just rmrf tmp/xfstest test-ci: - cargo nextest run --workspace --no-capture + @{{ mise_exec }} cargo nextest run --workspace --no-capture # Run all tests including ignored/slow tests across workspace test-all: - cargo nextest run --workspace --no-capture -- --ignored + @{{ mise_exec }} cargo nextest run --workspace --no-capture -- --ignored # ============================================================================= # BENCHMARKING @@ -254,17 +260,17 @@ test-all: # Run all benchmarks bench: - @cargo bench --workspace + @{{ mise_exec }} cargo bench --workspace # ============================================================================= # SECURITY AND AUDITING # ============================================================================= audit: - cargo audit + @{{ mise_exec }} cargo audit deny: - cargo deny check + @{{ mise_exec }} cargo deny check # ============================================================================= # CI AND QUALITY ASSURANCE @@ -272,11 +278,11 @@ deny: # Generate coverage report coverage: - cargo llvm-cov --workspace --lcov --output-path 
lcov.info + @{{ mise_exec }} cargo llvm-cov --workspace --lcov --output-path lcov.info # Check coverage thresholds coverage-check: - cargo llvm-cov --workspace --lcov --output-path lcov.info --fail-under-lines 9.7 + @{{ mise_exec }} cargo llvm-cov --workspace --lcov --output-path lcov.info --fail-under-lines 9.7 # Full local CI parity check ci-check: pre-commit-run fmt-check lint-rust lint-rust-min test-ci build-release audit coverage-check dist-plan @@ -286,29 +292,29 @@ ci-check: pre-commit-run fmt-check lint-rust lint-rust-min test-ci build-release # ============================================================================= run *args: - @cargo run -p stringy -- {{ args }} + @{{ mise_exec }} cargo run -p stringy -- {{ args }} # ============================================================================= # DISTRIBUTION AND PACKAGING # ============================================================================= dist: - @dist build + @{{ mise_exec }} dist build dist-check: - @dist check + @{{ mise_exec }} dist check dist-plan: - @dist plan + @{{ mise_exec }} dist plan # Regenerate cargo-dist CI workflow safely dist-generate-ci: - dist generate --ci github + @{{ mise_exec }} dist generate --ci github @echo "Generated CI workflow. Remember to fix any expression errors if they exist." @echo "Run 'just lint:actions' to validate the generated workflow." install: - @cargo install --path . + @{{ mise_exec }} cargo install --path . 
# ============================================================================= # DOCUMENTATION @@ -320,18 +326,18 @@ docs-build: #!/usr/bin/env bash set -euo pipefail # Build rustdoc - cargo doc --no-deps --document-private-items --target-dir docs/book/api-temp + {{ mise_exec }} cargo doc --no-deps --document-private-items --target-dir docs/book/api-temp # Move rustdoc output to final location mkdir -p docs/book/api cp -r docs/book/api-temp/doc/* docs/book/api/ rm -rf docs/book/api-temp # Build mdBook - cd docs && mdbook build + cd docs && {{ mise_exec }} mdbook build # Serve documentation locally with live reload [unix] docs-serve: - cd docs && mdbook serve --open + cd docs && {{ mise_exec }} mdbook serve --open # Clean documentation artifacts [unix] @@ -341,7 +347,7 @@ docs-clean: # Check documentation (build + link validation + formatting) [unix] docs-check: - cd docs && mdbook build + cd docs && {{ mise_exec }} mdbook build @just fmt-check # Generate and serve documentation @@ -358,12 +364,12 @@ docs: # Test GoReleaser configuration goreleaser-check: - @goreleaser check + @{{ mise_exec }} goreleaser check # Build binaries locally with GoReleaser (test build process) [windows] goreleaser-build: - @goreleaser build --clean + @{{ mise_exec }} goreleaser build --clean [unix] goreleaser-build: @@ -379,12 +385,12 @@ goreleaser-build: # Ensure the system linker sees the correct syslibroot and frameworks export RUSTFLAGS="${RUSTFLAGS:-} -C link-arg=-Wl,-syslibroot,${SDKROOT_PATH} -C link-arg=-F${SDKROOT_PATH}/System/Library/Frameworks" fi - goreleaser build --clean + {{ mise_exec }} goreleaser build --clean # Run snapshot release (test full pipeline without publishing) [windows] goreleaser-snapshot: - @goreleaser release --snapshot --clean + @{{ mise_exec }} goreleaser release --snapshot --clean [unix] goreleaser-snapshot: @@ -400,12 +406,12 @@ goreleaser-snapshot: # Ensure the system linker sees the correct syslibroot and frameworks export RUSTFLAGS="${RUSTFLAGS:-} 
-C link-arg=-Wl,-syslibroot,${SDKROOT_PATH} -C link-arg=-F${SDKROOT_PATH}/System/Library/Frameworks" fi - goreleaser release --snapshot --clean + {{ mise_exec }} goreleaser release --snapshot --clean # Test GoReleaser with specific target [windows] goreleaser-build-target target: - @goreleaser build --clean --single-target {{ target }} + @{{ mise_exec }} goreleaser build --clean --single-target {{ target }} [unix] goreleaser-build-target target: @@ -421,7 +427,7 @@ goreleaser-build-target target: # Ensure the system linker sees the correct syslibroot and frameworks export RUSTFLAGS="${RUSTFLAGS:-} -C link-arg=-Wl,-syslibroot,${SDKROOT_PATH} -C link-arg=-F${SDKROOT_PATH}/System/Library/Frameworks" fi - goreleaser build --clean --single-target {{ target }} + {{ mise_exec }} goreleaser build --clean --single-target {{ target }} # Clean GoReleaser artifacts goreleaser-clean: @@ -432,16 +438,16 @@ goreleaser-clean: # ============================================================================= release: - @cargo release + @{{ mise_exec }} cargo release release-dry-run: - @cargo release --dry-run + @{{ mise_exec }} cargo release --dry-run release-patch: - @cargo release patch + @{{ mise_exec }} cargo release patch release-minor: - @cargo release minor + @{{ mise_exec }} cargo release minor release-major: - @cargo release major + @{{ mise_exec }} cargo release major