diff --git a/docs/architecture.md b/docs/architecture.md
index 59584b0..8cd53c4 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -301,33 +301,44 @@ Merged block: lines 10-30 (score=max(0.15, 0.18, 0.12) = 0.18)
### 6. Output Formatting
-**Generates structured XML output with metadata.**
+**Generates pretty-printed, structured XML output with metadata.**
```xml
-
-[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 6
-[Sun Dec 04 07:18:00 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
-[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 7
-[Sun Dec 04 07:45:45 2005] [error] [client 63.13.186.196] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 08:54:17 2005] [error] [client 147.31.138.75] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 09:35:12 2005] [error] [client 207.203.80.15] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 10:53:30 2005] [error] [client 218.76.139.20] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 11:11:07 2005] [error] [client 24.147.151.74] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 11:33:18 2005] [error] [client 211.141.93.88] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 11:42:43 2005] [error] [client 216.127.124.16] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 12:33:13 2005] [error] [client 208.51.151.210] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 13:32:32 2005] [error] [client 65.68.235.27] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 14:29:00 2005] [error] [client 4.245.93.87] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 15:18:36 2005] [error] [client 67.154.58.130] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 15:59:01 2005] [error] [client 24.83.37.136] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 16:24:03 2005] [notice] jk2_init() Found child 1219 in scoreboard slot 6
-[Sun Dec 04 16:24:05 2005] [error] [client 58.225.62.140] Directory index forbidden by rule: /var/www/html/
-[Sun Dec 04 16:24:06 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
-[Sun Dec 04 16:24:06 2005] [error] mod_jk child workerEnv in error state 6
-[Sun Dec 04 16:31:07 2005] [notice] jk2_init() Found child 1248 in scoreboard slot 7
-
+
+
+
+
+ [Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 6
+ [Sun Dec 04 07:18:00 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
+ [Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 7
+ [Sun Dec 04 07:45:45 2005] [error] [client 63.13.186.196] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 08:54:17 2005] [error] [client 147.31.138.75] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 09:35:12 2005] [error] [client 207.203.80.15] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 10:53:30 2005] [error] [client 218.76.139.20] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 11:11:07 2005] [error] [client 24.147.151.74] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 11:33:18 2005] [error] [client 211.141.93.88] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 11:42:43 2005] [error] [client 216.127.124.16] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 12:33:13 2005] [error] [client 208.51.151.210] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 13:32:32 2005] [error] [client 65.68.235.27] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 14:29:00 2005] [error] [client 4.245.93.87] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 15:18:36 2005] [error] [client 67.154.58.130] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 15:59:01 2005] [error] [client 24.83.37.136] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 16:24:03 2005] [notice] jk2_init() Found child 1219 in scoreboard slot 6
+ [Sun Dec 04 16:24:05 2005] [error] [client 58.225.62.140] Directory index forbidden by rule: /var/www/html/
+ [Sun Dec 04 16:24:06 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
+ [Sun Dec 04 16:24:06 2005] [error] mod_jk child workerEnv in error state 6
+ [Sun Dec 04 16:31:07 2005] [notice] jk2_init() Found child 1248 in scoreboard slot 7
+
+
+
```
+**Output structure:**
+- Valid XML with proper declaration and root element
+- Pretty-printed with indentation for readability
+- XML special characters (`&`, `<`, `>`) are automatically escaped
+- Each block element contains metadata (line range, anomaly score) and the original log content
+
**Metadata included:**
- **Line range**: References back to original file
- **Anomaly score**: Quantifies unusualness
diff --git a/pyproject.toml b/pyproject.toml
index 372895c..aef6361 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "cordon"
-version = "0.3.2"
+version = "0.3.3"
description = "Semantic anomaly detection for system log files"
readme = "README.md"
requires-python = ">=3.10"
diff --git a/src/cordon/__init__.py b/src/cordon/__init__.py
index bdb6e92..d1efde8 100644
--- a/src/cordon/__init__.py
+++ b/src/cordon/__init__.py
@@ -2,7 +2,7 @@
from cordon.core.types import AnalysisResult, MergedBlock, ScoredWindow, TextWindow
from cordon.pipeline import SemanticLogAnalyzer
-__version__ = "0.3.2"
+__version__ = "0.3.3"
__all__ = [
"SemanticLogAnalyzer",
diff --git a/src/cordon/postprocess/formatter.py b/src/cordon/postprocess/formatter.py
index 45f68cf..698a014 100644
--- a/src/cordon/postprocess/formatter.py
+++ b/src/cordon/postprocess/formatter.py
@@ -1,5 +1,6 @@
from collections.abc import Sequence
from pathlib import Path
+from xml.sax.saxutils import escape
from cordon.core.types import MergedBlock
@@ -26,10 +27,10 @@ def format_blocks(self, merged_blocks: Sequence[MergedBlock], original_file: Pat
Formatted string with XML tags and original content
"""
if not merged_blocks:
- return ""
+ return '\n'
# merged blocks are sorted by start_line from the merger
- output_parts = []
+ output_parts = ['', "", ""]
block_idx = 0
current_line = 1
@@ -51,18 +52,31 @@ def format_blocks(self, merged_blocks: Sequence[MergedBlock], original_file: Pat
content_lines.append(next_line)
current_line += 1
- # format the block
+ # format the block with indentation
tag = (
- f''
)
content = "".join(content_lines)
- output_parts.append(f"{tag}\n{content}")
+ # Escape XML special characters to ensure valid XML output
+ escaped_content = escape(content)
+
+ # Indent each line of content for pretty printing
+ # Preserve whitespace-only lines from original content
+ indented_content = "\n".join(
+ " " + line if line else line for line in escaped_content.splitlines()
+ )
+
+ output_parts.append(f"{tag}\n{indented_content}\n ")
+ output_parts.append("") # blank line between blocks
# move to next block
block_idx += 1
current_line += 1
- # join blocks with double newline separator
- return "\n\n".join(output_parts)
+ # close the root element
+ output_parts.append("")
+
+ # join with newlines
+ return "\n".join(output_parts)
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 203533b..a87d749 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -56,7 +56,9 @@ def test_analyze_empty_log(self) -> None:
assert result.total_windows == 0
assert result.significant_windows == 0
assert result.merged_blocks == 0
- assert result.output == ""
+ assert (
+ result.output == '\n'
+ )
finally:
temp_path.unlink()
diff --git a/tests/test_postprocess.py b/tests/test_postprocess.py
index 8ce03ea..8cd7d27 100644
--- a/tests/test_postprocess.py
+++ b/tests/test_postprocess.py
@@ -136,6 +136,9 @@ def test_format_single_block(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)
+ assert '' in output
+ assert "" in output
+ assert "" in output
assert '' in output
assert "line 1\n" in output
assert "line 2\n" in output
@@ -161,6 +164,9 @@ def test_format_multiple_blocks(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)
+ assert '' in output
+ assert "" in output
+ assert "" in output
assert '' in output
assert '' in output
assert output.count("") == 2
@@ -181,6 +187,40 @@ def test_format_empty_blocks(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)
- assert output == ""
+ assert output == '\n'
+ finally:
+ temp_path.unlink()
+
+ def test_format_escapes_xml_special_chars(self) -> None:
+ """Test that XML special characters are properly escaped."""
+ with NamedTemporaryFile(mode="w", delete=False, suffix=".log") as f:
+ f.write("command: test |& tee file.txt\n")
+ f.write("error: x < y && z > 10\n")
+ f.write("message: \"quoted\" & 'single'\n")
+ temp_path = Path(f.name)
+
+ try:
+ from cordon.core.types import MergedBlock
+
+ blocks = [MergedBlock(start_line=1, end_line=3, original_windows=(0,), max_score=0.8)]
+
+ formatter = OutputFormatter()
+ output = formatter.format_blocks(blocks, temp_path)
+
+ # Verify XML structure
+ assert '' in output
+ assert "" in output
+ assert "" in output
+ # Verify XML special characters are escaped (& < > must be escaped in text content)
+ assert "&amp;" in output
+ assert "&lt;" in output
+ assert "&gt;" in output
+ # Verify the content is properly escaped
+ assert "command: test |&amp; tee file.txt" in output
+ assert "error: x &lt; y &amp;&amp; z &gt; 10" in output
+ # Verify raw special characters are not present where they should be escaped
+ assert "|& tee" not in output # & should be escaped
+ assert "x < y" not in output # < should be escaped
+ assert "z > 10" not in output # > should be escaped
finally:
temp_path.unlink()