Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 34 additions & 23 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,33 +301,44 @@ Merged block: lines 10-30 (score=max(0.15, 0.18, 0.12) = 0.18)

### 6. Output Formatting

**Generates structured XML output with metadata.**
**Generates pretty-printed, structured XML output with metadata.**

```xml
<block lines="581-600" score="0.1746">
[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 6
[Sun Dec 04 07:18:00 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 7
[Sun Dec 04 07:45:45 2005] [error] [client 63.13.186.196] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 08:54:17 2005] [error] [client 147.31.138.75] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 09:35:12 2005] [error] [client 207.203.80.15] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 10:53:30 2005] [error] [client 218.76.139.20] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:11:07 2005] [error] [client 24.147.151.74] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:33:18 2005] [error] [client 211.141.93.88] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:42:43 2005] [error] [client 216.127.124.16] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 12:33:13 2005] [error] [client 208.51.151.210] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 13:32:32 2005] [error] [client 65.68.235.27] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 14:29:00 2005] [error] [client 4.245.93.87] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 15:18:36 2005] [error] [client 67.154.58.130] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 15:59:01 2005] [error] [client 24.83.37.136] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 16:24:03 2005] [notice] jk2_init() Found child 1219 in scoreboard slot 6
[Sun Dec 04 16:24:05 2005] [error] [client 58.225.62.140] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 16:24:06 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
[Sun Dec 04 16:24:06 2005] [error] mod_jk child workerEnv in error state 6
[Sun Dec 04 16:31:07 2005] [notice] jk2_init() Found child 1248 in scoreboard slot 7
</block>
<?xml version="1.0" encoding="UTF-8"?>
<anomalies>

<block lines="581-600" score="0.1746">
[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 6
[Sun Dec 04 07:18:00 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
[Sun Dec 04 07:18:00 2005] [error] mod_jk child workerEnv in error state 7
[Sun Dec 04 07:45:45 2005] [error] [client 63.13.186.196] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 08:54:17 2005] [error] [client 147.31.138.75] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 09:35:12 2005] [error] [client 207.203.80.15] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 10:53:30 2005] [error] [client 218.76.139.20] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:11:07 2005] [error] [client 24.147.151.74] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:33:18 2005] [error] [client 211.141.93.88] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 11:42:43 2005] [error] [client 216.127.124.16] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 12:33:13 2005] [error] [client 208.51.151.210] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 13:32:32 2005] [error] [client 65.68.235.27] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 14:29:00 2005] [error] [client 4.245.93.87] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 15:18:36 2005] [error] [client 67.154.58.130] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 15:59:01 2005] [error] [client 24.83.37.136] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 16:24:03 2005] [notice] jk2_init() Found child 1219 in scoreboard slot 6
[Sun Dec 04 16:24:05 2005] [error] [client 58.225.62.140] Directory index forbidden by rule: /var/www/html/
[Sun Dec 04 16:24:06 2005] [notice] workerEnv.init() ok /etc/httpd/conf/workers2.properties
[Sun Dec 04 16:24:06 2005] [error] mod_jk child workerEnv in error state 6
[Sun Dec 04 16:31:07 2005] [notice] jk2_init() Found child 1248 in scoreboard slot 7
</block>

</anomalies>
```

**Output structure:**
- Valid XML with proper declaration and root element
- Pretty-printed with indentation for readability
- XML special characters (`&`, `<`, `>`) are automatically escaped
- Each `<block>` contains metadata and original log content

**Metadata included:**
- **Line range**: References back to original file
- **Anomaly score**: Quantifies unusualness
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cordon"
version = "0.3.2"
version = "0.3.3"
description = "Semantic anomaly detection for system log files"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
2 changes: 1 addition & 1 deletion src/cordon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from cordon.core.types import AnalysisResult, MergedBlock, ScoredWindow, TextWindow
from cordon.pipeline import SemanticLogAnalyzer

__version__ = "0.3.2"
__version__ = "0.3.3"

__all__ = [
"SemanticLogAnalyzer",
Expand Down
28 changes: 21 additions & 7 deletions src/cordon/postprocess/formatter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Sequence
from pathlib import Path
from xml.sax.saxutils import escape

from cordon.core.types import MergedBlock

Expand All @@ -26,10 +27,10 @@ def format_blocks(self, merged_blocks: Sequence[MergedBlock], original_file: Pat
Formatted string with XML tags and original content
"""
if not merged_blocks:
return ""
return '<?xml version="1.0" encoding="UTF-8"?>\n<anomalies></anomalies>'

# merged blocks are sorted by start_line from the merger
output_parts = []
output_parts = ['<?xml version="1.0" encoding="UTF-8"?>', "<anomalies>", ""]
block_idx = 0
current_line = 1

Expand All @@ -51,18 +52,31 @@ def format_blocks(self, merged_blocks: Sequence[MergedBlock], original_file: Pat
content_lines.append(next_line)
current_line += 1

# format the block
# format the block with indentation
tag = (
f'<block lines="{block.start_line}-{block.end_line}" '
f' <block lines="{block.start_line}-{block.end_line}" '
f'score="{block.max_score:.4f}">'
)
content = "".join(content_lines)
output_parts.append(f"{tag}\n{content}</block>")
# Escape XML special characters to ensure valid XML output
escaped_content = escape(content)

# Indent each line of content for pretty printing
# Leave empty lines unindented so no trailing whitespace is added
indented_content = "\n".join(
" " + line if line else line for line in escaped_content.splitlines()
)

output_parts.append(f"{tag}\n{indented_content}\n </block>")
output_parts.append("") # blank line between blocks

# move to next block
block_idx += 1

current_line += 1

# join blocks with double newline separator
return "\n\n".join(output_parts)
# close the root element
output_parts.append("</anomalies>")

# join with newlines
return "\n".join(output_parts)
4 changes: 3 additions & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_analyze_empty_log(self) -> None:
assert result.total_windows == 0
assert result.significant_windows == 0
assert result.merged_blocks == 0
assert result.output == ""
assert (
result.output == '<?xml version="1.0" encoding="UTF-8"?>\n<anomalies></anomalies>'
)
finally:
temp_path.unlink()

Expand Down
42 changes: 41 additions & 1 deletion tests/test_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ def test_format_single_block(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)

assert '<?xml version="1.0" encoding="UTF-8"?>' in output
assert "<anomalies>" in output
assert "</anomalies>" in output
assert '<block lines="1-2" score="0.8000">' in output
assert "line 1\n" in output
assert "line 2\n" in output
Expand All @@ -161,6 +164,9 @@ def test_format_multiple_blocks(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)

assert '<?xml version="1.0" encoding="UTF-8"?>' in output
assert "<anomalies>" in output
assert "</anomalies>" in output
assert '<block lines="1-2" score="0.8000">' in output
assert '<block lines="5-7" score="0.9000">' in output
assert output.count("</block>") == 2
Expand All @@ -181,6 +187,40 @@ def test_format_empty_blocks(self) -> None:
formatter = OutputFormatter()
output = formatter.format_blocks(blocks, temp_path)

assert output == ""
assert output == '<?xml version="1.0" encoding="UTF-8"?>\n<anomalies></anomalies>'
finally:
temp_path.unlink()

def test_format_escapes_xml_special_chars(self) -> None:
    """Check that `&`, `<` and `>` in log content come out as XML entities."""
    log_lines = (
        "command: test |& tee file.txt\n",
        "error: x < y && z > 10\n",
        "message: \"quoted\" & 'single'\n",
    )
    with NamedTemporaryFile(mode="w", delete=False, suffix=".log") as handle:
        for log_line in log_lines:
            handle.write(log_line)
        temp_path = Path(handle.name)

    try:
        from cordon.core.types import MergedBlock

        # a single block spanning all three lines written above
        whole_file = MergedBlock(start_line=1, end_line=3, original_windows=(0,), max_score=0.8)
        output = OutputFormatter().format_blocks([whole_file], temp_path)

        # the XML declaration and root element wrap the output
        scaffolding = ('<?xml version="1.0" encoding="UTF-8"?>', "<anomalies>", "</anomalies>")
        for fragment in scaffolding:
            assert fragment in output

        # each special character appears as its entity somewhere in the output
        for entity in ("&amp;", "&lt;", "&gt;"):
            assert entity in output

        # the log lines themselves come through in escaped form
        assert "command: test |&amp; tee file.txt" in output
        assert "error: x &lt; y &amp;&amp; z &gt; 10" in output

        # and the raw, unescaped forms must no longer be present
        for raw in ("|& tee", "x < y", "z > 10"):
            assert raw not in output
    finally:
        temp_path.unlink()