Parse unstructured logs by learning patterns automatically. No regex required.
- Automatic pattern learning - Analyzes log files and learns structural patterns
- Smart field naming - Infers meaningful field names from context (method, status, path, etc.)
- Type detection - Automatically detects timestamps, IPs, URLs, UUIDs, numbers, booleans
- Multi-line support - Handles stack traces and continuation lines
- Format drift detection - Detects when log formats change mid-file
- Multiple outputs - JSON Lines, SQLite, DuckDB, Parquet
- Performance optimized - Streaming processing and parallel learning for large files
- Testing utilities - Mocks, fixtures, and sample data generators for reliable integrations
pip install log-sculptor
# With optional outputs
pip install log-sculptor[duckdb] # DuckDB support
pip install log-sculptor[parquet] # Parquet support
pip install log-sculptor[all] # All optional dependencies

# Learn and parse in one step
log-sculptor auto server.log -f jsonl -o parsed.jsonl
# Or separate steps for reuse
log-sculptor learn server.log -o patterns.json
log-sculptor parse server.log -p patterns.json -f jsonl -o parsed.jsonl

Learn patterns and parse in one step.
log-sculptor auto server.log -f jsonl -o output.jsonl
log-sculptor auto server.log -f sqlite -o logs.db
log-sculptor auto server.log -f duckdb -o logs.duckdb # requires [duckdb]
log-sculptor auto server.log -f parquet -o logs.parquet # requires [parquet]
# With multi-line support (stack traces, continuations)
log-sculptor auto server.log --multiline -f jsonl -o output.jsonl

Learn patterns from a log file.
log-sculptor learn server.log -o patterns.json
# With clustering for similar patterns
log-sculptor learn server.log -o patterns.json --cluster
# Incremental learning (update existing patterns)
log-sculptor learn new.log --update patterns.json -o patterns.json
# Handle multi-line entries
log-sculptor learn server.log -o patterns.json --multiline

Parse a log file using learned patterns.
log-sculptor parse server.log -p patterns.json -f jsonl -o output.jsonl
log-sculptor parse server.log -p patterns.json -f sqlite -o logs.db --include-raw

Display patterns from a patterns file.
log-sculptor show patterns.json

Validate patterns against a log file.
log-sculptor validate patterns.json server.log
# Exit codes: 0 = all matched, 1 = partial match, 2 = no matches
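These exit codes make validate easy to use as a gate in scripts or CI. A minimal Python sketch; the command and exit codes are as documented above, the failure handling is only illustrative:

import subprocess

# 0 = all lines matched, 1 = partial match, 2 = no matches (see above).
result = subprocess.run(["log-sculptor", "validate", "patterns.json", "server.log"])
if result.returncode != 0:
    raise SystemExit(f"pattern validation failed with exit code {result.returncode}")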
Merge similar patterns in a patterns file.

log-sculptor merge patterns.json -o merged.json --threshold 0.8

Detect format changes in a log file.
log-sculptor drift server.log -p patterns.json
log-sculptor drift server.log -p patterns.json --window 50

Learn patterns using parallel processing (for large files).
log-sculptor fast-learn large.log -o patterns.json --workers 4

Generate sample log data for testing and demos.
log-sculptor generate sample.log -t app -n 1000
log-sculptor generate apache.log -t apache -n 500 --seed 42
log-sculptor generate mixed.log -t mixed -n 1000 # For drift testing

Available types: app, apache, syslog, json, mixed
{"line_number": 1, "pattern_id": "a1b2c3", "matched": true, "fields": {"timestamp": "2024-01-15T10:30:00", "level": "INFO", "message": "Server started"}}Creates two tables:
The SQLite output (-f sqlite) creates two tables:

- patterns - Pattern metadata (id, frequency, confidence, structure, example)
- logs - Parsed records with extracted fields as columns
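A minimal query sketch with the standard sqlite3 module, assuming logs.db was produced with -f sqlite as above; the field columns in logs depend on what was learned, so inspect the schema rather than hard-coding names:

import sqlite3

con = sqlite3.connect("logs.db")
# Pattern metadata columns as listed above.
for row in con.execute("SELECT id, frequency, confidence FROM patterns"):
    print(row)
# Discover which field columns the logs table actually has.
print(con.execute("PRAGMA table_info(logs)").fetchall())
con.close()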
The DuckDB output (-f duckdb) uses the same schema as SQLite, optimized for analytical queries. Requires pip install log-sculptor[duckdb].
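A minimal sketch, assuming the duckdb Python package from the [duckdb] extra is installed; it relies only on the patterns and logs tables described above:

import duckdb

con = duckdb.connect("logs.duckdb")
print(con.execute("SELECT COUNT(*) FROM logs").fetchone())
print(con.execute("SELECT * FROM patterns LIMIT 5").fetchall())
con.close()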
The Parquet output (-f parquet) is a columnar format for efficient analytics. It creates output.parquet for logs and output_patterns.parquet for patterns. Requires pip install log-sculptor[parquet].
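A minimal loading sketch, assuming pandas with a Parquet engine such as pyarrow is available:

import pandas as pd

logs = pd.read_parquet("output.parquet")               # parsed records
patterns = pd.read_parquet("output_patterns.parquet")  # pattern metadata
print(logs.head())
print(patterns.head())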
from log_sculptor.core import learn_patterns, parse_logs, PatternSet
# Learn patterns
patterns = learn_patterns("server.log")
patterns.save("patterns.json")
# Parse logs
for record in parse_logs("server.log", patterns):
    print(record.fields)
# Load existing patterns
patterns = PatternSet.load("patterns.json")
# Incremental learning
new_patterns = learn_patterns("new_logs.log")
patterns.update(new_patterns, merge=True)
# Merge similar patterns
patterns.merge_similar(threshold=0.8)

from log_sculptor.core.streaming import stream_parse, parallel_learn
# Memory-efficient parsing
for record in stream_parse("large.log", patterns):
    process(record)
# Parallel pattern learning
patterns = parallel_learn("large.log", num_workers=4)

from log_sculptor.core import detect_drift
report = detect_drift("server.log", patterns)
print(f"Format changes: {len(report.format_changes)}")
for change in report.format_changes:
print(f" Line {change.line_number}: {change.old_pattern_id} -> {change.new_pattern_id}")-
Tokenization - Lines are split into typed tokens (TIMESTAMP, IP, QUOTED, BRACKET, NUMBER, WORD, PUNCT, WHITESPACE)
-
Clustering - Lines with identical token signatures are grouped together
-
Pattern Generation - Each cluster becomes a pattern with fields for variable tokens
-
Smart Naming - Field names are inferred from context:
- Previous token as indicator ("status 200" -> field named "status")
- Value patterns (GET/POST -> "method", 404 -> "status", /api/users -> "path")
- Token types (timestamps, IPs, UUIDs get appropriate names)
-
Type Detection - Field values are typed:
- Timestamps (ISO 8601, Apache CLF, syslog, Unix epoch)
- IPs, URLs, UUIDs
- Integers, floats, booleans
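To make the signature-clustering idea concrete, here is a deliberately simplified, hypothetical sketch. It is not log-sculptor's actual tokenizer (which recognizes more token types and does smarter naming), only an illustration of grouping lines by their token-type signature:

import re
from collections import defaultdict

# Toy token types; the real tokenizer also handles QUOTED, BRACKET, PUNCT, etc.
TOKEN_TYPES = [
    ("TIMESTAMP", re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")),
    ("IP", re.compile(r"\d{1,3}(?:\.\d{1,3}){3}")),
    ("NUMBER", re.compile(r"\d+")),
]

def signature(line):
    """Reduce a line to a tuple of token types; anything unrecognized is a WORD."""
    sig = []
    for token in line.split():
        for name, pattern in TOKEN_TYPES:
            if pattern.fullmatch(token):
                sig.append(name)
                break
        else:
            sig.append("WORD")
    return tuple(sig)

# Lines sharing a signature fall into the same cluster; each cluster would become a pattern.
clusters = defaultdict(list)
for line in [
    "2024-01-15T10:30:00 INFO Server started",
    "2024-01-15T10:30:05 INFO Request from 10.0.0.1 took 42 ms",
]:
    clusters[signature(line)].append(line)

for sig, lines in clusters.items():
    print(sig, len(lines))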
log-sculptor includes comprehensive testing utilities for building reliable integrations.
from log_sculptor.testing import (
generate_apache_logs,
generate_syslog,
generate_json_logs,
generate_app_logs,
write_sample_logs,
)
# Generate Apache logs
for line in generate_apache_logs(count=100, seed=42):
    print(line)
# Generate JSON structured logs
for line in generate_json_logs(count=50):
    print(line)
# Write directly to file
write_sample_logs("test.log", generator="app", count=1000, seed=42)from log_sculptor.testing import (
MockFileReader,
MockFileWriter,
MockPatternMatcher,
MockTypeDetector,
MockPattern,  # assumed to be exported alongside the other mocks; used below
)
# Mock file reader
reader = MockFileReader()
reader.add_file("/test.log", ["line1", "line2", "line3"])
lines = reader.read_lines(Path("/test.log"))
assert reader.read_count == 1
# Mock pattern matcher with custom responses
matcher = MockPatternMatcher()
matcher.add_response("GET /api", MockPattern(id="http"), {"method": "GET"})
pattern, fields = matcher.match("GET /api")

from pathlib import Path

from log_sculptor.testing import (
create_test_patterns,
create_test_log_file,
SandboxContext,
isolated_test,
)
# Create test patterns
patterns = create_test_patterns(count=3, with_examples=True)
# Create test log file
create_test_log_file(Path("test.log"), generator="apache", count=100)
# Isolated test environment with mocks
with isolated_test() as ctx:
    log_file = ctx.create_log_file("test.log", generator="app", count=50)
    ctx.add_mock_file("/virtual.log", ["mock line 1", "mock line 2"])
    # Tests run in isolation, temp files cleaned up automatically

from log_sculptor import get_container, register, resolve, reset_container
from log_sculptor.di import FileReader, FileWriter
# Register custom implementations
class MyFileReader:
    def read_lines(self, path):
        # Custom implementation
        pass
register(FileReader, lambda: MyFileReader())
reader = resolve(FileReader)
# Reset for test isolation
reset_container()

log-sculptor integrates with pytest-agents for enhanced test organization and AI-powered testing capabilities.
import pytest
@pytest.mark.unit
def test_pattern_learning(tmp_path):
"""Unit test with pytest-agents marker."""
from log_sculptor.core.patterns import learn_patterns
from log_sculptor.testing.generators import write_sample_logs
log_file = tmp_path / "test.log"
write_sample_logs(log_file, generator="apache", count=50, seed=42)
patterns = learn_patterns(log_file)
assert len(patterns.patterns) > 0
@pytest.mark.integration
def test_full_workflow(tmp_path):
"""Integration test with pytest-agents marker."""
# Full learn -> save -> load -> parse workflow
pass
@pytest.mark.performance
def test_large_file_parsing(tmp_path):
"""Performance benchmark test."""
passInstall with: pip install pytest-agents (requires Python 3.11+)
MIT