Skip to content

Commit 73d8ef1

Browse files
committed
feat: Add Runtime Signals to the knowledge graph: Cobertura test coverage report processing, associated new entity and relationship types, and integration into graph building.
1 parent 566a255 commit 73d8ef1

6 files changed

Lines changed: 237 additions & 14 deletions

File tree

src/knowcode/cli.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@
1111
from knowcode.context_synthesizer import ContextSynthesizer
1212
from knowcode.graph_builder import GraphBuilder
1313
from knowcode.knowledge_store import KnowledgeStore
14-
from knowcode.models import EntityKind
14+
from knowcode.models import EntityKind, RelationshipKind
1515

1616

1717
@click.group()
1818
@click.version_option(version=__version__)
19-
def main() -> None:
19+
def cli() -> None:
2020
"""KnowCode - Transform your codebase into an effective knowledge base."""
2121
pass
2222

2323

24-
@main.command()
24+
@cli.command()
2525
@click.argument("directory", type=click.Path(exists=True, file_okay=False))
2626
@click.option(
2727
"--output", "-o",
@@ -39,20 +39,28 @@ def main() -> None:
3939
default=False,
4040
help="Analyze git history and add temporal context.",
4141
)
42-
def analyze(directory: str, output: str, ignore: tuple[str, ...], temporal: bool) -> None:
42+
@click.option(
43+
"--coverage",
44+
type=click.Path(exists=True, dir_okay=False),
45+
help="Path to Cobertura XML coverage report.",
46+
)
47+
def analyze(directory: str, output: str, ignore: tuple[str, ...], temporal: bool, coverage: Optional[str]) -> None:
4348
"""Scan and analyze a codebase.
4449
4550
DIRECTORY: Path to the codebase to analyze.
4651
"""
4752
click.echo(f"Analyzing: {directory}")
4853
click.echo(f"Temporal analysis: {'Enabled' if temporal else 'Disabled'}")
54+
if coverage:
55+
click.echo(f"Coverage report: {coverage}")
4956

5057
# Build graph
5158
builder = GraphBuilder()
5259
builder.build_from_directory(
5360
root_dir=directory,
5461
additional_ignores=list(ignore),
5562
analyze_temporal=temporal,
63+
coverage_path=Path(coverage) if coverage else None,
5664
)
5765

5866
# Create store and save
@@ -72,7 +80,7 @@ def analyze(directory: str, output: str, ignore: tuple[str, ...], temporal: bool
7280
click.echo(f"\n Saved to: {save_path}")
7381

7482

75-
@main.command()
83+
@cli.command()
7684
@click.argument("query_type", type=click.Choice(["callers", "callees", "deps", "search"]))
7785
@click.argument("target")
7886
@click.option(
@@ -179,7 +187,7 @@ def query(query_type: str, target: str, store: str, as_json: bool) -> None:
179187
click.echo(f" • {name}{extra}")
180188

181189

182-
@main.command()
190+
@cli.command()
183191
@click.argument("target")
184192
@click.option(
185193
"--store", "-s",
@@ -227,7 +235,7 @@ def context(target: str, store: str, max_tokens: int) -> None:
227235
click.echo("(truncated)", err=True)
228236

229237

230-
@main.command()
238+
@cli.command()
231239
@click.option(
232240
"--store", "-s",
233241
type=click.Path(exists=True),
@@ -280,7 +288,7 @@ def export(store: str, output: str) -> None:
280288
click.echo(f" Index: {index_path}")
281289

282290

283-
@main.command()
291+
@cli.command()
284292
@click.option(
285293
"--store", "-s",
286294
type=click.Path(exists=True),
@@ -320,9 +328,8 @@ def stats(store: str) -> None:
320328
click.echo(f" {kind}: {count}")
321329

322330

323-
if __name__ == "__main__":
324-
main()
325-
@main.command()
331+
332+
@cli.command()
326333
@click.argument("target", required=False)
327334
@click.option(
328335
"--store", "-s",
@@ -406,3 +413,6 @@ def history(target: Optional[str], store: str, limit: int) -> None:
406413
date = commit.metadata.get("date", "")
407414
click.echo(f" {date} {commit.name} {stats}: {commit.docstring.splitlines()[0]}")
408415

416+
417+
if __name__ == "__main__":
418+
cli()

src/knowcode/graph_builder.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from knowcode.parsers.javascript_parser import JavaScriptParser
99
from knowcode.parsers.java_parser import JavaParser
1010
from knowcode.scanner import FileInfo, Scanner
11+
from knowcode.signals import CoverageProcessor
1112
from knowcode.temporal import TemporalAnalyzer
1213

1314

@@ -31,6 +32,7 @@ def build_from_directory(
3132
root_dir: str | Path,
3233
additional_ignores: Optional[list[str]] = None,
3334
analyze_temporal: bool = False,
35+
coverage_path: Optional[Path] = None,
3436
) -> "GraphBuilder":
3537
"""Build graph by scanning and parsing a directory.
3638
@@ -58,6 +60,12 @@ def build_from_directory(
5860
result = temporal_analyzer.analyze_history()
5961
self._merge_result(result)
6062

63+
# Coverage Analysis
64+
if coverage_path:
65+
coverage_processor = CoverageProcessor(root_dir)
66+
result = coverage_processor.process_cobertura(coverage_path)
67+
self._merge_result(result)
68+
6169
return self
6270

6371
def build_from_files(self, files: list[FileInfo]) -> "GraphBuilder":

src/knowcode/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ class EntityKind(str, Enum):
2121
# Temporal entities
2222
COMMIT = "commit"
2323
AUTHOR = "author"
24+
# Runtime entities
25+
TEST_RUN = "test_run"
26+
COVERAGE_REPORT = "coverage_report"
2427

2528

2629
class RelationshipKind(str, Enum):
@@ -35,6 +38,9 @@ class RelationshipKind(str, Enum):
3538
CHANGED_BY = "changed_by" # Entity -> Commit
3639
AUTHORED = "authored" # Author -> Commit
3740
MODIFIED = "modified" # Commit -> Entity
41+
# Runtime relationships
42+
COVERS = "covers" # Report/Test -> Entity
43+
EXECUTED_BY = "executed_by" # Entity -> Report/Test
3844

3945

4046
@dataclass

src/knowcode/parsers/yaml_parser.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,12 @@ def _extract_keys(
105105
) -> None:
106106
"""Recursively extract configuration keys."""
107107
for key, value in data.items():
108-
qualified_name = f"{prefix}{key}" if prefix else key
108+
str_key = str(key)
109+
qualified_name = f"{prefix}{str_key}" if prefix else str_key
109110
key_id = f"{file_path}::{qualified_name}"
110111

111112
# Try to find line number for this key
112-
line_num = self._find_key_line(key, prefix, lines)
113+
line_num = self._find_key_line(str_key, prefix, lines)
113114

114115
# Determine value representation
115116
if isinstance(value, dict):
@@ -122,7 +123,7 @@ def _extract_keys(
122123
key_entity = Entity(
123124
id=key_id,
124125
kind=EntityKind.CONFIG_KEY,
125-
name=key,
126+
name=str_key,
126127
qualified_name=qualified_name,
127128
location=Location(
128129
file_path=str(file_path),

src/knowcode/signals.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Runtime signal processing (e.g., coverage, traces)."""
2+
3+
import xml.etree.ElementTree as ET
4+
from pathlib import Path
5+
from typing import Optional
6+
7+
from knowcode.models import (
8+
Entity,
9+
EntityKind,
10+
Location,
11+
ParseResult,
12+
Relationship,
13+
RelationshipKind,
14+
)
15+
16+
17+
class CoverageProcessor:
18+
"""Process coverage reports."""
19+
20+
def __init__(self, root_dir: str | Path) -> None:
21+
"""Initialize coverage processor.
22+
23+
Args:
24+
root_dir: Root directory of the codebase (for relative path resolution).
25+
"""
26+
self.root_dir = Path(root_dir).resolve()
27+
28+
def process_cobertura(self, xml_path: str | Path) -> ParseResult:
29+
"""Process a Cobertura XML coverage report.
30+
31+
Args:
32+
xml_path: Path to coverage.xml.
33+
34+
Returns:
35+
ParseResult containing COVERAGE_REPORT entity and COVERS relationships.
36+
"""
37+
xml_path = Path(xml_path)
38+
if not xml_path.exists():
39+
return ParseResult(
40+
file_path=str(xml_path),
41+
entities=[],
42+
relationships=[],
43+
errors=[f"Coverage file not found: {xml_path}"],
44+
)
45+
46+
entities: list[Entity] = []
47+
relationships: list[Relationship] = []
48+
errors: list[str] = []
49+
50+
try:
51+
tree = ET.parse(xml_path)
52+
root = tree.getroot()
53+
54+
# Create Report Entity
55+
report_id = f"coverage::{xml_path.name}"
56+
# Extract timestamp if available available in root attributes usually 'timestamp'
57+
timestamp = root.get("timestamp", str(xml_path.stat().st_mtime))
58+
59+
report_entity = Entity(
60+
id=report_id,
61+
kind=EntityKind.COVERAGE_REPORT,
62+
name=f"Coverage Report ({xml_path.name})",
63+
qualified_name=xml_path.name,
64+
location=Location(str(xml_path), 0, 0),
65+
metadata={
66+
"timestamp": timestamp,
67+
"line-rate": root.get("line-rate", "0"),
68+
"branch-rate": root.get("branch-rate", "0"),
69+
},
70+
)
71+
entities.append(report_entity)
72+
73+
# Traverse packages -> classes -> lines
74+
# Structure: coverage -> packages -> package -> classes -> class -> lines -> line
75+
76+
# We want to map files/classes to the report.
77+
# <class name="knowcode.models" filename="src/knowcode/models.py" line-rate="1.0" ...>
78+
79+
for cls in root.findall(".//class"):
80+
filename = cls.get("filename")
81+
if not filename:
82+
continue
83+
84+
# Resolve file path to simple module ID
85+
# We assume standard module ID: /abs/path/to/file::filename_stem
86+
# filename in coverage.xml is usually relative to root
87+
abs_file_path = (self.root_dir / filename).resolve()
88+
module_name = abs_file_path.stem
89+
module_id = f"{abs_file_path}::{module_name}"
90+
91+
line_rate = cls.get("line-rate", "0")
92+
93+
# Relationship: REPORT -> COVERS -> MODULE
94+
relationships.append(
95+
Relationship(
96+
source_id=report_id,
97+
target_id=module_id,
98+
kind=RelationshipKind.COVERS,
99+
metadata={
100+
"line-rate": line_rate,
101+
"hits": cls.get("lines-covered", "0") + "/" + cls.get("lines-valid", "0")
102+
}
103+
)
104+
)
105+
106+
# We could map specific lines to entities if we had line ranges of entities loaded.
107+
# Since CoverageProcessor runs independently or after graph build,
108+
# we usually just link to the File/Module level for MVP.
109+
# Detailed line mapping requires access to the full graph to find which entity covers line X.
110+
# For v1.4 MVP, linking to Module is sufficient.
111+
112+
# Note: We can also add "EXECUTED_BY" from Module to Report
113+
relationships.append(
114+
Relationship(
115+
source_id=module_id,
116+
target_id=report_id,
117+
kind=RelationshipKind.EXECUTED_BY
118+
)
119+
)
120+
121+
except ET.ParseError as e:
122+
errors.append(f"Invalid XML format: {e}")
123+
except Exception as e:
124+
errors.append(f"Error processing coverage: {e}")
125+
126+
return ParseResult(
127+
file_path=str(xml_path),
128+
entities=entities,
129+
relationships=relationships,
130+
errors=errors,
131+
)

tests/test_signals.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Tests for Signal Ingestion."""
2+
3+
import pytest
4+
from pathlib import Path
5+
from knowcode.signals import CoverageProcessor
6+
from knowcode.models import EntityKind, RelationshipKind
7+
8+
@pytest.fixture
def coverage_xml(tmp_path):
    """Write a small Cobertura report (two classes: module_a, module_b) and return its path."""
    report_file = tmp_path / "coverage.xml"
    report_file.write_text(
        """<?xml version="1.0" ?>
<coverage line-rate="0.5" branch-rate="0.0" lines-covered="10" lines-valid="20" timestamp="123456789">
<packages>
<package name="my_package" line-rate="0.5" branch-rate="0.0" complexity="0.0">
<classes>
<class name="module_a" filename="module_a.py" line-rate="1.0" branch-rate="0.0" complexity="0.0" lines-covered="5" lines-valid="5">
<lines>
<line hits="1" number="1"/>
<line hits="1" number="2"/>
</lines>
</class>
<class name="module_b" filename="module_b.py" line-rate="0.0" branch-rate="0.0" complexity="0.0" lines-covered="0" lines-valid="5">
<lines>
<line hits="0" number="1"/>
</lines>
</class>
</classes>
</package>
</packages>
</coverage>
""",
        encoding="utf-8",
    )
    return report_file
35+
36+
def test_process_cobertura(tmp_path, coverage_xml):
    """Check the entities and relationships built from the sample Cobertura report."""
    # Create the files the report refers to so their paths exist on disk.
    stems = ("module_a", "module_b")
    for stem in stems:
        (tmp_path / f"{stem}.py").touch()

    result = CoverageProcessor(tmp_path).process_cobertura(coverage_xml)

    assert not result.errors

    # Exactly one report entity, carrying the root element's attributes.
    reports = [e for e in result.entities if e.kind == EntityKind.COVERAGE_REPORT]
    assert len(reports) == 1
    report = reports[0]
    assert report.name == "Coverage Report (coverage.xml)"
    assert report.metadata["line-rate"] == "0.5"
    assert report.metadata["timestamp"] == "123456789"

    # Module IDs follow "<absolute file path>::<file stem>".
    module_ids = {
        stem: f"{(tmp_path / f'{stem}.py').resolve()}::{stem}" for stem in stems
    }

    # Report -> COVERS -> Module, one edge per <class> element.
    covers_rels = [r for r in result.relationships if r.kind == RelationshipKind.COVERS]
    assert len(covers_rels) == 2  # module_a and module_b
    assert {r.target_id for r in covers_rels} == set(module_ids.values())
    assert all(r.source_id == report.id for r in covers_rels)

    # Per-class metadata is carried on the COVERS edge.
    rel_a = next(r for r in covers_rels if r.target_id == module_ids["module_a"])
    assert rel_a.metadata["line-rate"] == "1.0"
    assert rel_a.metadata["hits"] == "5/5"

    # Module -> EXECUTED_BY -> Report mirrors every COVERS edge.
    executed_by = {
        (r.source_id, r.target_id)
        for r in result.relationships
        if r.kind == RelationshipKind.EXECUTED_BY
    }
    assert executed_by == {(mid, report.id) for mid in module_ids.values()}

0 commit comments

Comments
 (0)