diff --git a/desloppify/engine/detectors/coverage/mapping_imports.py b/desloppify/engine/detectors/coverage/mapping_imports.py index f975d257..ddd46d6f 100644 --- a/desloppify/engine/detectors/coverage/mapping_imports.py +++ b/desloppify/engine/detectors/coverage/mapping_imports.py @@ -8,11 +8,13 @@ from desloppify.engine.hook_registry import get_lang_hook + def _load_lang_test_coverage_module(lang_name: str | None): """Load language-specific test coverage helpers from ``lang//test_coverage.py``.""" return get_lang_hook(lang_name, "test_coverage") or object() + def _infer_lang_name(test_files: set[str], production_files: set[str]) -> str | None: """Infer language from known file extensions when explicit lang is unavailable.""" paths = list(test_files) + list(production_files) @@ -40,6 +42,33 @@ def _infer_lang_name(test_files: set[str], production_files: set[str]) -> str | return None + +def _discover_additional_test_mapping_files( + test_files: set[str], + production_files: set[str], + lang_name: str | None = None, +) -> set[str]: + """Allow language hooks to contribute mapping-only files for coverage discovery.""" + if lang_name is None: + lang_name = _infer_lang_name(test_files, production_files) + mod = _load_lang_test_coverage_module(lang_name) + discover = getattr(mod, "discover_test_mapping_files", None) + if not callable(discover): + return set() + + discovered = discover(test_files, production_files) + if not discovered: + return set() + + result: set[str] = set() + for path in discovered: + if not path: + continue + result.add(str(Path(path).resolve())) + return result + + + def _resolve_import( spec: str, test_path: str, @@ -53,6 +82,7 @@ def _resolve_import( return None + def _resolve_barrel_reexports( filepath: str, production_files: set[str], @@ -68,6 +98,7 @@ def _resolve_barrel_reexports( return set() + def _parse_test_imports( test_path: str, production_files: set[str], @@ -111,6 +142,7 @@ def _parse_test_imports( __all__ = [ + "_discover_additional_test_mapping_files", "_infer_lang_name", "_load_lang_test_coverage_module", "_parse_test_imports", diff --git a/desloppify/engine/detectors/test_coverage/detector.py b/desloppify/engine/detectors/test_coverage/detector.py index 1b7f9502..1080cb49 100644 --- a/desloppify/engine/detectors/test_coverage/detector.py +++ b/desloppify/engine/detectors/test_coverage/detector.py @@ -8,6 +8,9 @@ naming_based_mapping, transitive_coverage, ) +from desloppify.engine.detectors.coverage.mapping_imports import ( + _discover_additional_test_mapping_files, +) from desloppify.engine.policy.zones import FileZoneMap from .discovery import ( @@ -21,6 +24,7 @@ ) + def detect_test_coverage( graph: dict, zone_map: FileZoneMap, @@ -49,14 +53,23 @@ def detect_test_coverage( entries = _no_tests_issues(scorable, graph, lang_name, complexity_map) return entries, potential - directly_tested = set(inline_tested) + mapping_test_files = set(test_files) if test_files: + mapping_test_files |= _discover_additional_test_mapping_files( + test_files, + production_files, + lang_name, + ) + + directly_tested = set(inline_tested) + if mapping_test_files: directly_tested |= import_based_mapping( graph, - test_files, + mapping_test_files, production_files, lang_name, ) + if test_files: directly_tested |= naming_based_mapping(test_files, production_files, lang_name) transitively_tested = transitive_coverage(directly_tested, graph, production_files) diff --git a/desloppify/languages/_framework/treesitter/analysis/unused_imports.py b/desloppify/languages/_framework/treesitter/analysis/unused_imports.py index ea2727a8..f0689991 100644 --- a/desloppify/languages/_framework/treesitter/analysis/unused_imports.py +++ b/desloppify/languages/_framework/treesitter/analysis/unused_imports.py @@ -147,27 +147,28 @@ def _extract_import_name(import_path: str) -> str: "MyApp::Model::User" -> "User" "Data.List" -> "List" """ - # Strip common path separators and take the last segment. - for sep in ("::", ".", "/", "\\"): - if sep in import_path: - parts = import_path.split(sep) - # Filter out empty segments and take the last. - parts = [p for p in parts if p] + candidate = import_path.strip() + for sep in ("/", "\\"): + if sep in candidate: + parts = [p for p in candidate.split(sep) if p] if parts: - name = parts[-1] - # Strip file extensions. - for ext in (".go", ".rs", ".rb", ".py", ".js", ".jsx", ".ts", - ".tsx", ".java", ".kt", ".cs", ".fs", ".ml", - ".ex", ".erl", ".hs", ".lua", ".zig", ".pm", - ".sh", ".pl", ".scala", ".swift", ".php", - ".dart", ".mjs", ".cjs"): - if name.endswith(ext): - name = name[:-len(ext)] - break - return name - - # No separator — the path itself is the name. - return import_path + candidate = parts[-1] + + for ext in (".go", ".rs", ".rb", ".py", ".js", ".jsx", ".ts", + ".tsx", ".java", ".kt", ".cs", ".fs", ".ml", + ".ex", ".erl", ".hs", ".lua", ".zig", ".pm", + ".sh", ".pl", ".scala", ".swift", ".php", + ".dart", ".mjs", ".cjs", ".h", ".hh", ".hpp"): + if candidate.endswith(ext): + return candidate[:-len(ext)] + + for sep in ("::", "."): + if sep in candidate: + parts = [p for p in candidate.split(sep) if p] + if parts: + return parts[-1] + + return candidate __all__ = ["detect_unused_imports"] diff --git a/desloppify/languages/cxx/__init__.py b/desloppify/languages/cxx/__init__.py index 14db8afb..fb49c31a 100644 --- a/desloppify/languages/cxx/__init__.py +++ b/desloppify/languages/cxx/__init__.py @@ -49,6 +49,11 @@ def detect_lang_security_detailed(self, files, zone_map) -> LangSecurityResult: return detect_cxx_security(files, zone_map) def __init__(self): + tree_sitter_phases = [ + phase for phase in all_treesitter_phases("cpp") + if phase.label != "Unused imports" + ] + super().__init__( name="cxx", extensions=CXX_EXTENSIONS, @@ -61,7 +66,7 @@ def __init__(self): DetectorPhase("Structural analysis", phase_structural), DetectorPhase("Coupling + cycles + orphaned", phase_coupling), DetectorPhase("cppcheck", phase_cppcheck_issue), - *all_treesitter_phases("cpp"), + *tree_sitter_phases, detector_phase_signature(), detector_phase_test_coverage(), detector_phase_security(), diff --git a/desloppify/languages/cxx/detectors/security.py b/desloppify/languages/cxx/detectors/security.py index b2c10b9e..2d457085 100644 --- a/desloppify/languages/cxx/detectors/security.py +++ b/desloppify/languages/cxx/detectors/security.py @@ -659,7 +659,13 @@ def detect_cxx_security( tool_results.append(_run_clang_tidy(scan_root, scoped_files)) tool_results.append(_run_cppcheck(scan_root, scoped_files)) - tool_entries = [entry for result in tool_results for entry in result.entries] + scoped_file_set = {str(Path(filepath).resolve()) for filepath in scoped_files} + tool_entries = [ + entry + for result in tool_results + for entry in result.entries + if str(Path(str(entry.get("file", ""))).resolve()) in scoped_file_set + ] covered_files = { filepath for result in tool_results diff --git a/desloppify/languages/cxx/test_coverage.py b/desloppify/languages/cxx/test_coverage.py index e36e6149..653086c0 100644 --- a/desloppify/languages/cxx/test_coverage.py +++ b/desloppify/languages/cxx/test_coverage.py @@ -15,10 +15,17 @@ BARREL_BASENAMES: set[str] = set() _INCLUDE_RE = re.compile(r'(?m)^\s*#include\s*[<"]([^>"]+)[>"]') _SOURCE_EXTENSIONS = (".c", ".cc", ".cpp", ".cxx") +_HEADER_EXTENSIONS = (".h", ".hh", ".hpp") +_CMAKE_COMMENT_RE = re.compile(r"(?m)#.*$") +_CMAKE_COMMAND_RE = re.compile(r"\b(?:add_executable|add_library|target_sources)\s*\(", re.IGNORECASE) +_CMAKE_SOURCE_SPEC_RE = re.compile( + r'"([^"\n]+\.(?:cpp|cxx|cc|c|hpp|hh|h))"|([^\s()"]+\.(?:cpp|cxx|cc|c|hpp|hh|h))', + re.IGNORECASE, +) _TESTABLE_LOGIC_RE = re.compile( r"\b(?:class|struct|enum|namespace)\b|^\s*(?:inline\s+|static\s+)?[A-Za-z_]\w*(?:[\s*&:<>]+[A-Za-z_]\w*)*\s+\w+\s*\(", re.MULTILINE, - ) +) def has_testable_logic(filepath: str, content: str) -> bool: @@ -31,6 +38,7 @@ def has_testable_logic(filepath: str, content: str) -> bool: return bool(_TESTABLE_LOGIC_RE.search(content)) + def _match_candidate(candidate: Path, production_files: set[str]) -> str | None: resolved = str(candidate.resolve()) normalized = {str(Path(path).resolve()): path for path in production_files} @@ -39,6 +47,7 @@ def _match_candidate(candidate: Path, production_files: set[str]) -> str | None: return None + def resolve_import_spec( spec: str, test_path: str, @@ -67,15 +76,74 @@ def resolve_import_spec( return None + def resolve_barrel_reexports(filepath: str, production_files: set[str]) -> set[str]: """C/C++ has no barrel-file re-export expansion.""" del filepath, production_files return set() + +def _unique_preserving_order(specs: list[str]) -> list[str]: + seen: set[str] = set() + ordered: list[str] = [] + for spec in specs: + cleaned = (spec or "").strip() + if not cleaned or cleaned in seen: + continue + seen.add(cleaned) + ordered.append(cleaned) + return ordered + + + +def _parse_cmake_source_specs(content: str) -> list[str]: + if not _CMAKE_COMMAND_RE.search(content): + return [] + stripped = _CMAKE_COMMENT_RE.sub("", content) + specs: list[str] = [] + for quoted, bare in _CMAKE_SOURCE_SPEC_RE.findall(stripped): + spec = quoted or bare + if spec: + specs.append(spec) + return _unique_preserving_order(specs) + + + def parse_test_import_specs(content: str) -> list[str]: - """Return include-like specs from test content.""" - return [match.group(1).strip() for match in _INCLUDE_RE.finditer(content)] + """Return include-like specs from test content and test build files.""" + include_specs = [match.group(1).strip() for match in _INCLUDE_RE.finditer(content)] + cmake_specs = _parse_cmake_source_specs(content) + return _unique_preserving_order(include_specs + cmake_specs) + + + +def _iter_test_tree_ancestors(test_file: Path) -> list[Path]: + ancestors = [test_file.parent, *test_file.parents] + stop_at: int | None = None + for index, ancestor in enumerate(ancestors): + if ancestor.name.lower() in {"tests", "test"}: + stop_at = index + break + if stop_at is None: + return [] + return ancestors[: stop_at + 1] + + + +def discover_test_mapping_files(test_files: set[str], production_files: set[str]) -> set[str]: + """Find CMake/Make build files that define test target sources within test trees.""" + del production_files + discovered: set[str] = set() + for test_path in sorted(test_files): + test_file = Path(test_path).resolve() + for ancestor in _iter_test_tree_ancestors(test_file): + for build_file in ("CMakeLists.txt", "Makefile"): + candidate = ancestor / build_file + if candidate.is_file(): + discovered.add(str(candidate.resolve())) + return discovered + def map_test_to_source(test_path: str, production_set: set[str]) -> str | None: @@ -104,6 +172,7 @@ def map_test_to_source(test_path: str, production_set: set[str]) -> str | None: return None + def strip_test_markers(basename: str) -> str | None: """Strip common C/C++ test-name markers to derive source basename.""" stem, ext = os.path.splitext(basename) @@ -120,6 +189,7 @@ def strip_test_markers(basename: str) -> str | None: return None + def strip_comments(content: str) -> str: """Strip C-style comments while preserving string literals.""" return strip_c_style_comments(content) diff --git a/desloppify/languages/cxx/tests/test_coverage.py b/desloppify/languages/cxx/tests/test_coverage.py index e4ab2a29..e5909d4e 100644 --- a/desloppify/languages/cxx/tests/test_coverage.py +++ b/desloppify/languages/cxx/tests/test_coverage.py @@ -1,6 +1,24 @@ from __future__ import annotations +from pathlib import Path + import desloppify.languages.cxx.test_coverage as cxx_cov +from desloppify.engine.detectors.test_coverage.detector import detect_test_coverage +from desloppify.engine.policy.zones import FileZoneMap, Zone, ZoneRule + + +def _make_zone_map(file_list: list[str]) -> FileZoneMap: + rules = [ + ZoneRule(Zone.TEST, ["test_", ".test.", ".spec.", "/tests/", "\\tests\\", "/__tests__/", "\\__tests__\\"]), + ] + return FileZoneMap(file_list, rules) + + +def _write(tmp_path: Path, rel_path: str, content: str) -> str: + target = tmp_path / rel_path + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + return str(target) def test_strip_test_markers_for_cxx(): @@ -14,6 +32,21 @@ def test_parse_test_import_specs_extracts_includes(): assert cxx_cov.parse_test_import_specs(content) == ["widget.hpp", "gtest/gtest.h"] +def test_parse_test_import_specs_extracts_cmake_sources(): + content = """ +add_executable(WidgetBehaviorTest + widget_behavior.cpp + ../src/widget.cpp + ../src/widget.hpp +) +""" + assert cxx_cov.parse_test_import_specs(content) == [ + "widget_behavior.cpp", + "../src/widget.cpp", + "../src/widget.hpp", + ] + + def test_has_testable_logic_accepts_function_definitions_without_regex_crash(): assert cxx_cov.has_testable_logic("widget.cpp", "int widget() { return 1; }\n") is True assert cxx_cov.has_testable_logic("widget_test.cpp", "int widget() { return 1; }\n") is False @@ -30,9 +63,9 @@ def test_map_test_to_source_and_resolve_import_spec(tmp_path): source.parent.mkdir(parents=True) test_file.parent.mkdir(parents=True) - source.write_text("int widget() { return 1; }\n") - header.write_text("int widget();\n") - test_file.write_text('#include "../src/widget.hpp"\n') + source.write_text("int widget() { return 1; }\n", encoding="utf-8") + header.write_text("int widget();\n", encoding="utf-8") + test_file.write_text('#include "../src/widget.hpp"\n', encoding="utf-8") production = {str(source.resolve()), str(header.resolve())} @@ -41,3 +74,50 @@ def test_map_test_to_source_and_resolve_import_spec(tmp_path): cxx_cov.resolve_import_spec("../src/widget.hpp", str(test_file), production) == str(header.resolve()) ) + + +def test_discover_test_mapping_files_finds_cmakelists_within_test_tree(tmp_path): + test_file = tmp_path / "tests" / "kernel_parity" / "widget_behavior.cpp" + cmake_file = tmp_path / "tests" / "CMakeLists.txt" + nested_cmake = tmp_path / "tests" / "kernel_parity" / "CMakeLists.txt" + test_file.parent.mkdir(parents=True) + test_file.write_text("// test\n", encoding="utf-8") + cmake_file.write_text("add_executable(WidgetBehaviorTest widget_behavior.cpp ../src/widget.cpp)\n", encoding="utf-8") + nested_cmake.write_text("add_library(ParityHelpers ../src/widget.hpp)\n", encoding="utf-8") + + discovered = cxx_cov.discover_test_mapping_files({str(test_file.resolve())}, set()) + + assert discovered == {str(cmake_file.resolve()), str(nested_cmake.resolve())} + + +def test_detect_test_coverage_uses_cmake_test_sources_for_direct_mapping(tmp_path): + prod = _write(tmp_path, "src/widget.cpp", "int widget() { return 1; }\n" * 12) + test_file = _write( + tmp_path, + "tests/widget_behavior.cpp", + '#include \n\nTEST(WidgetBehavior, Smoke) {\n EXPECT_EQ(1, 1);\n}\n', + ) + _write( + tmp_path, + "tests/CMakeLists.txt", + "add_executable(WidgetBehaviorTest\n" + " widget_behavior.cpp\n" + " ../src/widget.cpp\n" + ")\n", + ) + + zone_map = _make_zone_map([prod, test_file]) + graph = { + prod: {"imports": set(), "importer_count": 0}, + test_file: {"imports": set(), "importer_count": 0}, + } + + entries, potential = detect_test_coverage(graph, zone_map, "cxx") + + assert potential > 0 + untested = [ + entry + for entry in entries + if entry["file"] == prod and entry["detail"]["kind"] in {"untested_module", "untested_critical"} + ] + assert untested == [] diff --git a/desloppify/languages/cxx/tests/test_init.py b/desloppify/languages/cxx/tests/test_init.py index 66393f0f..35a5172d 100644 --- a/desloppify/languages/cxx/tests/test_init.py +++ b/desloppify/languages/cxx/tests/test_init.py @@ -38,3 +38,9 @@ def fake_all_treesitter_phases(spec_name: str): assert captured["spec_name"] == "cpp" assert "Tree-sitter sentinel" in labels + + +def test_cxx_excludes_unused_imports_phase(): + cfg = cxx_mod.CxxConfig() + labels = {phase.label for phase in cfg.phases} + assert "Unused imports" not in labels \ No newline at end of file diff --git a/desloppify/languages/cxx/tests/test_security.py b/desloppify/languages/cxx/tests/test_security.py index 2c52f1e4..1ea01e9b 100644 --- a/desloppify/languages/cxx/tests/test_security.py +++ b/desloppify/languages/cxx/tests/test_security.py @@ -91,6 +91,48 @@ def _fake_run_tool_result(cmd, path, parser, **_kwargs): assert entry["detail"]["check_id"] == "clang-analyzer-security.insecureAPI.strcpy" +def test_detect_cxx_security_ignores_findings_outside_scanned_files( + tmp_path, + monkeypatch, + ): + source = tmp_path / "src" / "unsafe.cpp" + source.parent.mkdir(parents=True) + source.write_text("int main() { return 0; }\n") + (tmp_path / "compile_commands.json").write_text("[]\n") + external_header = tmp_path / "vendor" / "external.hpp" + + def _fake_which(cmd: str) -> str | None: + return "C:/tools/clang-tidy.exe" if cmd == "clang-tidy" else None + + def _fake_run_tool_result(cmd, path, parser, **_kwargs): + assert cmd.startswith("clang-tidy ") + output = ( + f"{source}:4:5: warning: call to 'strcpy' is insecure because it can overflow " + "[clang-analyzer-security.insecureAPI.strcpy]\n" + f"{external_header}:18:3: warning: declaration uses reserved identifier " + "[cert-dcl37-c]\n" + ) + return ToolRunResult(entries=parser(output, path), status="ok", returncode=1) + + monkeypatch.setattr( + security_mod, + "shutil", + SimpleNamespace(which=_fake_which), + raising=False, + ) + monkeypatch.setattr( + security_mod, + "run_tool_result", + _fake_run_tool_result, + raising=False, + ) + + result = detect_cxx_security([str(source.resolve())], zone_map=None) + + assert len(result.entries) == 1 + assert result.entries[0]["file"] == str(source.resolve()) + + def test_detect_cxx_security_uses_cppcheck_when_clang_tidy_missing_without_reduced_coverage( tmp_path, monkeypatch, @@ -500,15 +542,15 @@ def _fake_run_tool_result(cmd, path, parser, **_kwargs): } -def test_normalize_tool_entries_ignores_cppcheck_syntax_error_with_fontsystem_name(): +def test_normalize_tool_entries_ignores_cppcheck_syntax_error_with_projectish_name(): entries = security_mod._normalize_tool_entries( [ { - "file": r"D:/repo/FontSystem.h", + "file": r"D:/repo/WidgetCatalog.h", "line": 9, "severity": "error", "check_id": "syntaxError", - "message": "Code 'namespaceFontSystem{' is invalid C code.", + "message": "Code 'namespaceWidgetCatalog{' is invalid C code.", "source": "cppcheck", } ] @@ -568,3 +610,46 @@ def test_cxx_config_security_hook_returns_lang_result(tmp_path): assert result.files_scanned == 1 assert result.entries assert result.entries[0]["detail"]["kind"] == "command_injection" + + +def test_detect_cxx_security_ignores_findings_outside_scoped_files( + tmp_path, + monkeypatch, + ): + source = tmp_path / "src" / "unsafe.cpp" + source.parent.mkdir(parents=True) + source.write_text("int main() { return 0; }\n") + (tmp_path / "compile_commands.json").write_text("[]\n") + + external_header = tmp_path / "vendor" / "external.hpp" + + def _fake_which(cmd: str) -> str | None: + return "C:/tools/clang-tidy.exe" if cmd == "clang-tidy" else None + + def _fake_run_tool_result(cmd, path, parser, **_kwargs): + assert str(path.resolve()) == str(tmp_path.resolve()) + output = ( + f"{source}:4:5: warning: call to 'strcpy' is insecure because it can overflow " + "[clang-analyzer-security.insecureAPI.strcpy]\n" + f"{external_header}:18:3: warning: declaration uses reserved identifier " + "[cert-dcl37-c]\n" + ) + return ToolRunResult(entries=parser(output, path), status="ok", returncode=1) + + monkeypatch.setattr( + security_mod, + "shutil", + SimpleNamespace(which=_fake_which), + raising=False, + ) + monkeypatch.setattr( + security_mod, + "run_tool_result", + _fake_run_tool_result, + raising=False, + ) + + result = detect_cxx_security([str(source.resolve())], zone_map=None) + + assert len(result.entries) == 1 + assert result.entries[0]["file"] == str(source.resolve()) \ No newline at end of file diff --git a/desloppify/tests/lang/common/test_treesitter_analysis_direct.py b/desloppify/tests/lang/common/test_treesitter_analysis_direct.py index eccdfee2..c06e2e2a 100644 --- a/desloppify/tests/lang/common/test_treesitter_analysis_direct.py +++ b/desloppify/tests/lang/common/test_treesitter_analysis_direct.py @@ -186,6 +186,8 @@ def test_unused_import_helpers_and_detection(monkeypatch) -> None: assert unused_imports_mod._extract_alias(go_alias) == "pkg" assert unused_imports_mod._extract_import_name("pkg/module") == "module" assert unused_imports_mod._extract_import_name("crate::Thing") == "Thing" + assert unused_imports_mod._extract_import_name("WidgetCatalog.hpp") == "WidgetCatalog" + assert unused_imports_mod._extract_import_name("vendor/json.hpp") == "json" import_node = FakeNode( "import_statement",