Skip to content

Commit 7041372

Browse files
authored
feat(code-mappings): Handle Java monorepo source roots in auto-derivation task (#112655)
## Summary - derive Java/Kotlin source roots from repo tree structure so monorepo modules map to their package root instead of stopping at `src/main/java` or `src/main/kotlin` - allow multiple code mappings from the same repo when identical Java package paths exist in multiple subprojects - move the Java-specific helper logic into a dedicated utility module and add realistic monorepo coverage for GraphQL and OpenTelemetry layouts from [sentry-java](https://github.com/getsentry/sentry-java) ## Test Plan - `.venv/bin/pytest -svv --reuse-db tests/sentry/issues/auto_source_code_config/test_code_mapping.py tests/sentry/api/endpoints/issues/test_organization_derive_code_mappings.py tests/sentry/issues/auto_source_code_config/test_process_event.py` - `.venv/bin/pre-commit run --files src/sentry/issues/auto_source_code_config/code_mapping.py src/sentry/issues/auto_source_code_config/utils/java.py tests/sentry/issues/auto_source_code_config/test_code_mapping.py tests/sentry/api/endpoints/issues/test_organization_derive_code_mappings.py tests/sentry/issues/auto_source_code_config/test_process_event.py` ## Migration - no migration needed; `RepositoryProjectPathConfig` already uses `(project, stack_root, source_root)` and this PR only changes derivation logic plus an in-memory dedupe key
1 parent bb84ef6 commit 7041372

File tree

9 files changed

+540
-42
lines changed

9 files changed

+540
-42
lines changed

src/sentry/issues/auto_source_code_config/code_mapping.py

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ class CodeMapping(NamedTuple):
4141
SLASH = "/"
4242
BACKSLASH = "\\" # This is the Python representation of a single backslash
4343

44+
CodeMappingKey = tuple[str, str]
45+
4446

4547
def derive_code_mappings(
4648
organization: Organization,
@@ -67,7 +69,8 @@ class CodeMappingTreesHelper:
6769

6870
def __init__(self, trees: Mapping[str, RepoTree]):
6971
self.trees = trees
70-
self.code_mappings: dict[str, CodeMapping] = {}
72+
# Multiple source roots may legitimately share the same stack root in one monorepo.
73+
self.code_mappings: dict[CodeMappingKey, CodeMapping] = {}
7174

7275
def generate_code_mappings(
7376
self, frames: Sequence[Mapping[str, Any]], platform: str | None = None
@@ -111,7 +114,9 @@ def get_file_and_repo_matches(self, frame_filename: FrameInfo) -> list[dict[str,
111114
extra = {"stack_path": stack_path, "source_path": source_path}
112115

113116
try:
114-
stack_root, source_root = find_roots(frame_filename, source_path)
117+
stack_root, source_root = find_roots(
118+
frame_filename, source_path, repo_tree.files
119+
)
115120
except UnexpectedPathException:
116121
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
117122
continue
@@ -160,19 +165,19 @@ def _stacktrace_buckets(
160165
def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameInfo]]) -> bool:
161166
"""This processes all stackframes and returns if a new code mapping has been generated"""
162167
reprocess = False
163-
for stackframe_root, stackframes in buckets.items():
164-
if not self.code_mappings.get(stackframe_root):
165-
for frame_filename in stackframes:
166-
code_mapping = self._find_code_mapping(frame_filename)
167-
if code_mapping:
168+
for stackframes in buckets.values():
169+
for frame_filename in stackframes:
170+
for code_mapping in self._find_code_mappings(frame_filename):
171+
mapping_key = (code_mapping.stacktrace_root, code_mapping.source_path)
172+
if mapping_key not in self.code_mappings:
168173
# This allows processing some stack frames that
169174
# were matching more than one file
170175
reprocess = True
171-
self.code_mappings[stackframe_root] = code_mapping
176+
self.code_mappings[mapping_key] = code_mapping
172177
return reprocess
173178

174-
def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:
175-
"""Look for the file path through all the trees and a generate code mapping for it if a match is found"""
179+
def _find_code_mappings(self, frame_filename: FrameInfo) -> list[CodeMapping]:
180+
"""Look for the file path through all the trees and generate code mappings for it."""
176181
code_mappings: list[CodeMapping] = []
177182
# XXX: This will need optimization by changing the data structure of the trees
178183
for repo_full_name in self.trees.keys():
@@ -191,13 +196,17 @@ def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:
191196

192197
if len(code_mappings) == 0:
193198
logger.warning("No files matched for %s", frame_filename.raw_path)
194-
return None
195-
# This means that the file has been found in more than one repo
196-
elif len(code_mappings) > 1:
199+
return []
200+
201+
unique_code_mappings = {
202+
(code_mapping.stacktrace_root, code_mapping.source_path): code_mapping
203+
for code_mapping in code_mappings
204+
}
205+
if len({code_mapping.repo.name for code_mapping in unique_code_mappings.values()}) > 1:
197206
logger.warning("More than one repo matched %s", frame_filename.raw_path)
198-
return None
207+
return []
199208

200-
return code_mappings[0]
209+
return list(unique_code_mappings.values())
201210

202211
def _generate_code_mapping_from_tree(
203212
self,
@@ -214,34 +223,44 @@ def _generate_code_mapping_from_tree(
214223
if self._is_potential_match(src_path, frame_filename)
215224
]
216225

217-
if len(matched_files) != 1:
226+
if len(matched_files) == 0:
218227
return []
219228

220-
stack_path = frame_filename.raw_path
221-
source_path = matched_files[0]
222-
223-
extra = {"stack_path": stack_path, "source_path": source_path}
224-
try:
225-
stack_root, source_root = find_roots(frame_filename, source_path)
226-
except UnexpectedPathException:
227-
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
229+
if len(matched_files) > 1 and not all(
230+
frame_filename.has_source_roots_override(source_path, repo_tree.files)
231+
for source_path in matched_files
232+
):
228233
return []
229234

230-
extra.update({"stack_root": stack_root, "source_root": source_root})
231-
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
232-
logger.warning(
233-
"Unexpected stack_path/source_path found. A code mapping was not generated.",
234-
extra=extra,
235-
)
236-
return []
235+
code_mappings: dict[tuple[str, str], CodeMapping] = {}
236+
for source_path in matched_files:
237+
stack_path = frame_filename.raw_path
238+
extra = {"stack_path": stack_path, "source_path": source_path}
239+
try:
240+
stack_root, source_root = find_roots(frame_filename, source_path, repo_tree.files)
241+
except UnexpectedPathException:
242+
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
243+
continue
244+
245+
extra.update({"stack_root": stack_root, "source_root": source_root})
246+
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
247+
logger.warning(
248+
"Unexpected stack_path/source_path found. A code mapping was not generated.",
249+
extra=extra,
250+
)
251+
continue
237252

238-
return [
239-
CodeMapping(
253+
code_mapping = CodeMapping(
240254
repo=repo_tree.repo,
241255
stacktrace_root=stack_root,
242256
source_path=source_root,
243257
)
244-
]
258+
code_mappings[(code_mapping.stacktrace_root, code_mapping.source_path)] = code_mapping
259+
260+
if len(matched_files) > 1 and len(code_mappings) != len(matched_files):
261+
return []
262+
263+
return list(code_mappings.values())
245264

246265
def _is_potential_match(self, src_file: str, frame_filename: FrameInfo) -> bool:
247266
"""
@@ -419,7 +438,9 @@ def get_sorted_code_mapping_configs(project: Project) -> list[RepositoryProjectP
419438
return sorted_configs
420439

421440

422-
def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
441+
def find_roots(
442+
frame_filename: FrameInfo, source_path: str, repo_files: Sequence[str] | None = None
443+
) -> tuple[str, str]:
423444
"""
424445
Returns a tuple containing the stack_root, and the source_root.
425446
If there is no overlap, raise an exception since this should not happen
@@ -444,9 +465,11 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
444465
# "Packaged" logic
445466
# e.g. stack_path: some_package/src/foo.py -> source_path: src/foo.py
446467
source_prefix = source_path.rpartition(stack_path)[0]
447-
return (
448-
f"{stack_root}{frame_filename.stack_root}/".replace("//", "/"),
449-
f"{source_prefix}{frame_filename.stack_root}/".replace("//", "/"),
468+
return frame_filename.resolve_source_roots(
469+
source_path=source_path,
470+
source_prefix=source_prefix,
471+
stack_root_prefix=stack_root,
472+
repo_files=repo_files,
450473
)
451474
elif stack_path.endswith(source_path):
452475
stack_prefix = stack_path.rpartition(source_path)[0]

src/sentry/issues/auto_source_code_config/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from sentry.integrations.types import IntegrationProviderSlug
77

8+
from .utils.java import find_java_source_roots
9+
810
METRIC_PREFIX = "auto_source_code_config"
911
DERIVED_ENHANCEMENTS_OPTION_KEY = "sentry:derived_grouping_enhancements"
1012
SUPPORTED_INTEGRATIONS = [IntegrationProviderSlug.GITHUB.value]
@@ -19,6 +21,7 @@
1921
# e.g. com.foo.bar.Baz$handle$1, Baz.kt -> com/foo/bar/Baz.kt
2022
"extract_filename_from_module": True,
2123
"create_in_app_stack_trace_rules": True,
24+
"source_roots_resolver": find_java_source_roots,
2225
"extensions": ["kt", "kts", "java", "jsp", "scala", "sc"],
2326
},
2427
"javascript": {"extensions": ["js", "jsx", "mjs", "tsx", "ts"]},

src/sentry/issues/auto_source_code_config/frame_info.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@
1414
NeedsExtension,
1515
UnsupportedFrameInfo,
1616
)
17-
from .utils.platform import PlatformConfig, supported_platform
17+
from .utils.platform import (
18+
PlatformConfig,
19+
SourceRootsResolver,
20+
noop_source_roots_resolver,
21+
supported_platform,
22+
)
1823

1924
NOT_FOUND = -1
2025

@@ -24,20 +29,27 @@
2429

2530
def create_frame_info(frame: Mapping[str, Any], platform: str | None = None) -> FrameInfo:
2631
"""Factory function to create the appropriate FrameInfo instance."""
32+
source_roots_resolver: SourceRootsResolver = noop_source_roots_resolver
2733
if platform and supported_platform(platform):
2834
platform_config = PlatformConfig(platform)
35+
source_roots_resolver = platform_config.get_source_roots_resolver()
2936
if platform_config.extracts_filename_from_module():
30-
return ModuleBasedFrameInfo(frame)
37+
return ModuleBasedFrameInfo(frame, source_roots_resolver)
3138

32-
return PathBasedFrameInfo(frame)
39+
return PathBasedFrameInfo(frame, source_roots_resolver)
3340

3441

3542
class FrameInfo(ABC):
3643
raw_path: str
3744
normalized_path: str
3845
stack_root: str
3946

40-
def __init__(self, frame: Mapping[str, Any]) -> None:
47+
def __init__(
48+
self,
49+
frame: Mapping[str, Any],
50+
source_roots_resolver: SourceRootsResolver = noop_source_roots_resolver,
51+
) -> None:
52+
self._source_roots_resolver = source_roots_resolver
4153
self.process_frame(frame)
4254

4355
def __repr__(self) -> str:
@@ -53,6 +65,24 @@ def process_frame(self, frame: Mapping[str, Any]) -> None:
5365
"""Process the frame and set the necessary attributes."""
5466
raise NotImplementedError("Subclasses must implement process_frame")
5567

68+
def has_source_roots_override(self, source_path: str, repo_files: Sequence[str] | None) -> bool:
69+
return self._source_roots_resolver(source_path, repo_files) is not None
70+
71+
def resolve_source_roots(
72+
self,
73+
source_path: str,
74+
source_prefix: str,
75+
stack_root_prefix: str = "",
76+
repo_files: Sequence[str] | None = None,
77+
) -> tuple[str, str]:
78+
if source_roots_override := self._source_roots_resolver(source_path, repo_files):
79+
return source_roots_override
80+
81+
return (
82+
f"{stack_root_prefix}{self.stack_root}/".replace("//", "/"),
83+
f"{source_prefix}{self.stack_root}/".replace("//", "/"),
84+
)
85+
5686

5787
class ModuleBasedFrameInfo(FrameInfo):
5888
def process_frame(self, frame: Mapping[str, Any]) -> None:
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
from collections.abc import Sequence
2+
3+
SLASH = "/"
4+
JAVA_SOURCE_ROOT_MARKERS = ("src/main/java/", "src/main/kotlin/")
5+
6+
7+
def get_java_source_set_root(source_path: str) -> str | None:
8+
"""Return the repo path through the Java/Kotlin source-set marker.
9+
10+
Example:
11+
`module/src/main/java/io/sentry/Foo.java` -> `module/src/main/java/`
12+
"""
13+
for marker in JAVA_SOURCE_ROOT_MARKERS:
14+
prefix, separator, _ = source_path.partition(marker)
15+
if separator:
16+
return f"{prefix}{separator}"
17+
18+
return None
19+
20+
21+
def find_package_root_relative_to_source_set(
    source_root: str, repo_files: Sequence[str]
) -> str | None:
    """Descend from a source set while every file shares one next directory.

    Only entries of ``repo_files`` that start with ``source_root`` are
    considered. The descent stops at the first level that directly holds a
    file or has more than one sub-directory, and the path walked so far
    (relative to ``source_root``, with a trailing slash, possibly empty) is
    returned. ``None`` is returned when no file lives under ``source_root``.

    Examples:
    `["module/src/main/java/io/sentry/graphql/Foo.java"]` with
    `source_root="module/src/main/java/"` returns `io/sentry/graphql/`.

    `["module/src/main/java/io/sentry/asyncprofiler/jfr/JfrParser.java",
    "module/src/main/java/io/sentry/asyncprofiler/metrics/ProfileMetric.java"]`
    with `source_root="module/src/main/java/"` returns `io/sentry/asyncprofiler/`.
    """
    tails = [path.removeprefix(source_root) for path in repo_files if path.startswith(source_root)]
    if not tails:
        return None

    walked = ""
    while True:
        heads: set[str] = set()
        deeper_tails: list[str] = []

        for tail in tails:
            if not tail:
                # Entry is exactly the directory walked so far; nothing to inspect.
                continue

            head, separator, rest = tail.partition(SLASH)
            if not separator:
                # A file sits directly at this level, so the chain ends here.
                return walked

            heads.add(head)
            if len(heads) > 1:
                # The tree forks at this level.
                return walked

            deeper_tails.append(rest)

        if len(heads) != 1:
            return walked

        (only_child,) = heads
        walked = f"{walked}{only_child}{SLASH}"
        tails = deeper_tails
68+
69+
70+
def find_java_source_roots(
    source_path: str, repo_files: Sequence[str] | None
) -> tuple[str, str] | None:
    """Return `(stack_root, source_root)` from a Java/Kotlin repo path.

    ``None`` is returned when ``repo_files`` is empty or missing, when
    ``source_path`` contains no source-set marker, or when no package root
    can be found under the source set.

    Example:
    `sentry-graphql-core/src/main/java/io/sentry/graphql/GraphQLFetcher.java`
    becomes
    `("io/sentry/graphql/", "sentry-graphql-core/src/main/java/io/sentry/graphql/")`.
    """
    if not repo_files:
        return None

    source_set_root = get_java_source_set_root(source_path)
    if not source_set_root:
        return None

    package_root = find_package_root_relative_to_source_set(source_set_root, repo_files)
    # An empty package root is a valid result; only None means "not found".
    if package_root is None:
        return None

    return package_root, f"{source_set_root}{package_root}"

src/sentry/issues/auto_source_code_config/utils/platform.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1+
from collections.abc import Callable, Sequence
12
from typing import Any
23

34
from sentry.models.organization import Organization
45

56
from ..constants import PLATFORMS_CONFIG
67

8+
SourceRootsResolver = Callable[[str, Sequence[str] | None], tuple[str, str] | None]
9+
10+
11+
def noop_source_roots_resolver(
12+
source_path: str, repo_files: Sequence[str] | None
13+
) -> tuple[str, str] | None:
14+
return None
15+
716

817
def supported_platform(platform: str) -> bool:
918
"""Return True if the platform is supported"""
@@ -45,3 +54,6 @@ def extracts_filename_from_module(self) -> bool:
4554

4655
def creates_in_app_stack_trace_rules(self) -> bool:
4756
return self.config.get("create_in_app_stack_trace_rules", False)
57+
58+
def get_source_roots_resolver(self) -> SourceRootsResolver:
    """Return the platform's ``source_roots_resolver`` config entry.

    Falls back to ``noop_source_roots_resolver`` when the key is absent.
    """
    resolver = self.config.get("source_roots_resolver", noop_source_roots_resolver)
    return resolver

0 commit comments

Comments
 (0)