Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 61 additions & 38 deletions src/sentry/issues/auto_source_code_config/code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class CodeMapping(NamedTuple):
SLASH = "/"
BACKSLASH = "\\" # This is the Python representation of a single backslash

CodeMappingKey = tuple[str, str]


def derive_code_mappings(
organization: Organization,
Expand All @@ -67,7 +69,8 @@ class CodeMappingTreesHelper:

def __init__(self, trees: Mapping[str, RepoTree]):
self.trees = trees
self.code_mappings: dict[str, CodeMapping] = {}
# Multiple source roots may legitimately share the same stack root in one monorepo.
self.code_mappings: dict[CodeMappingKey, CodeMapping] = {}

def generate_code_mappings(
self, frames: Sequence[Mapping[str, Any]], platform: str | None = None
Expand Down Expand Up @@ -111,7 +114,9 @@ def get_file_and_repo_matches(self, frame_filename: FrameInfo) -> list[dict[str,
extra = {"stack_path": stack_path, "source_path": source_path}

try:
stack_root, source_root = find_roots(frame_filename, source_path)
stack_root, source_root = find_roots(
frame_filename, source_path, repo_tree.files
)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
continue
Expand Down Expand Up @@ -160,19 +165,19 @@ def _stacktrace_buckets(
def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameInfo]]) -> bool:
"""This processes all stackframes and returns if a new code mapping has been generated"""
reprocess = False
for stackframe_root, stackframes in buckets.items():
if not self.code_mappings.get(stackframe_root):
for frame_filename in stackframes:
code_mapping = self._find_code_mapping(frame_filename)
if code_mapping:
for stackframes in buckets.values():
for frame_filename in stackframes:
for code_mapping in self._find_code_mappings(frame_filename):
mapping_key = (code_mapping.stacktrace_root, code_mapping.source_path)
if mapping_key not in self.code_mappings:
# This allows processing some stack frames that
# were matching more than one file
reprocess = True
self.code_mappings[stackframe_root] = code_mapping
self.code_mappings[mapping_key] = code_mapping
return reprocess

def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:
"""Look for the file path through all the trees and a generate code mapping for it if a match is found"""
def _find_code_mappings(self, frame_filename: FrameInfo) -> list[CodeMapping]:
"""Look for the file path through all the trees and generate code mappings for it."""
code_mappings: list[CodeMapping] = []
# XXX: This will need optimization by changing the data structure of the trees
for repo_full_name in self.trees.keys():
Expand All @@ -191,13 +196,17 @@ def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:

if len(code_mappings) == 0:
logger.warning("No files matched for %s", frame_filename.raw_path)
return None
# This means that the file has been found in more than one repo
elif len(code_mappings) > 1:
return []

unique_code_mappings = {
(code_mapping.stacktrace_root, code_mapping.source_path): code_mapping
for code_mapping in code_mappings
}
if len({code_mapping.repo.name for code_mapping in unique_code_mappings.values()}) > 1:
logger.warning("More than one repo matched %s", frame_filename.raw_path)
return None
return []

return code_mappings[0]
return list(unique_code_mappings.values())

def _generate_code_mapping_from_tree(
self,
Expand All @@ -214,34 +223,44 @@ def _generate_code_mapping_from_tree(
if self._is_potential_match(src_path, frame_filename)
]

if len(matched_files) != 1:
if len(matched_files) == 0:
return []

stack_path = frame_filename.raw_path
source_path = matched_files[0]

extra = {"stack_path": stack_path, "source_path": source_path}
try:
stack_root, source_root = find_roots(frame_filename, source_path)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
if len(matched_files) > 1 and not all(
frame_filename.has_source_roots_override(source_path, repo_tree.files)
for source_path in matched_files
):
return []

extra.update({"stack_root": stack_root, "source_root": source_root})
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.warning(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra=extra,
)
return []
code_mappings: dict[tuple[str, str], CodeMapping] = {}
for source_path in matched_files:
stack_path = frame_filename.raw_path
extra = {"stack_path": stack_path, "source_path": source_path}
try:
stack_root, source_root = find_roots(frame_filename, source_path, repo_tree.files)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
continue

extra.update({"stack_root": stack_root, "source_root": source_root})
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.warning(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra=extra,
)
continue

return [
CodeMapping(
code_mapping = CodeMapping(
repo=repo_tree.repo,
stacktrace_root=stack_root,
source_path=source_root,
)
]
code_mappings[(code_mapping.stacktrace_root, code_mapping.source_path)] = code_mapping

if len(matched_files) > 1 and len(code_mappings) != len(matched_files):
return []

return list(code_mappings.values())

def _is_potential_match(self, src_file: str, frame_filename: FrameInfo) -> bool:
"""
Expand Down Expand Up @@ -419,7 +438,9 @@ def get_sorted_code_mapping_configs(project: Project) -> list[RepositoryProjectP
return sorted_configs


def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
def find_roots(
frame_filename: FrameInfo, source_path: str, repo_files: Sequence[str] | None = None
) -> tuple[str, str]:
"""
Returns a tuple containing the stack_root, and the source_root.
If there is no overlap, raise an exception since this should not happen
Expand All @@ -444,9 +465,11 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
# "Packaged" logic
# e.g. stack_path: some_package/src/foo.py -> source_path: src/foo.py
source_prefix = source_path.rpartition(stack_path)[0]
return (
f"{stack_root}{frame_filename.stack_root}/".replace("//", "/"),
f"{source_prefix}{frame_filename.stack_root}/".replace("//", "/"),
return frame_filename.resolve_source_roots(
source_path=source_path,
source_prefix=source_prefix,
stack_root_prefix=stack_root,
repo_files=repo_files,
)
elif stack_path.endswith(source_path):
stack_prefix = stack_path.rpartition(source_path)[0]
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/issues/auto_source_code_config/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from sentry.integrations.types import IntegrationProviderSlug

from .utils.java import find_java_source_roots

METRIC_PREFIX = "auto_source_code_config"
DERIVED_ENHANCEMENTS_OPTION_KEY = "sentry:derived_grouping_enhancements"
SUPPORTED_INTEGRATIONS = [IntegrationProviderSlug.GITHUB.value]
Expand All @@ -19,6 +21,7 @@
# e.g. com.foo.bar.Baz$handle$1, Baz.kt -> com/foo/bar/Baz.kt
"extract_filename_from_module": True,
"create_in_app_stack_trace_rules": True,
"source_roots_resolver": find_java_source_roots,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm glad seeing this file still being a good source for declaring the configuration of each platform 🦾

"extensions": ["kt", "kts", "java", "jsp", "scala", "sc"],
},
"javascript": {"extensions": ["js", "jsx", "mjs", "tsx", "ts"]},
Expand Down
38 changes: 34 additions & 4 deletions src/sentry/issues/auto_source_code_config/frame_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@
NeedsExtension,
UnsupportedFrameInfo,
)
from .utils.platform import PlatformConfig, supported_platform
from .utils.platform import (
PlatformConfig,
SourceRootsResolver,
noop_source_roots_resolver,
supported_platform,
)

NOT_FOUND = -1

Expand All @@ -24,20 +29,27 @@

def create_frame_info(frame: Mapping[str, Any], platform: str | None = None) -> FrameInfo:
"""Factory function to create the appropriate FrameInfo instance."""
source_roots_resolver: SourceRootsResolver = noop_source_roots_resolver
if platform and supported_platform(platform):
platform_config = PlatformConfig(platform)
source_roots_resolver = platform_config.get_source_roots_resolver()
if platform_config.extracts_filename_from_module():
return ModuleBasedFrameInfo(frame)
return ModuleBasedFrameInfo(frame, source_roots_resolver)

return PathBasedFrameInfo(frame)
return PathBasedFrameInfo(frame, source_roots_resolver)


class FrameInfo(ABC):
raw_path: str
normalized_path: str
stack_root: str

def __init__(self, frame: Mapping[str, Any]) -> None:
def __init__(
self,
frame: Mapping[str, Any],
source_roots_resolver: SourceRootsResolver = noop_source_roots_resolver,
) -> None:
self._source_roots_resolver = source_roots_resolver
self.process_frame(frame)

def __repr__(self) -> str:
Expand All @@ -53,6 +65,24 @@ def process_frame(self, frame: Mapping[str, Any]) -> None:
"""Process the frame and set the necessary attributes."""
raise NotImplementedError("Subclasses must implement process_frame")

def has_source_roots_override(self, source_path: str, repo_files: Sequence[str] | None) -> bool:
return self._source_roots_resolver(source_path, repo_files) is not None

def resolve_source_roots(
self,
source_path: str,
source_prefix: str,
stack_root_prefix: str = "",
repo_files: Sequence[str] | None = None,
) -> tuple[str, str]:
if source_roots_override := self._source_roots_resolver(source_path, repo_files):
return source_roots_override

return (
f"{stack_root_prefix}{self.stack_root}/".replace("//", "/"),
f"{source_prefix}{self.stack_root}/".replace("//", "/"),
)


class ModuleBasedFrameInfo(FrameInfo):
def process_frame(self, frame: Mapping[str, Any]) -> None:
Expand Down
89 changes: 89 additions & 0 deletions src/sentry/issues/auto_source_code_config/utils/java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from collections.abc import Sequence

SLASH = "/"
JAVA_SOURCE_ROOT_MARKERS = ("src/main/java/", "src/main/kotlin/")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per convo with @romtsn , these are to cover the majority of cases.



def get_java_source_set_root(source_path: str) -> str | None:
"""Return the repo path through the Java/Kotlin source-set marker.

Example:
`module/src/main/java/io/sentry/Foo.java` -> `module/src/main/java/`
"""
for marker in JAVA_SOURCE_ROOT_MARKERS:
prefix, separator, _ = source_path.partition(marker)
if separator:
return f"{prefix}{separator}"

return None


def find_package_root_relative_to_source_set(
source_root: str, repo_files: Sequence[str]
) -> str | None:
"""Walk a source set until the directory tree stops being a single-child chain.

Examples:
`["module/src/main/java/io/sentry/graphql/Foo.java"]` with
`source_root="module/src/main/java/"` returns `io/sentry/graphql/`.

`["module/src/main/java/io/sentry/asyncprofiler/jfr/JfrParser.java",
"module/src/main/java/io/sentry/asyncprofiler/metrics/ProfileMetric.java"]`
with `source_root="module/src/main/java/"` returns `io/sentry/asyncprofiler/`.
"""
relative_paths = [
file.removeprefix(source_root) for file in repo_files if file.startswith(source_root)
]
if not relative_paths:
return None

package_root = ""
while True:
has_file = False
subdirs: set[str] = set()

for relative_path in relative_paths:
if package_root:
if not relative_path.startswith(package_root):
continue
remainder = relative_path[len(package_root) :]
else:
remainder = relative_path

if not remainder:
continue

if SLASH not in remainder:
has_file = True
break

subdirs.add(remainder.split(SLASH, 1)[0])
if len(subdirs) > 1:
break

if has_file or len(subdirs) != 1:
return package_root

package_root = f"{package_root}{subdirs.pop()}{SLASH}"


def find_java_source_roots(
source_path: str, repo_files: Sequence[str] | None
) -> tuple[str, str] | None:
"""Return `(stack_root, source_root)` from a Java/Kotlin repo path.

Example:
`sentry-graphql-core/src/main/java/io/sentry/graphql/GraphQLFetcher.java`
becomes
`("io/sentry/graphql/", "sentry-graphql-core/src/main/java/io/sentry/graphql/")`.
"""
if not repo_files:
return None

if not (source_root := get_java_source_set_root(source_path)):
return None

if (package_root := find_package_root_relative_to_source_set(source_root, repo_files)) is None:
return None

return package_root, f"{source_root}{package_root}"
12 changes: 12 additions & 0 deletions src/sentry/issues/auto_source_code_config/utils/platform.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
from collections.abc import Callable, Sequence
from typing import Any

from sentry.models.organization import Organization

from ..constants import PLATFORMS_CONFIG

SourceRootsResolver = Callable[[str, Sequence[str] | None], tuple[str, str] | None]


def noop_source_roots_resolver(
source_path: str, repo_files: Sequence[str] | None
) -> tuple[str, str] | None:
return None


def supported_platform(platform: str) -> bool:
"""Return True if the platform is supported"""
Expand Down Expand Up @@ -45,3 +54,6 @@ def extracts_filename_from_module(self) -> bool:

def creates_in_app_stack_trace_rules(self) -> bool:
return self.config.get("create_in_app_stack_trace_rules", False)

def get_source_roots_resolver(self) -> SourceRootsResolver:
return self.config.get("source_roots_resolver", noop_source_roots_resolver)
Loading
Loading