-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Expand file tree
/
Copy pathcode_mapping.py
More file actions
505 lines (429 loc) · 20.5 KB
/
code_mapping.py
File metadata and controls
505 lines (429 loc) · 20.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
from __future__ import annotations
import logging
from collections import defaultdict
from collections.abc import Mapping, Sequence
from typing import Any, NamedTuple
from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig
from sentry.integrations.source_code_management.repo_trees import (
RepoAndBranch,
RepoTree,
RepoTreesIntegration,
)
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.repository import Repository
from sentry.utils import metrics
from sentry.utils.event_frames import EventFrame, try_munge_frame_path
from .constants import METRIC_PREFIX
from .errors import (
DoesNotFollowJavaPackageNamingConvention,
MissingModuleOrAbsPath,
NeedsExtension,
UnexpectedPathException,
UnsupportedFrameInfo,
)
from .frame_info import FrameInfo, create_frame_info
from .integration_utils import InstallationNotFoundError, get_installation
from .utils.misc import get_straight_path_prefix_end_index
logger = logging.getLogger(__name__)
class CodeMapping(NamedTuple):
    """A derived mapping from a stack trace path prefix to a source path prefix in a repo."""

    # Repository (and branch) the mapping points at; `repo.name` and `repo.branch`
    # are read when the mapping is persisted.
    repo: RepoAndBranch
    # Prefix of the stack trace path that gets replaced when the mapping is applied.
    stacktrace_root: str
    # Prefix inside the repository that replaces `stacktrace_root`.
    source_path: str
# Path separators used when splitting and re-joining stack trace and source paths.
SLASH = "/"
BACKSLASH = "\\" # This is the Python representation of a single backslash
# Key under which derived code mappings are stored: (stack trace root, source root).
CodeMappingKey = tuple[str, str]
def derive_code_mappings(
    organization: Organization,
    frame: Mapping[str, Any],
    platform: str | None = None,
) -> list[dict[str, str]]:
    """
    List the repository files that could correspond to a single stack frame.

    Returns an empty list when the organization's installation is not a
    RepoTreesIntegration or when the frame's path needs a file extension.
    Otherwise returns one dict per matching file, as produced by
    CodeMappingTreesHelper.get_file_and_repo_matches.
    """
    installation = get_installation(organization)
    if not isinstance(installation, RepoTreesIntegration):
        return []

    helper = CodeMappingTreesHelper(installation.get_trees_for_org())
    try:
        return helper.get_file_and_repo_matches(create_frame_info(frame, platform))
    except NeedsExtension:
        logger.warning("Needs extension: %s", frame.get("filename"))
        return []
# Call generate_code_mappings() after you initialize CodeMappingTreesHelper.
class CodeMappingTreesHelper:
    """Derives code mappings by matching stack frame paths against repository file trees."""

    platform: str | None = None

    def __init__(self, trees: Mapping[str, RepoTree]):
        self.trees = trees
        # Keyed by (stack root, source root): multiple source roots may legitimately
        # share the same stack root in one monorepo.
        self.code_mappings: dict[CodeMappingKey, CodeMapping] = {}

    def generate_code_mappings(
        self, frames: Sequence[Mapping[str, Any]], platform: str | None = None
    ) -> list[CodeMapping]:
        """Generate code mappings based on the initial trees object and the list of stack traces"""
        # Every call starts from a clean slate so results never leak between frame lists.
        self.code_mappings = {}
        self.platform = platform

        timer_key = f"{METRIC_PREFIX}.generate_code_mappings.duration"
        with metrics.timer(timer_key, tags={"platform": platform}):
            buckets: dict[str, list[FrameInfo]] = self._stacktrace_buckets(frames)

            # Keep reprocessing until a full pass produces no new code mapping. Earlier
            # frames are revisited in light of newly discovered mappings, so the order
            # of the stack frames does not affect the result.
            while self._process_stackframes(buckets):
                pass

            return list(self.code_mappings.values())

    def get_file_and_repo_matches(self, frame_filename: FrameInfo) -> list[dict[str, str]]:
        """List all the files in a repo that match the frame_filename"""
        results: list[dict[str, str]] = []
        for repo_tree in self.trees.values():
            candidates = [
                path
                for path in repo_tree.files
                if self._is_potential_match(path, frame_filename)
            ]
            for candidate in candidates:
                roots = self._validated_roots(frame_filename, candidate, repo_tree.files)
                if roots is None:
                    continue
                stack_root, source_root = roots
                results.append(
                    {
                        "filename": candidate,
                        "repo_name": repo_tree.repo.name,
                        "repo_branch": repo_tree.repo.branch,
                        "stacktrace_root": stack_root,
                        "source_path": source_root,
                    }
                )
        return results

    def _validated_roots(
        self, frame_filename: FrameInfo, source_path: str, repo_files: Sequence[str]
    ) -> tuple[str, str] | None:
        """
        Compute (stack_root, source_root) for one matched file.

        Returns None, after logging a warning, when find_roots rejects the paths or when
        applying the roots to the raw stack path does not reproduce source_path.
        """
        stack_path = frame_filename.raw_path
        log_extra = {"stack_path": stack_path, "source_path": source_path}
        try:
            stack_root, source_root = find_roots(frame_filename, source_path, repo_files)
        except UnexpectedPathException:
            logger.warning("Unexpected format for stack_path or source_path", extra=log_extra)
            return None

        log_extra.update({"stack_root": stack_root, "source_root": source_root})
        mapped_path = stack_path.replace(stack_root, source_root, 1).replace("\\", "/")
        if mapped_path != source_path:
            logger.warning(
                "Unexpected stack_path/source_path found. A code mapping was not generated.",
                extra=log_extra,
            )
            return None
        return stack_root, source_root

    def _stacktrace_buckets(
        self, frames: Sequence[Mapping[str, Any]]
    ) -> dict[str, list[FrameInfo]]:
        """Groups stacktraces into buckets based on the root of the stacktrace path"""
        grouped: defaultdict[str, list[FrameInfo]] = defaultdict(list)
        for frame in frames:
            try:
                frame_info = create_frame_info(frame, self.platform)
                # Any files without a top directory will be grouped together
                grouped[frame_info.stack_root].append(frame_info)
            except UnsupportedFrameInfo:
                logger.warning("Frame's filepath not supported: %s", frame.get("filename"))
            except MissingModuleOrAbsPath:
                logger.warning("Do not panic. I'm collecting this data.")
            except NeedsExtension:
                logger.warning("Needs extension: %s", frame.get("filename"))
            except DoesNotFollowJavaPackageNamingConvention:
                # Deliberately ignored: such frames are skipped without logging.
                pass
            except Exception:
                logger.exception("Unable to split stacktrace path into buckets")
        return grouped

    def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameInfo]]) -> bool:
        """This processes all stackframes and returns if a new code mapping has been generated"""
        found_new = False
        for bucket in buckets.values():
            for frame_info in bucket:
                for mapping in self._find_code_mappings(frame_info):
                    key = (mapping.stacktrace_root, mapping.source_path)
                    if key in self.code_mappings:
                        continue
                    # A new mapping allows reprocessing stack frames that
                    # were previously matching more than one file
                    found_new = True
                    self.code_mappings[key] = mapping
        return found_new

    def _find_code_mappings(self, frame_filename: FrameInfo) -> list[CodeMapping]:
        """Look for the file path through all the trees and generate code mappings for it."""
        found: list[CodeMapping] = []
        # XXX: This will need optimization by changing the data structure of the trees
        for repo_tree in self.trees.values():
            try:
                found.extend(self._generate_code_mapping_from_tree(repo_tree, frame_filename))
            except NotImplementedError:
                logger.exception(
                    "Code mapping failed for module with no package name. Processing continues."
                )
            except Exception:
                logger.exception("Unexpected error. Processing continues.")

        if not found:
            logger.warning("No files matched for %s", frame_filename.raw_path)
            return []

        # De-duplicate by (stack root, source root); the last occurrence wins.
        deduplicated = {
            (mapping.stacktrace_root, mapping.source_path): mapping for mapping in found
        }
        repo_names = {mapping.repo.name for mapping in deduplicated.values()}
        if len(repo_names) > 1:
            # Matches spread over several repositories are ambiguous, so none is used.
            logger.warning("More than one repo matched %s", frame_filename.raw_path)
            return []

        return list(deduplicated.values())

    def _generate_code_mapping_from_tree(
        self,
        repo_tree: RepoTree,
        frame_filename: FrameInfo,
    ) -> list[CodeMapping]:
        """
        Finds matches in the repo tree and generates code mappings for them.

        A single matching file yields at most one code mapping. When several files
        match, mappings are only returned if
        `frame_filename.has_source_roots_override(...)` holds for every match and every
        match produces a distinct, valid mapping; otherwise an empty list is returned.
        """
        candidates = [
            path for path in repo_tree.files if self._is_potential_match(path, frame_filename)
        ]
        if not candidates:
            return []

        ambiguous = len(candidates) > 1
        if ambiguous and any(
            not frame_filename.has_source_roots_override(path, repo_tree.files)
            for path in candidates
        ):
            return []

        by_roots: dict[tuple[str, str], CodeMapping] = {}
        for path in candidates:
            roots = self._validated_roots(frame_filename, path, repo_tree.files)
            if roots is None:
                continue
            stack_root, source_root = roots
            by_roots[(stack_root, source_root)] = CodeMapping(
                repo=repo_tree.repo,
                stacktrace_root=stack_root,
                source_path=source_root,
            )

        # With several matches, every one of them must have produced its own mapping.
        if ambiguous and len(by_roots) != len(candidates):
            return []

        return list(by_roots.values())

    def _is_potential_match(self, src_file: str, frame_filename: FrameInfo) -> bool:
        """
        Tries to see if the stacktrace without the root matches the file from the
        source code. Use existing code mappings to exclude some source files
        """

        def _shares_tail(first: Sequence[str], second: Sequence[str]) -> bool:
            # zip() stops at the shorter sequence, so this checks that the shorter
            # one equals the tail of the longer one, whichever argument that is.
            return all(a == b for a, b in zip(reversed(first), reversed(second)))

        # Exit early because we should not be processing source files for existing code maps
        if self._matches_existing_code_mappings(src_file):
            return False

        normalized = frame_filename.normalized_path
        src_parts = src_file.split("/")
        frame_parts = normalized.split("/")
        if len(src_parts) == len(frame_parts):
            # Same depth: only an exact match counts
            return src_file == normalized
        # Longer source path (mono repos) or longer frame path (absolute paths)
        return _shares_tail(src_parts, frame_parts)

    def _matches_existing_code_mappings(self, src_file: str) -> bool:
        """Check if the source file is already covered by an existing code mapping"""
        # Mappings with an empty source path never count as covering a file.
        # NOTE(review): find_roots' fallback path returns source roots ending in "/",
        # for which the "<source_path>/" prefix below contains "//" — confirm whether
        # the trailing slash should be stripped before comparing.
        return any(
            bool(mapping.source_path) and src_file.startswith(f"{mapping.source_path}/")
            for mapping in self.code_mappings.values()
        )

    def __repr__(self) -> str:
        return f"CodeMappingTreesHelper(trees={self.trees}, code_mappings={self.code_mappings})"
def convert_stacktrace_frame_path_to_source_path(
    frame: EventFrame,
    code_mapping: RepositoryProjectPathConfig,
    platform: str | None,
    sdk_name: str | None,
) -> str | None:
    """
    Applies the given code mapping to the given stacktrace frame and returns the source path.

    The first occurrence of the mapping's stack root is replaced with its source root,
    backslashes become forward slashes, and leading slashes are stripped.
    If the code mapping does not apply to the frame, returns None.
    """
    stack_root = code_mapping.stack_root

    # In most cases, code mappings get applied to frame.filename, but some platforms such as Java
    # contain folder info in other parts of the frame (e.g. frame.module="com.example.app.MainActivity"
    # gets transformed to "com/example/app/MainActivity.java"), so in those cases we use the
    # transformed path instead.
    munged_path = try_munge_frame_path(frame=frame, platform=platform, sdk_name=sdk_name)
    stacktrace_path = munged_path or frame.filename

    # Some platforms only provide the file's name without folder paths, so the
    # absolute path is tried as a fallback when the primary path does not start
    # with the stack root.
    for candidate in (stacktrace_path, frame.abs_path):
        if candidate and candidate.startswith(stack_root):
            mapped = candidate.replace(stack_root, code_mapping.source_root, 1)
            return mapped.replace("\\", "/").lstrip("/")

    return None
def create_code_mapping(
    organization: Organization,
    code_mapping: CodeMapping,
    project: Project,
) -> RepositoryProjectPathConfig:
    """
    Persist a user-generated code mapping for the project.

    Gets or creates the Repository named by the mapping, then creates or updates the
    RepositoryProjectPathConfig identified by (project, stack_root, source_root).

    Raises InstallationNotFoundError when the installation has no org integration.
    """
    installation = get_installation(organization)
    org_integration = installation.org_integration
    # It helps with typing since org_integration can be None
    if not org_integration:
        raise InstallationNotFoundError

    integration_id = installation.model.id
    repository, _ = Repository.objects.get_or_create(
        name=code_mapping.repo.name,
        organization_id=organization.id,
        defaults={"integration_id": integration_id},
    )

    config_defaults = {
        "repository": repository,
        "organization_id": organization.id,
        "integration_id": integration_id,
        "organization_integration_id": org_integration.id,
        "default_branch": code_mapping.repo.branch,
        # This function is called from the UI, thus, we know that the code mapping is user generated
        "automatically_generated": False,
    }
    path_config, _ = RepositoryProjectPathConfig.objects.update_or_create(
        project=project,
        stack_root=code_mapping.stacktrace_root,
        source_root=code_mapping.source_path,
        defaults=config_defaults,
    )
    return path_config
def get_sorted_code_mapping_configs(project: Project) -> list[RepositoryProjectPathConfig]:
    """
    Returns the code mapping config list for a project sorted based on precedence.

    User generated code mappings are evaluated before Sentry generated code mappings.
    Code mappings with absolute path stack roots are evaluated before relative path stack roots.
    Code mappings with more defined stack trace roots are evaluated before less defined stack trace
    roots.

    If sorting fails, the failure is logged and the configs sorted so far are returned.

    `project`: The project to get the list of sorted code mapping configs for
    """

    def _takes_precedence(
        candidate: RepositoryProjectPathConfig, existing: RepositoryProjectPathConfig
    ) -> bool:
        # User defined code mappings come before Sentry generated ones
        if existing.automatically_generated and not candidate.automatically_generated:
            return True
        # Absolute paths come before relative paths
        if not existing.stack_root.startswith("/") and candidate.stack_root.startswith("/"):
            return True
        # More defined stack roots come before less defined ones of the same origin
        return (
            existing.automatically_generated == candidate.automatically_generated
        ) and candidate.stack_root.startswith(existing.stack_root)

    # xxx(meredith): if there are ever any changes to this query, make
    # sure that we are still ordering by `id` because we want to make sure
    # the ordering is deterministic
    # codepath mappings must have an associated integration for stacktrace linking.
    queryset = (
        RepositoryProjectPathConfig.objects.filter(
            project=project, organization_integration_id__isnull=False
        )
        .select_related("repository")
        .order_by("id")
    )

    ordered: list[RepositoryProjectPathConfig] = []
    try:
        for config in queryset:
            for position, existing in enumerate(ordered):
                if _takes_precedence(config, existing):
                    ordered.insert(position, config)
                    break
            else:
                # No existing entry is outranked: Sentry generated mappings go to the
                # back, user defined ones to the front.
                if config.automatically_generated:
                    ordered.append(config)
                else:
                    ordered.insert(0, config)
    except Exception:
        logger.exception("There was a failure sorting the code mappings")

    return ordered
def find_roots(
    frame_filename: FrameInfo, source_path: str, repo_files: Sequence[str] | None = None
) -> tuple[str, str]:
    """
    Returns a tuple containing the stack_root, and the source_root.

    `frame_filename`: Frame info whose `raw_path` is the stack trace path.
    `source_path`: Path of the matching file inside the repository.
    `repo_files`: Optional list of repository files, forwarded to
    `frame_filename.resolve_source_roots` for the "packaged" case.

    Raises UnexpectedPathException if either path is empty or if there is no overlap
    between them, since this should not happen.
    """
    if not source_path:
        raise UnexpectedPathException("Source path is empty")

    stack_path = frame_filename.raw_path
    if not stack_path:
        # Without this guard the indexing below raises IndexError, which callers
        # that only handle UnexpectedPathException would not catch.
        raise UnexpectedPathException("Stack path is empty")

    stack_root = ""
    # A single leading separator belongs to the stack root, not to the overlap.
    if stack_path[0] in ("/", "\\"):
        stack_root += stack_path[0]
        stack_path = stack_path[1:]

    if stack_path == source_path:
        # e.g. stack_path: foo/foo.py -> source_path: foo/foo.py
        return (stack_root, "")

    if source_path.endswith(stack_path):
        if "/" not in stack_path:
            # Single-file path (e.g. stack_path: foo.py -> source_path: src/foo.py)
            # Only the trailing file name is stripped; str.replace would also delete
            # earlier occurrences (e.g. "a.py.d/a.py" must yield "a.py.d/").
            # NOTE(review): a leading separator captured in stack_root is dropped
            # here — confirm that returning "" as the stack root is intended.
            return ("", source_path.removesuffix(stack_path))

        # "Packaged" logic
        # e.g. stack_path: some_package/src/foo.py -> source_path: src/foo.py
        source_prefix = source_path.rpartition(stack_path)[0]
        return frame_filename.resolve_source_roots(
            source_path=source_path,
            source_prefix=source_prefix,
            stack_root_prefix=stack_root,
            repo_files=repo_files,
        )

    if stack_path.endswith(source_path):
        # The stack path is the source path with extra leading directories.
        stack_prefix = stack_path.rpartition(source_path)[0]
        return (f"{stack_root}{stack_prefix}", "")

    # Compare using forward slashes, but remember the original delimiter so the
    # returned stack root keeps the stack trace's own separator.
    stack_path_delim = SLASH if SLASH in stack_path else BACKSLASH
    if stack_path_delim == BACKSLASH:
        stack_path = stack_path.replace(BACKSLASH, SLASH)
    if (straight_path_idx := get_straight_path_prefix_end_index(stack_path)) > 0:
        stack_root += stack_path[:straight_path_idx]
        stack_path = stack_path[straight_path_idx:]

    segments: list[str] = stack_path.split(SLASH)
    # Move one leading segment at a time from the overlap into the stack root:
    # increase stack root specificity, decrease overlap specificity.
    for split_at in range(len(segments)):
        overlap = SLASH.join(segments[split_at:])
        if not overlap or not source_path.endswith(overlap):
            continue

        source_root = source_path.rpartition(overlap)[0]
        stack_root += stack_path_delim.join(segments[:split_at])
        if stack_root and stack_root[-1] != stack_path_delim:  # append trailing slash
            stack_root = f"{stack_root}{stack_path_delim}"
        if source_root and source_root[-1] != SLASH:
            source_root = f"{source_root}{SLASH}"
        return (stack_root, source_root)

    # validate_source_url should have ensured the file names match
    # so if we get here something went wrong and there is a bug
    raise UnexpectedPathException("Could not find common root from paths")