Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

### Bug Fixes

- fix(stackwalking): Use debug info to inform scanning by @loewenheim in [#1905](https://github.com/getsentry/symbolicator/pull/1905)

## 26.3.1

### New Features ✨
Expand Down
177 changes: 132 additions & 45 deletions crates/symbolicator-native/src/symbolication/process_minidump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ use symbolicator_sources::{ObjectId, ObjectType, SourceConfig};
use tokio::sync::Notify;

use crate::caches::cficaches::{CfiCacheActor, CfiModuleInfo, FetchCfiCache, FetchedCfiCache};
use crate::caches::derived::DerivedCache;
use crate::caches::symcaches::{FetchSymCache, OwnedSymCache, SymCacheActor};
use crate::interface::{
CompleteObjectInfo, CompletedSymbolicationResponse, ProcessMinidump, RawFrame, RawStacktrace,
Registers, RewriteRules, SymbolicateStacktraces, SystemInfo,
Expand Down Expand Up @@ -156,6 +158,13 @@ struct SymbolicatorSymbolProvider {
object_type: ObjectType,
/// The actor used for fetching CFI.
cficache_actor: CfiCacheActor,
/// The actor used for fetching symcaches.
///
/// Note that debug information is currently _not_ used to symbolicate frames
/// during stackwalking (which `rust-minidump` supports in principle). Rather,
/// we use it as a sanity check for `rust-minidump`'s stackwalker. See the
/// implementation of `fill_symbol` for details.
symcache_actor: SymCacheActor,
/// The debug ID of the first module in the minidump
/// (by address).
///
Expand All @@ -177,6 +186,7 @@ impl SymbolicatorSymbolProvider {
scope: Scope,
sources: Arc<[SourceConfig]>,
cficache_actor: CfiCacheActor,
symcache_actor: SymCacheActor,
object_type: ObjectType,
first_module_debug_id: Option<DebugId>,
rewrite_first_module: RewriteRules,
Expand All @@ -185,13 +195,37 @@ impl SymbolicatorSymbolProvider {
scope,
sources,
cficache_actor,
symcache_actor,
object_type,
first_module_debug_id,
rewrite_first_module,
cficaches: Default::default(),
}
}

fn object_id_from_module(&self, module: &dyn Module) -> ObjectId {
let code_file = non_empty_file_name(&module.code_file());
let mut debug_file = module.debug_file().as_deref().and_then(non_empty_file_name);

// Rewrite the first module's debug file according to the configured rewrite rules.
if module.debug_identifier() == self.first_module_debug_id
&& let Some(new_debug_file) = debug_file
.as_ref()
.and_then(|debug_file| self.rewrite_first_module.rewrite(debug_file))
{
debug_file = Some(new_debug_file);
}

ObjectId {
code_id: module.code_identifier(),
code_file,
debug_id: module.debug_identifier(),
debug_file,
debug_checksum: None,
object_type: self.object_type,
}
}

/// Fetches CFI for the given module, parses it into a `SymbolFile`, and stores it internally.
async fn load_cfi_module(&self, module: &(dyn Module + Sync)) -> Arc<FetchedCfiCache> {
let key = LookupKey::new(module);
Expand Down Expand Up @@ -220,27 +254,7 @@ impl SymbolicatorSymbolProvider {

let sources = self.sources.clone();
let scope = self.scope.clone();

let code_file = non_empty_file_name(&module.code_file());
let mut debug_file = module.debug_file().as_deref().and_then(non_empty_file_name);

// Rewrite the first module's debug file according to the configured rewrite rules.
if module.debug_identifier() == self.first_module_debug_id
&& let Some(new_debug_file) = debug_file
.as_ref()
.and_then(|debug_file| self.rewrite_first_module.rewrite(debug_file))
{
debug_file = Some(new_debug_file);
}

let identifier = ObjectId {
code_id: module.code_identifier(),
code_file,
debug_id: module.debug_identifier(),
debug_file,
debug_checksum: None,
object_type: self.object_type,
};
let identifier = self.object_id_from_module(module);

let cficache = self
.cficache_actor
Expand All @@ -266,51 +280,121 @@ impl SymbolicatorSymbolProvider {
}
cficache
}
}

#[async_trait]
impl SymbolProvider for SymbolicatorSymbolProvider {
async fn fill_symbol(
/// Fetches debug info for the given module and parses it into a `SymCache`.
async fn load_symcache(&self, module: &(dyn Module + Sync)) -> DerivedCache<OwnedSymCache> {
let sources = self.sources.clone();
let scope = self.scope.clone();
let identifier = self.object_id_from_module(module);

self.symcache_actor
.fetch(FetchSymCache {
object_type: self.object_type,
identifier,
sources,
scope,
})
// NOTE: this `bind_hub` is important!
// `load_symcache` is being called concurrently from `rust-minidump` via
// `join_all`. We do need proper isolation of any async task that might
// manipulate any Sentry scope.
.bind_hub(Hub::new_from_top(Hub::current()))
.await
}

/// Try to validate the given frame's instruction address against the CFI for the module
/// (if we have it).
///
/// Returns `false` if we have CFI for the module, but that CFI doesn't cover the frame's
/// instruction address. Returns `true` in any other case.
///
/// This helps the `rust-minidump`'s stack-walker to make better decisions.
/// If the successfully loaded CFI unwind info can not find the potential instruction
/// the passed instruction is most likely not a valid instruction from this module.
/// But since the instruction maps into the range of this module, we know it cannot
/// be part of a different module either -> it's not a valid instruction.
///
/// See: <https://github.com/rust-minidump/rust-minidump/blob/32e01a0e54d987025aa64486ebc4154f7f6b16d2/minidump-unwind/src/lib.rs#L865-L877>
async fn check_frame_by_cfi(
&self,
module: &(dyn Module + Sync),
frame: &mut (dyn FrameSymbolizer + Send),
) -> Result<(), FillSymbolError> {
// This function is being called for every context frame,
) -> bool {
// This function is being called (through `fill_symbol`) for every context frame,
// regardless if any stack walking happens.
// In contrast, `walk_frame` below will be skipped in case the minidump
// does not contain any actionable stack memory that would allow stack walking.
// Loading this module here means we will be able to backfill a possibly missing
// debug_id/file.
let cfi_module = self.load_cfi_module(module).await;

// Try to validate the given instruction against the CFI module contents.
//
// This helps the `rust-minidump`'s stack-walker to make better decisions.
// Returning an error here will aggressively create frames.
// To indicate a that a potential instruction is not valid, we can return success
// here but also not fill in the symbol name.
//
// If the successfully loaded CFI unwind info can not find the potential instruction
// the passed instruction is most likely not a valid instruction from this module.
// But since the instruction maps into the range of this module, we know it cannot
// be part of a different module either -> it's not a valid instruction.
// Returning success in this case, means the frame will be skipped.
//
// See: https://github.com/rust-minidump/rust-minidump/blob/32e01a0e54d987025aa64486ebc4154f7f6b16d2/minidump-unwind/src/lib.rs#L865-L877
if let Ok(Some(cached)) = &cfi_module.cache {
let cfi = &cached.0.cfi_stack_info;
let instruction = frame.get_instruction() - module.base_address();

if !cfi.is_empty() && cfi.get(instruction).is_none() {
// We definitely do have CFI information loaded, but the given instruction does not
// map to any stack frame info.
return Ok(());
return false;
}
}

// In any other case, always return an error here to signal that we have no useful symbol information to
// contribute. Doing nothing would stop the stack scanning prematurely.
Err(FillSymbolError {})
true
}

/// Try to validate the given frame's instruction address against the debug information
/// for the module (if we have it).
///
/// Returns `false` if we have debug info for the module, but that debug info doesn't cover the frame's
/// instruction address. Returns `true` in any other case.
///
/// This helps the `rust-minidump`'s stack-walker to make better decisions.
/// If the successfully loaded symcache can not find the potential instruction
/// the passed instruction is most likely not a valid instruction from this module.
/// But since the instruction maps into the range of this module, we know it cannot
/// be part of a different module either -> it's not a valid instruction.
///
/// See: <https://github.com/rust-minidump/rust-minidump/blob/32e01a0e54d987025aa64486ebc4154f7f6b16d2/minidump-unwind/src/lib.rs#L865-L877>
async fn check_frame_by_symbol_info(
&self,
module: &(dyn Module + Sync),
frame: &mut (dyn FrameSymbolizer + Send),
) -> bool {
let symcache = self.load_symcache(module).await;

if let Ok(cached) = &symcache.cache {
let instruction = frame.get_instruction() - module.base_address();

if cached.get().lookup(instruction).next().is_none() {
// We definitely do have symbol information loaded, but the given instruction does not
// map to any symbol info.
return false;
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing emptiness check on symcache causes false frame rejection

Medium Severity

The symcache lookup path has no emptiness guard, unlike the parallel CFI path. For CFI, empty data is wrapped in Option::None (which doesn't match Ok(Some(cached))), and there's an additional !cfi.is_empty() check. The symcache path has neither protection — a successfully loaded but symbolless symcache (e.g., from a stripped binary found via ObjectPurpose::Debug) will match if let Ok(cached) and lookup(instruction).next().is_none() will return true, falsely setting valid_by_symbol_info to false. Combined with valid_by_cfi = false, this causes valid frames to be discarded — the very problem the PR aims to fix.

Additional Locations (1)
Fix in Cursor Fix in Web

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really worried about this case.

}

true
}
}

#[async_trait]
impl SymbolProvider for SymbolicatorSymbolProvider {
async fn fill_symbol(
&self,
module: &(dyn Module + Sync),
frame: &mut (dyn FrameSymbolizer + Send),
) -> Result<(), FillSymbolError> {
if self.check_frame_by_cfi(module, frame).await
|| self.check_frame_by_symbol_info(module, frame).await
{
// If either CFI or symbol info does not reject the frame, return an error here to signal that
// we have no useful symbol information to contribute. Doing nothing would stop the stack scanning prematurely.
Err(FillSymbolError {})
} else {
// To indicate a that a potential instruction is not valid, we can return success
// here but also not fill in the symbol name. This will cause `rust-minidump`'s stack
// scanner to disregard the frame.
Ok(())
}
}

async fn walk_frame(
Expand Down Expand Up @@ -359,6 +443,7 @@ fn object_info_from_minidump_module(ty: ObjectType, module: &MinidumpModule) ->

async fn stackwalk(
cficaches: CfiCacheActor,
symcaches: SymCacheActor,
minidump: &Minidump,
scope: Scope,
sources: Arc<[SourceConfig]>,
Expand Down Expand Up @@ -387,6 +472,7 @@ async fn stackwalk(
scope,
sources,
cficaches,
symcaches,
ty,
first_module_debug_id,
rewrite_first_module,
Expand Down Expand Up @@ -540,6 +626,7 @@ impl SymbolicationActor {
duration,
} = stackwalk(
self.cficaches.clone(),
self.symcaches.clone(),
&minidump,
scope.clone(),
sources.clone(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use super::native::{get_relative_caller_addr, symbolicate_native_frame};
pub struct SymbolicationActor {
demangle_cache: DemangleCache,
pub(crate) objects: ObjectsActor,
symcaches: SymCacheActor,
pub(crate) symcaches: SymCacheActor,
pub(crate) cficaches: CfiCacheActor,
ppdb_caches: PortablePdbCacheActor,
pub(crate) sourcefiles_cache: Arc<SourceFilesCache>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,39 @@ async fn test_minidump_macos() {
assert_snapshot!(res);
}

/// Verifies that debug information is used to guide stack scanning.
///
/// The minidump being tested has frame pointers disabled, so in the absence of (usable) CFI,
/// there is no choice but to scan. Left to its own devices, the stack scanner can produce a lot
/// of spurious frames, so we want to use both CFI and debug info that we have available to guide
/// it. The heuristic is as follows: if stack scanning would produce an instruction address
/// * falling into some module,
/// * for which we have CFI or debug info,
/// * which doesn't cover that instruction address,
/// then that address is probably bogus and we discard the frame.
///
/// The CFI half of this heuristic was implemented in
/// https://github.com/getsentry/symbolicator/pull/1651.
/// https://github.com/getsentry/symbolicator/pull/1913 added the debug info part.
///
/// This test simulates an actual customer situation: we have both debug info and CFI
/// for the minidump, but the CFI is truncated/poor. If we only used CFI to constrain the
/// stack scanner, all frames in `sentry_example` would be rejected. However, since we also use
/// the (good) debug info to check the frames, they are retained.
///
/// The `dyld` frames in the stacktrace are found by scanning, and since we have no CFI or
/// debug info at all for that module, they're all accepted.
#[tokio::test]
async fn test_minidump_macos_broken_cfi() {
let res = stackwalk_minidump("macos-no-fp.dmp").await;
let crashed_thread = res
.stacktraces
.iter()
.find(|st| st.is_requesting.unwrap_or_default())
.unwrap();
assert_snapshot!(crashed_thread);
}

#[tokio::test]
async fn test_minidump_linux() {
let res = stackwalk_minidump("linux.dmp").await;
Expand Down
Loading
Loading