Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/src)
if(PROTOC_FOUND)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.h
DEPENDS protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
COMMAND protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
--cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
Expand All @@ -381,6 +382,7 @@ add_custom_command(
else()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.h
COMMAND protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
--cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src
Expand All @@ -395,6 +397,7 @@ add_library(libbloaty STATIC
src/bloaty.h
src/disassemble.cc
${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.h
src/dwarf/attr.h
src/dwarf/attr.cc
src/dwarf/dwarf_util.cc
Expand Down
17 changes: 17 additions & 0 deletions doc/using.md
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,23 @@ can create with `dsymutil`:
```
$ dsymutil bloaty
$ strip bloaty (optional)
$ ./bloaty -d symbols bloaty # Auto-discovers bloaty.dSYM
```

Bloaty will automatically discover and load debug symbols from
`bloaty.dSYM/Contents/Resources/DWARF/bloaty` when analyzing `bloaty`.

If you used `dsymutil -o` to write the dSYM to a different directory,
you can explicitly specify its location:

```
$ dsymutil bloaty -o /path/to/symbols/bloaty.dSYM
$ ./bloaty -d symbols --dsym=/path/to/symbols/bloaty.dSYM bloaty
```

For cross-platform compatibility, `--debug-file` still works:

```
$ ./bloaty -d symbols --debug-file=bloaty.dSYM/Contents/Resources/DWARF/bloaty bloaty
```

Expand Down
158 changes: 156 additions & 2 deletions src/bloaty.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typedef size_t z_size_t;

#include <atomic>
#include <cmath>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
Expand Down Expand Up @@ -1526,6 +1527,9 @@ class Bloaty {
void AddFilename(const std::string& filename, bool base_file);
void AddDebugFilename(const std::string& filename);
void AddSourceMapFilename(const std::string& filename);
void AddDsymFilename(const Options& options, const std::string& filename);
void TryAutoLoadDsym(const std::string& binary_path,
const std::string& build_id);

size_t GetSourceCount() const { return sources_.size(); }

Expand Down Expand Up @@ -1573,6 +1577,15 @@ class Bloaty {

std::unique_ptr<ObjectFile> GetObjectFile(const std::string& filename) const;

bool TryGetBuildId(const std::string& filename, std::string* build_id);
bool TryValidateAndAddDsym(const std::string& dsym_path,
const std::string& main_binary_path,
bool strict_validation);

std::filesystem::path ConstructDsymPath(const std::filesystem::path& binary_path);
std::filesystem::path GetDsymInternalPath(const std::filesystem::path& dsym_bundle,
const std::string& binary_name);

const InputFileFactory& file_factory_;
const Options options_;

Expand Down Expand Up @@ -1634,6 +1647,9 @@ void Bloaty::AddFilename(const std::string& filename, bool is_base) {
auto object_file = GetObjectFile(filename);
std::string build_id = object_file->GetBuildId();

// Automatically try to load dSYM file for Mach-O binaries
TryAutoLoadDsym(filename, build_id);

if (is_base) {
base_files_.push_back({filename, build_id});
} else {
Expand Down Expand Up @@ -1663,6 +1679,137 @@ void Bloaty::AddSourceMapFilename(const std::string& filename) {
sourcemap_files_[sourcemap_build_id] = sourcemap_filename;
}

bool Bloaty::TryGetBuildId(const std::string& filename, std::string* build_id) {
try {
auto object_file = GetObjectFile(filename);
*build_id = object_file->GetBuildId();
return true;
} catch (const std::exception&) {
return false;
}
}

bool Bloaty::TryValidateAndAddDsym(const std::string& dsym_path,
const std::string& main_binary_path,
bool strict_validation) {
std::string main_build_id, dsym_build_id;

if (!TryGetBuildId(main_binary_path, &main_build_id) ||
!TryGetBuildId(dsym_path, &dsym_build_id)) {
if (strict_validation) {
THROWF("Cannot read build ID from files: main=$0, dsym=$1",
main_binary_path, dsym_path);
}
return false;
}

if (main_build_id != dsym_build_id) {
if (strict_validation) {
THROWF("dSYM file $0 has mismatched build ID (expected $1, got $2)",
dsym_path,
absl::BytesToHexString(main_build_id),
absl::BytesToHexString(dsym_build_id));
} else {
WARN("dSYM file $0 has mismatched build ID (expected $1, got $2) - skipping",
dsym_path,
absl::BytesToHexString(main_build_id),
absl::BytesToHexString(dsym_build_id));
}
return false;
}

AddDebugFilename(dsym_path);
return true;
}

std::filesystem::path Bloaty::ConstructDsymPath(const std::filesystem::path& binary_path) {
std::filesystem::path binary_file(binary_path);
std::string binary_name = binary_file.filename().string();
std::filesystem::path binary_dir = binary_file.parent_path();

std::filesystem::path dsym_bundle = binary_dir / (binary_name + ".dSYM");
return dsym_bundle / "Contents" / "Resources" / "DWARF" / binary_name;
}

std::filesystem::path Bloaty::GetDsymInternalPath(const std::filesystem::path& dsym_bundle,
const std::string& binary_name) {
return dsym_bundle / "Contents" / "Resources" / "DWARF" / binary_name;
}

// Loads dSYM files explicitly specified by the user using the --dsym flag.
//
// This function processes dSYM bundle paths with strict validation:
// - Warns if no dSYM files match the input binaries
// - Throws if the dSYM bundle path doesn't exist or isn't a directory
// - Throws if dSYM files are invalid or unreadable
// - Throws if buildids don't match between binary and dSYM
//
// The strict behaviour is intentional: when users explicitly specify --dsym,
// they expect it to work and want to know immediately if there are problems.
void Bloaty::AddDsymFilename(const Options& options,
const std::string& filename) {
if (!std::filesystem::exists(filename)) {
THROWF("couldn't open '$0'", filename.c_str());
}

std::filesystem::path dsym_bundle(filename);
// Only support dSYM bundles
if (std::filesystem::is_directory(dsym_bundle)) {
bool found_any = false;
for (const auto& binary_filename : options.filename()) {
std::filesystem::path binary_name = std::filesystem::path(binary_filename).filename();
std::filesystem::path dsym_file = GetDsymInternalPath(dsym_bundle, binary_name.string());

if (std::filesystem::exists(dsym_file)) {
if (TryValidateAndAddDsym(dsym_file.string(), binary_filename,
/*strict_validation=*/true)) {
found_any = true;
}
}
}
if (!found_any) {
WARN("dSYM bundle '$0' contains no debug files matching input binaries",
filename);
}
} else {
THROWF("'$0' is not a dSYM bundle directory", filename);
}
}

// Automatically discovers and loads dSYM files using macOS conventions.
//
// This function attempts to find dSYM bundles using the standard macOS layout:
// <binary_dir>/<binary_name>.dSYM/Contents/Resources/DWARF/<binary_name>
//
// Key behavioural differences from AddDsymFilename():
// - Never throws exceptions or breaks analysis if discovery fails
// - Ignores invalid/corrupted dSYM files
// - Warns on buildid mismatches but continues without the dSYM
//
// The permissive behaviour is intentional because auto-loading is speculative and
// shouldn't break analysis of the main binary. Users didn't explicitly
// request the dSYM so failures should be silent.
void Bloaty::TryAutoLoadDsym(const std::string& binary_path,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the user docs, I would expect that auto-loading of a dSYM performs exactly the same logic as explicitly specifying --dsym, but that doesn't seem to be the case.

Is there a reason to have two distinct functions for this? If so, can we add more explanation in the user docs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They have slightly different semantics and have different error handling.

For auto loading, I think lookup should be silent best effort but not throw/error if it fails. Where if a user specifies explicitly a --dsym and it's invalid or missing, that it should be reported to the user e.g. AddDsymFilename will throw if if file doesn't exist or if not a dSYM bundle directory.

I can explain in the documentation, but I think there's probably a little bit of refactoring that I can do here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the diff which I hope explains the differences between the two functions in terms of behaviour. i.e. autoloading is silent best effort, whereas the explicit --dsym option loading is validated more strictly. Please let me know you have any feedback on it.

This is a bit of a sale pitch to add the option but I think this patch, specifically the autoloading part, will definitely improve the out of the box usability for users with mach-o binaries. I hit both of these scenarios when first using bloaty.

With patch:

$ clang hw.c -g
$ bloaty -d compileunits ~/a.out
    FILE SIZE        VM SIZE    
 --------------  -------------- 
  48.7%  16.0Ki  33.3%  16.0Ki    [__DATA_CONST]
   2.6%     866  33.3%  16.0Ki    [__LINKEDIT]
  45.0%  14.8Ki  31.0%  14.9Ki    [__TEXT]
   3.2%  1.06Ki   2.0%     984    [Mach-O Headers]
   0.3%      88   0.2%      88    [__TEXT,__unwind_info]
   0.2%      82   0.2%      82    hw.c
   0.0%       8   0.0%       8    [__DATA_CONST,__got]
 100.0%  32.9Ki 100.0%  48.0Ki    TOTAL

vs

$ bloaty -d compileunits ~/a.out
bloaty: missing debug info

vs manually specifying the full path into the dSYM companion file

$ bloaty -d compileunits ~/a.out  --debug-file ~/a.out.dSYM/Contents/Resources/DWARF/a.out
    FILE SIZE        VM SIZE    
 --------------  -------------- 
  48.7%  16.0Ki  33.3%  16.0Ki    [__DATA_CONST]
   2.6%     866  33.3%  16.0Ki    [__LINKEDIT]
  45.0%  14.8Ki  31.0%  14.9Ki    [__TEXT]
   3.2%  1.06Ki   2.0%     984    [Mach-O Headers]
   0.3%      88   0.2%      88    [__TEXT,__unwind_info]
   0.2%      82   0.2%      82    hw.c
   0.0%       8   0.0%       8    [__DATA_CONST,__got]
 100.0%  32.9Ki 100.0%  48.0Ki    TOTAL

const std::string& build_id) {
if (build_id.empty()) {
return;
}

std::filesystem::path dsym_file = ConstructDsymPath(binary_path);

if (std::filesystem::exists(dsym_file)) {
// Use non-strict validation because auto-loading is speculative
// GetObjectFile() will throw if the file isn't a valid Mach-O and
// we want auto-loading to silently fail rather than break analysis.
if (TryValidateAndAddDsym(dsym_file.string(), binary_path,
/*strict_validation=*/false)) {
if (verbose_level > 1) {
printf("Found matching dSYM file: %s\n", dsym_file.string().c_str());
}
}
}
}

void Bloaty::DefineCustomDataSource(const CustomDataSource& source) {
if (source.base_data_source() == "symbols") {
THROW(
Expand Down Expand Up @@ -2030,6 +2177,7 @@ USAGE: bloaty [OPTION]... FILE... [-- BASE_FILE...]
-c FILE Load configuration from <file>.
-d SOURCE,SOURCE Comma-separated list of sources to scan.
--debug-file=FILE Use this file for debug symbols and/or symbol table.
--dsym=FILE Use this dSYM file or bundle (Mach-O only).
--source-map=ID=FILE
Use this source map file for the binary. The ID can be
the build ID (or Wasm sourceMappingURL) or the file path
Expand Down Expand Up @@ -2224,6 +2372,8 @@ bool DoParseOptions(bool skip_unknown, int* argc, char** argv[],
}
} else if (args.TryParseOption("--debug-file", &option)) {
options->add_debug_filename(std::string(option));
} else if (args.TryParseOption("--dsym", &option)) {
options->add_dsym_path(std::string(option));
} else if (args.TryParseUint64Option("--debug-fileoff", &uint64_option)) {
if (options->has_debug_fileoff()) {
THROW("currently we only support a single debug fileoff");
Expand Down Expand Up @@ -2349,6 +2499,8 @@ void BloatyDoMain(const Options& options, const InputFileFactory& file_factory,
THROW("max_rows_per_level must be at least 1");
}

verbose_level = options.verbose_level();

for (auto& filename : options.filename()) {
bloaty.AddFilename(filename, false);
}
Expand All @@ -2361,6 +2513,10 @@ void BloatyDoMain(const Options& options, const InputFileFactory& file_factory,
bloaty.AddDebugFilename(debug_filename);
}

for (const auto& dsym_path : options.dsym_path()) {
bloaty.AddDsymFilename(options, dsym_path);
}

for (auto& sourcemap : options.source_map()) {
bloaty.AddSourceMapFilename(sourcemap);
}
Expand All @@ -2380,8 +2536,6 @@ void BloatyDoMain(const Options& options, const InputFileFactory& file_factory,
}
}

verbose_level = options.verbose_level();

if (options.data_source_size() > 0) {
bloaty.ScanAndRollup(options, output);
} else if (options.has_disassemble_function()) {
Expand Down
3 changes: 3 additions & 0 deletions src/bloaty.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ message Options {
// Build id to source map file names, delimited by '='.
repeated string source_map = 15;

// Mach-O specific option to specify path to a dSYM file.
repeated string dsym_path = 16;

// The data sources to scan in each file. At least one data source must be
// specified. If more than one source is specified, the output is
// hierarchical.
Expand Down
Loading