From a5c716ad7650eee74d8a7afafac448134ce5cb47 Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 17:09:07 -0800 Subject: [PATCH 1/6] Add per-address cycle histogram for line-level profiling - Add collect_address_histogram option to ProfileOptions - Track cycles per instruction address in OnExecute() - Add WriteAddressHistogram() to export JSON for disassembly viewer Co-Authored-By: Claude Opus 4.5 --- include/profiler.h | 22 ++++++++++++++++++++++ src/profiler.cpp | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/include/profiler.h b/include/profiler.h index 2e9f277..504b68e 100644 --- a/include/profiler.h +++ b/include/profiler.h @@ -43,6 +43,7 @@ enum class ProfileMode { struct ProfileOptions { ProfileMode mode = ProfileMode::Simple; uint32_t sample_rate = 1; // 1 = every instruction, N = every Nth instruction + bool collect_address_histogram = false; // Collect per-address cycle counts }; /** @@ -188,6 +189,25 @@ class Profiler { */ void PrintReport(std::ostream& out, size_t max_functions = 0) const; + // ------------------------------------------------------------------------- + // Address Histogram (for line-by-line profiling) + // ------------------------------------------------------------------------- + + /** + * Get per-address cycle histogram + * @return Map of PC address to cycles spent at that address + */ + const std::unordered_map& GetAddressHistogram() const { + return address_cycles_; + } + + /** + * Write address histogram to JSON file for use with disassembly viewer + * @param path Output file path + * @return true on success + */ + bool WriteAddressHistogram(const std::string& path) const; + // ------------------------------------------------------------------------- // Internal (called by cpu_hook) // ------------------------------------------------------------------------- @@ -210,10 +230,12 @@ class Profiler { std::vector functions_; // Sorted by start_addr std::unordered_map stats_; + std::unordered_map address_cycles_; // Per-address histogram std::vector call_stack_; // For CallStack mode ProfileMode mode_ = ProfileMode::Simple; bool running_ = false; + bool collect_address_histogram_ = false; uint32_t last_pc_ = 0; int64_t last_cycles_ = 0; uint64_t total_cycles_ = 0; diff --git a/src/profiler.cpp b/src/profiler.cpp index ee4de6b..3100664 100644 --- a/src/profiler.cpp +++ b/src/profiler.cpp @@ -170,6 +170,7 @@ void Profiler::Start(const ProfileOptions& options) { mode_ = options.mode; sample_rate_ = options.sample_rate > 0 ? options.sample_rate : 1; + collect_address_histogram_ = options.collect_address_histogram; sample_counter_ = 0; pending_cycles_ = 0; g_active_profiler = this; @@ -192,6 +193,7 @@ void Profiler::Reset() { for (auto& kv : stats_) { kv.second = FunctionStats(); } + address_cycles_.clear(); call_stack_.clear(); total_cycles_ = 0; pending_cycles_ = 0; @@ -262,6 +264,11 @@ void Profiler::OnExecute(uint32_t pc) { pending_cycles_ = 0; } + // Collect per-address histogram if enabled + if (collect_address_histogram_) { + address_cycles_[pc] += delta; + } + // Attribute cycles to current function const FunctionDef* func = LookupFunction(pc); if (func) { @@ -379,6 +386,38 @@ void Profiler::PrintReport(std::ostream& out, size_t max_functions) const { << "\n"; } +bool Profiler::WriteAddressHistogram(const std::string& path) const { + std::ofstream out(path); + if (!out) { + return false; + } + + // Write JSON format for use with disassembly viewer + out << "{\n"; + out << " \"sample_rate\": " << sample_rate_ << ",\n"; + out << " \"total_cycles\": " << total_cycles_ << ",\n"; + out << " \"address_count\": " << address_cycles_.size() << ",\n"; + out << " \"addresses\": {\n"; + + // Sort addresses for deterministic output + std::vector> sorted_addrs( + address_cycles_.begin(), address_cycles_.end()); + std::sort(sorted_addrs.begin(), sorted_addrs.end()); + + bool first = true; + for (const auto& kv : sorted_addrs) { + if (!first) out << ",\n"; + first = false; + out << " \"" << std::hex << std::setfill('0') << std::setw(8) + << kv.first << "\": " << std::dec << kv.second; + } + + out << "\n }\n"; + out << "}\n"; + + return out.good(); +} + bool Profiler::IsCallOpcode(uint16_t opcode) const { // JSR: 0100 1110 10xx xxxx (0x4E80-0x4EBF) // BSR: 0110 0001 xxxx xxxx (0x6100-0x61FF) From d7a9a8e77e2f4d34ad6af90970f97e4786ce72ef Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 17:16:56 -0800 Subject: [PATCH 2/6] Add test coverage for per-address cycle histogram - AddressHistogramDisabledByDefault: verify empty when not enabled - AddressHistogramCollected: verify populated when enabled - AddressHistogramSumsToTotal: verify sum equals total cycles - AddressHistogramWithSampling: verify works with sampling (with variance) - ResetClearsAddressHistogram: verify Reset() clears histogram - WriteAddressHistogramJSON: verify JSON output structure - WriteAddressHistogramRecordsSampleRate: verify sample_rate in JSON - WriteAddressHistogramInvalidPath: verify graceful failure - AddressHistogramContainsKnownAddresses: verify expected addresses Co-Authored-By: Claude Opus 4.5 --- tests/profiler_test.cpp | 253 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) diff --git a/tests/profiler_test.cpp b/tests/profiler_test.cpp index fc8655a..8e71253 100644 --- a/tests/profiler_test.cpp +++ b/tests/profiler_test.cpp @@ -14,6 +14,8 @@ #include "prime_sieve_rom.h" #include #include +#include +#include #include #include @@ -332,4 +334,255 @@ TEST_F(ProfilerTest, DoubleStart) { EXPECT_FALSE(profiler.IsRunning()); } +// ============================================================================= +// Address Histogram Tests +// ============================================================================= + +/** + * Test that address histogram is empty when disabled (default) + */ +TEST_F(ProfilerTest, AddressHistogramDisabledByDefault) { + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& histogram = profiler.GetAddressHistogram(); + EXPECT_TRUE(histogram.empty()) + << "Address histogram should be empty when collect_address_histogram is false"; +} + +/** + * Test that address histogram is populated when enabled + */ +TEST_F(ProfilerTest, AddressHistogramCollected) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& histogram = profiler.GetAddressHistogram(); + EXPECT_FALSE(histogram.empty()) + << "Address histogram should have entries when enabled"; + + // Should have multiple unique addresses + EXPECT_GT(histogram.size(), 10u) + << "Expected many unique instruction addresses"; + + // All addresses should be in valid ROM range + for (const auto& kv : histogram) { + EXPECT_GE(kv.first, 0x200u) << "Address should be >= ROM start"; + EXPECT_LT(kv.first, 0x400000u) << "Address should be < ROM end"; + EXPECT_GT(kv.second, 0u) << "Cycle count should be positive"; + } +} + +/** + * Test that address histogram cycles sum to total cycles + */ +TEST_F(ProfilerTest, AddressHistogramSumsToTotal) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& histogram = profiler.GetAddressHistogram(); + uint64_t histogram_total = 0; + for (const auto& kv : histogram) { + histogram_total += kv.second; + } + + uint64_t total_cycles = profiler.GetTotalCycles(); + + // Histogram sum should equal total cycles + EXPECT_EQ(histogram_total, total_cycles) + << "Sum of address histogram should equal total cycles"; +} + +/** + * Test that address histogram works with sampling + */ +TEST_F(ProfilerTest, AddressHistogramWithSampling) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + opts.sample_rate = 10; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& histogram = profiler.GetAddressHistogram(); + EXPECT_FALSE(histogram.empty()) + << "Address histogram should work with sampling"; + + // With sampling, we'll have fewer addresses but should still capture main ones + uint64_t histogram_total = 0; + for (const auto& kv : histogram) { + histogram_total += kv.second; + } + + // Histogram sum should be very close to total cycles. With sampling, + // there may be a small difference due to pending_cycles_ not yet attributed + // at the time profiling stopped (at most sample_rate * cycles_per_instruction). + uint64_t total_cycles = profiler.GetTotalCycles(); + uint64_t max_variance = opts.sample_rate * 200; // ~200 cycles max per 68k instruction + EXPECT_LE(total_cycles - histogram_total, max_variance) + << "Histogram total should be within " << max_variance << " of total cycles"; + EXPECT_GE(histogram_total, total_cycles * 99 / 100) + << "Histogram should capture at least 99% of cycles"; +} + +/** + * Test that Reset clears address histogram + */ +TEST_F(ProfilerTest, ResetClearsAddressHistogram) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + EXPECT_FALSE(profiler.GetAddressHistogram().empty()); + + profiler.Reset(); + + EXPECT_TRUE(profiler.GetAddressHistogram().empty()) + << "Reset should clear address histogram"; +} + +/** + * Test WriteAddressHistogram produces valid JSON + */ +TEST_F(ProfilerTest, WriteAddressHistogramJSON) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + // Write to temp file + std::string temp_path = "/tmp/gxtest_histogram_test.json"; + ASSERT_TRUE(profiler.WriteAddressHistogram(temp_path)) + << "WriteAddressHistogram should succeed"; + + // Read and verify JSON structure + std::ifstream file(temp_path); + ASSERT_TRUE(file.good()) << "Should be able to read output file"; + + std::string content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + file.close(); + + // Basic JSON structure checks + EXPECT_NE(content.find("\"sample_rate\":"), std::string::npos) + << "JSON should contain sample_rate"; + EXPECT_NE(content.find("\"total_cycles\":"), std::string::npos) + << "JSON should contain total_cycles"; + EXPECT_NE(content.find("\"address_count\":"), std::string::npos) + << "JSON should contain address_count"; + EXPECT_NE(content.find("\"addresses\":"), std::string::npos) + << "JSON should contain addresses object"; + + // Verify sample_rate value + EXPECT_NE(content.find("\"sample_rate\": 1"), std::string::npos) + << "Sample rate should be 1"; + + // Verify total_cycles matches + std::string total_str = "\"total_cycles\": " + std::to_string(profiler.GetTotalCycles()); + EXPECT_NE(content.find(total_str), std::string::npos) + << "Total cycles in JSON should match profiler"; + + // Verify address_count matches histogram size + std::string count_str = "\"address_count\": " + std::to_string(profiler.GetAddressHistogram().size()); + EXPECT_NE(content.find(count_str), std::string::npos) + << "Address count in JSON should match histogram size"; + + // Clean up + std::remove(temp_path.c_str()); +} + +/** + * Test WriteAddressHistogram with sampling records correct sample_rate + */ +TEST_F(ProfilerTest, WriteAddressHistogramRecordsSampleRate) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + opts.sample_rate = 50; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + std::string temp_path = "/tmp/gxtest_histogram_sample_test.json"; + ASSERT_TRUE(profiler.WriteAddressHistogram(temp_path)); + + std::ifstream file(temp_path); + std::string content((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + file.close(); + + EXPECT_NE(content.find("\"sample_rate\": 50"), std::string::npos) + << "JSON should record correct sample rate"; + + std::remove(temp_path.c_str()); +} + +/** + * Test WriteAddressHistogram fails gracefully on invalid path + */ +TEST_F(ProfilerTest, WriteAddressHistogramInvalidPath) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunFrames(1); + profiler.Stop(); + + // Try to write to an invalid path + EXPECT_FALSE(profiler.WriteAddressHistogram("/nonexistent/directory/file.json")) + << "WriteAddressHistogram should return false for invalid path"; +} + +/** + * Test address histogram contains expected addresses from known code + */ +TEST_F(ProfilerTest, AddressHistogramContainsKnownAddresses) { + GX::ProfileOptions opts; + opts.collect_address_histogram = true; + + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& histogram = profiler.GetAddressHistogram(); + + // The prime sieve starts at 0x200 (_start) and calls main at 0x2A0 + // We should see addresses in the run_sieve function (0x236-0x26A) + // which contains the main loop + bool found_sieve_addr = false; + for (const auto& kv : histogram) { + if (kv.first >= FUNC_RUN_SIEVE && kv.first < FUNC_COLLECT_PRIMES) { + found_sieve_addr = true; + break; + } + } + EXPECT_TRUE(found_sieve_addr) + << "Should have recorded cycles in run_sieve function"; + + // run_sieve should have significant cycles (it's the main loop) + uint64_t sieve_cycles = 0; + for (const auto& kv : histogram) { + if (kv.first >= FUNC_RUN_SIEVE && kv.first < FUNC_COLLECT_PRIMES) { + sieve_cycles += kv.second; + } + } + EXPECT_GT(sieve_cycles, profiler.GetTotalCycles() / 10) + << "run_sieve should account for significant portion of cycles"; +} + } // namespace From b12c8c314ce3498f849c24ea1a0414af78630088 Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 18:35:55 -0800 Subject: [PATCH 3/6] Use portable temp directory path in tests Replace hardcoded /tmp paths with std::filesystem::temp_directory_path() for cross-platform compatibility. Co-Authored-By: Claude Opus 4.5 --- tests/profiler_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/profiler_test.cpp b/tests/profiler_test.cpp index 8e71253..3a1d804 100644 --- a/tests/profiler_test.cpp +++ b/tests/profiler_test.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -466,7 +467,7 @@ TEST_F(ProfilerTest, WriteAddressHistogramJSON) { profiler.Stop(); // Write to temp file - std::string temp_path = "/tmp/gxtest_histogram_test.json"; + std::string temp_path = (std::filesystem::temp_directory_path() / "gxtest_histogram_test.json").string(); ASSERT_TRUE(profiler.WriteAddressHistogram(temp_path)) << "WriteAddressHistogram should succeed"; @@ -518,7 +519,7 @@ TEST_F(ProfilerTest, WriteAddressHistogramRecordsSampleRate) { emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); profiler.Stop(); - std::string temp_path = "/tmp/gxtest_histogram_sample_test.json"; + std::string temp_path = (std::filesystem::temp_directory_path() / "gxtest_histogram_sample_test.json").string(); ASSERT_TRUE(profiler.WriteAddressHistogram(temp_path)); std::ifstream file(temp_path); From 0fc514764bc4d4bc4e7d1c2896468773f89feb18 Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 18:36:48 -0800 Subject: [PATCH 4/6] Add comment clarifying stream error handling in WriteAddressHistogram Co-Authored-By: Claude Opus 4.5 --- src/profiler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/profiler.cpp b/src/profiler.cpp index 3100664..eb2e3a4 100644 --- a/src/profiler.cpp +++ b/src/profiler.cpp @@ -392,7 +392,9 @@ bool Profiler::WriteAddressHistogram(const std::string& path) const { return false; } - // Write JSON format for use with disassembly viewer + // Write JSON format for use with disassembly viewer. + // Note: Stream error state persists across writes, so out.good() at the + // end correctly detects any intermediate write failures. out << "{\n"; out << " \"sample_rate\": " << sample_rate_ << ",\n"; out << " \"total_cycles\": " << total_cycles_ << ",\n"; From e742f8137a24d20533fdc582c6fe53d49c21bdb1 Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 18:37:30 -0800 Subject: [PATCH 5/6] Document sampling limitation for address histogram Clarify that when sample_rate > 1, address histogram granularity is reduced since only every Nth instruction is recorded. Recommend sample_rate = 1 for accurate line-level profiling. Co-Authored-By: Claude Opus 4.5 --- include/profiler.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/profiler.h b/include/profiler.h index 504b68e..f31dffb 100644 --- a/include/profiler.h +++ b/include/profiler.h @@ -43,7 +43,14 @@ enum class ProfileMode { struct ProfileOptions { ProfileMode mode = ProfileMode::Simple; uint32_t sample_rate = 1; // 1 = every instruction, N = every Nth instruction - bool collect_address_histogram = false; // Collect per-address cycle counts + + // Collect per-address cycle counts for line-level profiling. + // NOTE: When sample_rate > 1, only every Nth instruction's address is + // recorded, with all accumulated cycles attributed to that address. This + // reduces granularity significantly. For accurate line-level profiling, + // use sample_rate = 1. Sampling is still useful for reducing overhead when + // only function-level stats are needed. + bool collect_address_histogram = false; }; /** From 2dc6ab1bcf8cec11ad8f9fd759cafcca58f885ed Mon Sep 17 00:00:00 2001 From: John O'Laughlin Date: Tue, 20 Jan 2026 18:48:11 -0800 Subject: [PATCH 6/6] Fix potential unsigned integer underflow in histogram test Use absolute difference to avoid underflow if histogram_total exceeds total_cycles. Co-Authored-By: Claude Opus 4.5 --- tests/profiler_test.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/profiler_test.cpp b/tests/profiler_test.cpp index 3a1d804..6976d78 100644 --- a/tests/profiler_test.cpp +++ b/tests/profiler_test.cpp @@ -430,7 +430,10 @@ TEST_F(ProfilerTest, AddressHistogramWithSampling) { // at the time profiling stopped (at most sample_rate * cycles_per_instruction). uint64_t total_cycles = profiler.GetTotalCycles(); uint64_t max_variance = opts.sample_rate * 200; // ~200 cycles max per 68k instruction - EXPECT_LE(total_cycles - histogram_total, max_variance) + uint64_t diff = (total_cycles > histogram_total) + ? (total_cycles - histogram_total) + : (histogram_total - total_cycles); + EXPECT_LE(diff, max_variance) << "Histogram total should be within " << max_variance << " of total cycles"; EXPECT_GE(histogram_total, total_cycles * 99 / 100) << "Histogram should capture at least 99% of cycles";