diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..f9c71a5 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +8.5.1 diff --git a/BUILD.bazel b/BUILD.bazel index e51f186..28be2a1 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -67,8 +67,11 @@ cc_library( "vendor/genplusgx/cd_hw", "vendor/genplusgx/debug", ], - # Link math library on Linux (handled automatically on other platforms) - linkopts = ["-lm"], + # Link math library on Linux (not needed on macOS where it's in libSystem) + linkopts = select({ + "@platforms//os:linux": ["-lm"], + "//conditions:default": [], + }), ) # gxtest C++ wrapper library @@ -129,3 +132,16 @@ cc_test( "@googletest//:gtest_main", ], ) + +# Profiler test +cc_test( + name = "gxtest_profiler", + srcs = [ + "tests/profiler_test.cpp", + "tests/prime_sieve_rom.h", + ], + deps = [ + ":gxtest", + "@googletest//:gtest_main", + ], +) diff --git a/include/profiler.h b/include/profiler.h index cc856b4..2e9f277 100644 --- a/include/profiler.h +++ b/include/profiler.h @@ -9,7 +9,7 @@ * GX::Profiler profiler; * profiler.AddFunction(0x001000, 0x001100, "generate_moves"); * profiler.AddFunction(0x001100, 0x001200, "score_move"); - * // Or load from ELF: profiler.LoadSymbols("game.elf"); + * // Or load from ELF: profiler.LoadSymbolsFromELF("game.elf"); * * profiler.Start(); * emu.RunFrames(1000); @@ -37,6 +37,14 @@ enum class ProfileMode { CallStack // Tracks call stack for inclusive cycle counts }; +/** + * Profiling options + */ +struct ProfileOptions { + ProfileMode mode = ProfileMode::Simple; + uint32_t sample_rate = 1; // 1 = every instruction, N = every Nth instruction +}; + /** * Statistics for a single function */ @@ -126,6 +134,12 @@ class Profiler { */ void Start(ProfileMode mode = ProfileMode::Simple); + /** + * Start profiling with options + * @param options ProfileOptions struct with mode and sample_rate + */ + void Start(const ProfileOptions& options); + /** * Stop profiling - removes the cpu_hook callback */ @@ -162,6 +176,11 @@ class Profiler { */ uint64_t GetTotalCycles() const { return total_cycles_; } + /** + * Get current sample rate (1 = every instruction) + */ + uint32_t GetSampleRate() const { return sample_rate_; } + /** * Print a formatted profile report * @param out Output stream @@ -198,6 +217,9 @@ class Profiler { uint32_t last_pc_ = 0; int64_t last_cycles_ = 0; uint64_t total_cycles_ = 0; + uint32_t sample_rate_ = 1; + uint32_t sample_counter_ = 0; + int64_t pending_cycles_ = 0; // Accumulated cycles since last sample (for sampling mode) }; /** Global profiler instance (needed for cpu_hook callback) */ diff --git a/roms/prime_sieve/main.c b/roms/prime_sieve/main.c index 0d8bf5f..6ab5e76 100644 --- a/roms/prime_sieve/main.c +++ b/roms/prime_sieve/main.c @@ -30,6 +30,7 @@ typedef unsigned int uint32_t; /** * Initialize the sieve array to all zeros (0 = potentially prime) */ +__attribute__((noinline)) static void clear_sieve(void) { for (int i = 0; i < SIEVE_SIZE; i++) { @@ -40,6 +41,7 @@ static void clear_sieve(void) /** * Mark 0 and 1 as composite (not prime) */ +__attribute__((noinline)) static void mark_trivial_composites(void) { SIEVE_ARRAY[0] = 1; /* 0 is not prime */ @@ -50,6 +52,7 @@ static void mark_trivial_composites(void) * Run the Sieve of Eratosthenes algorithm * For each prime p, mark all multiples of p as composite */ +__attribute__((noinline)) static void run_sieve(void) { /* Only need to check up to sqrt(SIEVE_SIZE) ≈ 24 */ @@ -69,6 +72,7 @@ static void run_sieve(void) /** * Collect the first NUM_PRIMES primes from the sieve into the results array */ +__attribute__((noinline)) static void collect_primes(void) { int count = 0; diff --git a/roms/prime_sieve/prime_sieve_rom.h b/roms/prime_sieve/prime_sieve_rom.h index 118baf0..aabf9ec 100644 --- a/roms/prime_sieve/prime_sieve_rom.h +++ b/roms/prime_sieve/prime_sieve_rom.h @@ -10,14 +10,14 @@ namespace GX { namespace TestRoms { -// Prime Sieve ROM (676 bytes) +// Prime Sieve ROM (706 bytes) // Computes first 100 primes using Sieve of Eratosthenes // Results written to: // $FF0300-$FF03C7: Prime values (100 x 16-bit words) // $FF0500: Prime count // $FF0502: Done flag ($DEAD when complete) -constexpr size_t PRIME_SIEVE_ROM_SIZE = 676; +constexpr size_t PRIME_SIEVE_ROM_SIZE = 706; constexpr uint8_t PRIME_SIEVE_ROM[] = { 0x00, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -63,20 +63,22 @@ constexpr uint8_t PRIME_SIEVE_ROM[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f, 0xf9, 0x00, 0xff, - 0xfe, 0x00, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x10, 0x60, 0xfe, 0x00, 0x00, + 0xfe, 0x00, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0xa0, 0x60, 0xfe, 0x00, 0x00, 0x20, 0x7c, 0x00, 0xff, 0x00, 0x00, 0x10, 0xfc, 0x00, 0x00, 0xb1, 0xfc, - 0x00, 0xff, 0x02, 0x58, 0x66, 0xf4, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, - 0x00, 0x00, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, 0x00, 0x01, 0x72, 0x02, - 0x20, 0x41, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, 0x10, 0x10, 0x67, 0x46, - 0x52, 0x81, 0x70, 0x19, 0xb0, 0x81, 0x66, 0xec, 0x22, 0x7c, 0x00, 0xff, - 0x00, 0x02, 0x72, 0x00, 0x10, 0x11, 0x66, 0x12, 0x70, 0x00, 0x30, 0x01, - 0xd0, 0x80, 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x03, 0x00, 0x30, 0x89, - 0x52, 0x81, 0xb3, 0xfc, 0x00, 0xff, 0x02, 0x57, 0x67, 0x08, 0x52, 0x89, - 0x70, 0x64, 0xb0, 0x81, 0x66, 0xda, 0x33, 0xc1, 0x00, 0xff, 0x05, 0x00, - 0x33, 0xfc, 0xde, 0xad, 0x00, 0xff, 0x05, 0x02, 0x4e, 0x75, 0x70, 0x00, - 0x30, 0x01, 0xd0, 0x80, 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, - 0x10, 0xbc, 0x00, 0x01, 0xd0, 0x81, 0x0c, 0x80, 0x00, 0x00, 0x02, 0x57, - 0x6f, 0xea, 0x60, 0x9c + 0x00, 0xff, 0x02, 0x58, 0x66, 0xf4, 0x4e, 0x75, 0x13, 0xfc, 0x00, 0x01, + 0x00, 0xff, 0x00, 0x00, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, 0x00, 0x01, + 0x4e, 0x75, 0x72, 0x02, 0x20, 0x41, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, + 0x10, 0x10, 0x66, 0x1c, 0x70, 0x00, 0x30, 0x01, 0xd0, 0x80, 0x20, 0x40, + 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, 0x10, 0xbc, 0x00, 0x01, 0xd0, 0x81, + 0x0c, 0x80, 0x00, 0x00, 0x02, 0x57, 0x6f, 0xea, 0x52, 0x81, 0x70, 0x19, + 0xb0, 0x81, 0x66, 0xd0, 0x4e, 0x75, 0x22, 0x7c, 0x00, 0xff, 0x00, 0x02, + 0x72, 0x00, 0x10, 0x11, 0x66, 0x12, 0x70, 0x00, 0x30, 0x01, 0xd0, 0x80, + 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x03, 0x00, 0x30, 0x89, 0x52, 0x81, + 0xb3, 0xfc, 0x00, 0xff, 0x02, 0x57, 0x67, 0x08, 0x52, 0x89, 0x70, 0x64, + 0xb0, 0x81, 0x66, 0xda, 0x33, 0xc1, 0x00, 0xff, 0x05, 0x00, 0x4e, 0x75, + 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x10, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x24, + 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x36, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x6a, + 0x33, 0xfc, 0xde, 0xad, 0x00, 0xff, 0x05, 0x02, 0x4e, 0x75 }; // First 100 prime numbers for verification diff --git a/src/profiler.cpp b/src/profiler.cpp index 9bd12a1..ee4de6b 100644 --- a/src/profiler.cpp +++ b/src/profiler.cpp @@ -44,6 +44,11 @@ Profiler::~Profiler() { } void Profiler::AddFunction(uint32_t start_addr, uint32_t end_addr, const std::string& name) { + // Validate function range + if (end_addr <= start_addr) { + return; // Invalid range, ignore + } + FunctionDef func = {start_addr, end_addr, name}; // Insert in sorted order by start_addr @@ -60,7 +65,17 @@ void Profiler::AddFunction(uint32_t start_addr, uint32_t end_addr, const std::st int Profiler::LoadSymbolsFromELF(const std::string& elf_path) { // Use nm to extract symbols // Format: "address type name" - std::string cmd = "nm -S --defined-only " + elf_path + " 2>/dev/null"; + // Shell-quote the path to prevent command injection + std::string quoted_path = "'"; + for (char c : elf_path) { + if (c == '\'') { + quoted_path += "'\\''"; // End quote, escaped quote, start quote + } else { + quoted_path += c; + } + } + quoted_path += "'"; + std::string cmd = "nm -S --defined-only " + quoted_path + " 2>/dev/null"; FILE* pipe = popen(cmd.c_str(), "r"); if (!pipe) { return -1; @@ -73,17 +88,21 @@ int Profiler::LoadSymbolsFromELF(const std::string& elf_path) { char type; char name[256]; - // Parse: "address size type name" (with size) or "address type name" (without) + // Parse nm -S output: "hex_address hex_size type name" (both in hex, with size) or "hex_address type name" (without size) if (sscanf(line, "%x %x %c %255s", &addr, &size, &type, name) == 4) { // Has size - use it if (type == 'T' || type == 't') { // Text (code) symbols only - AddFunction(addr, addr + size, name); + // Guard against overflow + uint32_t end_addr = (size <= UINT32_MAX - addr) ? addr + size : UINT32_MAX; + AddFunction(addr, end_addr, name); count++; } } else if (sscanf(line, "%x %c %255s", &addr, &type, name) == 3) { // No size - estimate from next symbol (done after loading all) if (type == 'T' || type == 't') { - AddFunction(addr, addr + 0x100, name); // Default 256 bytes + // Guard against overflow (0x100 = 256 byte default) + uint32_t end_addr = (addr <= UINT32_MAX - 0x100) ? addr + 0x100 : UINT32_MAX; + AddFunction(addr, end_addr, name); count++; } } @@ -102,6 +121,16 @@ int Profiler::LoadSymbolsFromELF(const std::string& elf_path) { } int Profiler::LoadSymbolsFromFile(const std::string& path) { + // Load symbols from a simple text file format: + // + // Example: + // 00000200 16 _start + // 00000210 94 main + // + // Parsing note: + // - This function parses the address as hex (%x) and the size as decimal (%u). + // - This intentionally differs from LoadSymbolsFromELF (nm output), which uses + // hex for both address and size (%x for each). std::ifstream file(path); if (!file) { return -1; @@ -114,7 +143,9 @@ int Profiler::LoadSymbolsFromFile(const std::string& path) { char name[256]; if (sscanf(line.c_str(), "%x %u %255s", &addr, &size, name) == 3) { - AddFunction(addr, addr + size, name); + // Guard against overflow + uint32_t end_addr = (size <= UINT32_MAX - addr) ? addr + size : UINT32_MAX; + AddFunction(addr, end_addr, name); count++; } } @@ -128,9 +159,19 @@ void Profiler::ClearSymbols() { } void Profiler::Start(ProfileMode mode) { + ProfileOptions opts; + opts.mode = mode; + opts.sample_rate = 1; + Start(opts); +} + +void Profiler::Start(const ProfileOptions& options) { if (running_) return; - mode_ = mode; + mode_ = options.mode; + sample_rate_ = options.sample_rate > 0 ? options.sample_rate : 1; + sample_counter_ = 0; + pending_cycles_ = 0; g_active_profiler = this; set_cpu_hook(ProfilerHook); running_ = true; @@ -153,6 +194,7 @@ void Profiler::Reset() { } call_stack_.clear(); total_cycles_ = 0; + pending_cycles_ = 0; last_pc_ = 0; if (running_) { last_cycles_ = m68k.cycles; @@ -184,7 +226,7 @@ const FunctionDef* Profiler::LookupFunction(uint32_t addr) const { uint16_t Profiler::ReadWord(uint32_t addr) const { // Read from ROM (cart.rom is byteswapped on little-endian) - if (addr < 0x400000 && addr < cart.romsize) { + if (addr < 0x400000 && addr + 1 < cart.romsize) { #ifdef LSB_FIRST return (cart.rom[addr ^ 1] << 8) | cart.rom[(addr + 1) ^ 1]; #else @@ -204,6 +246,22 @@ void Profiler::OnExecute(uint32_t pc) { total_cycles_ += delta; + // Sampling: only do expensive work every Nth instruction + if (sample_rate_ > 1) { + pending_cycles_ += delta; + sample_counter_++; + if (sample_counter_ < sample_rate_) { + // Update last_pc_ even on skipped samples so CallStack mode + // can track call/return instructions correctly + last_pc_ = pc; + return; + } + sample_counter_ = 0; + // Use accumulated cycles since last sample (more accurate than scaling) + delta = pending_cycles_; + pending_cycles_ = 0; + } + // Attribute cycles to current function const FunctionDef* func = LookupFunction(pc); if (func) { @@ -211,6 +269,7 @@ void Profiler::OnExecute(uint32_t pc) { s.cycles_exclusive += delta; // Count function entry (PC moved into this function from outside) + // Note: With sampling, this undercounts entries that happen between samples if (last_pc_ != 0) { const FunctionDef* last_func = LookupFunction(last_pc_); if (last_func != func) { @@ -220,12 +279,17 @@ void Profiler::OnExecute(uint32_t pc) { } // CallStack mode: track JSR/BSR/RTS for inclusive cycles + // Note: With sampling enabled, we only check every Nth instruction for + // call/return opcodes, so inclusive timing will be less accurate. if (mode_ == ProfileMode::CallStack && last_pc_ != 0) { uint16_t opcode = ReadWord(last_pc_); if (IsCallOpcode(opcode)) { // Entering a new function - push frame - if (func) { + // Limit stack depth to prevent unbounded growth from unbalanced calls + // (e.g., indirect jumps, exceptions, or non-standard control flow) + constexpr size_t MAX_CALL_STACK_DEPTH = 256; + if (func && call_stack_.size() < MAX_CALL_STACK_DEPTH) { call_stack_.push_back({func->start_addr, current_cycles}); } } else if (IsReturnOpcode(opcode) && !call_stack_.empty()) { @@ -279,6 +343,9 @@ void Profiler::PrintReport(std::ostream& out, size_t max_functions) const { // Print header bool show_inclusive = (mode_ == ProfileMode::CallStack); out << "\n"; + if (sample_rate_ > 1) { + out << "Sample rate: 1/" << sample_rate_ << " (estimated cycles)\n"; + } out << std::setw(30) << std::left << "Function" << std::setw(12) << std::right << "Cycles"; if (show_inclusive) { diff --git a/tests/prime_sieve_rom.h b/tests/prime_sieve_rom.h index 118baf0..aabf9ec 100644 --- a/tests/prime_sieve_rom.h +++ b/tests/prime_sieve_rom.h @@ -10,14 +10,14 @@ namespace GX { namespace TestRoms { -// Prime Sieve ROM (676 bytes) +// Prime Sieve ROM (706 bytes) // Computes first 100 primes using Sieve of Eratosthenes // Results written to: // $FF0300-$FF03C7: Prime values (100 x 16-bit words) // $FF0500: Prime count // $FF0502: Done flag ($DEAD when complete) -constexpr size_t PRIME_SIEVE_ROM_SIZE = 676; +constexpr size_t PRIME_SIEVE_ROM_SIZE = 706; constexpr uint8_t PRIME_SIEVE_ROM[] = { 0x00, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -63,20 +63,22 @@ constexpr uint8_t PRIME_SIEVE_ROM[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f, 0xf9, 0x00, 0xff, - 0xfe, 0x00, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x10, 0x60, 0xfe, 0x00, 0x00, + 0xfe, 0x00, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0xa0, 0x60, 0xfe, 0x00, 0x00, 0x20, 0x7c, 0x00, 0xff, 0x00, 0x00, 0x10, 0xfc, 0x00, 0x00, 0xb1, 0xfc, - 0x00, 0xff, 0x02, 0x58, 0x66, 0xf4, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, - 0x00, 0x00, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, 0x00, 0x01, 0x72, 0x02, - 0x20, 0x41, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, 0x10, 0x10, 0x67, 0x46, - 0x52, 0x81, 0x70, 0x19, 0xb0, 0x81, 0x66, 0xec, 0x22, 0x7c, 0x00, 0xff, - 0x00, 0x02, 0x72, 0x00, 0x10, 0x11, 0x66, 0x12, 0x70, 0x00, 0x30, 0x01, - 0xd0, 0x80, 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x03, 0x00, 0x30, 0x89, - 0x52, 0x81, 0xb3, 0xfc, 0x00, 0xff, 0x02, 0x57, 0x67, 0x08, 0x52, 0x89, - 0x70, 0x64, 0xb0, 0x81, 0x66, 0xda, 0x33, 0xc1, 0x00, 0xff, 0x05, 0x00, - 0x33, 0xfc, 0xde, 0xad, 0x00, 0xff, 0x05, 0x02, 0x4e, 0x75, 0x70, 0x00, - 0x30, 0x01, 0xd0, 0x80, 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, - 0x10, 0xbc, 0x00, 0x01, 0xd0, 0x81, 0x0c, 0x80, 0x00, 0x00, 0x02, 0x57, - 0x6f, 0xea, 0x60, 0x9c + 0x00, 0xff, 0x02, 0x58, 0x66, 0xf4, 0x4e, 0x75, 0x13, 0xfc, 0x00, 0x01, + 0x00, 0xff, 0x00, 0x00, 0x13, 0xfc, 0x00, 0x01, 0x00, 0xff, 0x00, 0x01, + 0x4e, 0x75, 0x72, 0x02, 0x20, 0x41, 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, + 0x10, 0x10, 0x66, 0x1c, 0x70, 0x00, 0x30, 0x01, 0xd0, 0x80, 0x20, 0x40, + 0xd1, 0xfc, 0x00, 0xff, 0x00, 0x00, 0x10, 0xbc, 0x00, 0x01, 0xd0, 0x81, + 0x0c, 0x80, 0x00, 0x00, 0x02, 0x57, 0x6f, 0xea, 0x52, 0x81, 0x70, 0x19, + 0xb0, 0x81, 0x66, 0xd0, 0x4e, 0x75, 0x22, 0x7c, 0x00, 0xff, 0x00, 0x02, + 0x72, 0x00, 0x10, 0x11, 0x66, 0x12, 0x70, 0x00, 0x30, 0x01, 0xd0, 0x80, + 0x20, 0x40, 0xd1, 0xfc, 0x00, 0xff, 0x03, 0x00, 0x30, 0x89, 0x52, 0x81, + 0xb3, 0xfc, 0x00, 0xff, 0x02, 0x57, 0x67, 0x08, 0x52, 0x89, 0x70, 0x64, + 0xb0, 0x81, 0x66, 0xda, 0x33, 0xc1, 0x00, 0xff, 0x05, 0x00, 0x4e, 0x75, + 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x10, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x24, + 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x36, 0x4e, 0xb9, 0x00, 0x00, 0x02, 0x6a, + 0x33, 0xfc, 0xde, 0xad, 0x00, 0xff, 0x05, 0x02, 0x4e, 0x75 }; // First 100 prime numbers for verification diff --git a/tests/profiler_test.cpp b/tests/profiler_test.cpp new file mode 100644 index 0000000..fc8655a --- /dev/null +++ b/tests/profiler_test.cpp @@ -0,0 +1,335 @@ +/** + * gxtest - Profiler Test + * + * Tests the CPU cycle profiler using the prime sieve ROM. + * Verifies: + * 1. Cycle counting works correctly + * 2. Function attribution via manually added symbols + * 3. Sample-based profiling produces reasonable estimates + * 4. Profiler state management (start/stop/reset) + */ + +#include +#include +#include "prime_sieve_rom.h" +#include +#include +#include +#include + +namespace { + +using namespace GX::TestRoms; + +// Prime sieve ROM function addresses (from ELF: m68k-elf-nm -S prime_sieve.elf) +// 00000200 T _start +// 00000210 000014 t clear_sieve +// 00000224 000012 t mark_trivial_composites +// 00000236 000034 t run_sieve +// 0000026a 000036 t collect_primes +// 000002a0 000022 T main +constexpr uint32_t FUNC_START = 0x200; +constexpr uint32_t FUNC_CLEAR_SIEVE = 0x210; +constexpr uint32_t FUNC_MARK_TRIVIAL = 0x224; +constexpr uint32_t FUNC_RUN_SIEVE = 0x236; +constexpr uint32_t FUNC_COLLECT_PRIMES = 0x26A; +constexpr uint32_t FUNC_MAIN = 0x2A0; +constexpr uint32_t FUNC_MAIN_END = 0x2C2; + +class ProfilerTest : public GX::Test { +protected: + GX::Profiler profiler; + + void SetUp() override { + ASSERT_TRUE(emu.LoadRom(PRIME_SIEVE_ROM, PRIME_SIEVE_ROM_SIZE)) + << "Failed to load prime sieve ROM"; + + // Add function symbols for the prime sieve ROM + profiler.AddFunction(FUNC_START, FUNC_CLEAR_SIEVE, "_start"); + profiler.AddFunction(FUNC_CLEAR_SIEVE, FUNC_MARK_TRIVIAL, "clear_sieve"); + profiler.AddFunction(FUNC_MARK_TRIVIAL, FUNC_RUN_SIEVE, "mark_trivial_composites"); + profiler.AddFunction(FUNC_RUN_SIEVE, FUNC_COLLECT_PRIMES, "run_sieve"); + profiler.AddFunction(FUNC_COLLECT_PRIMES, FUNC_MAIN, "collect_primes"); + profiler.AddFunction(FUNC_MAIN, FUNC_MAIN_END, "main"); + } + + void TearDown() override { + if (profiler.IsRunning()) { + profiler.Stop(); + } + } +}; + +// ============================================================================= +// Basic Profiler Tests +// ============================================================================= + +/** + * Test that profiler starts and stops correctly + */ +TEST_F(ProfilerTest, StartStop) { + EXPECT_FALSE(profiler.IsRunning()); + + profiler.Start(); + EXPECT_TRUE(profiler.IsRunning()); + + profiler.Stop(); + EXPECT_FALSE(profiler.IsRunning()); +} + +/** + * Test that symbols are loaded correctly + */ +TEST_F(ProfilerTest, SymbolsLoaded) { + EXPECT_EQ(profiler.GetSymbolCount(), 6u); +} + +/** + * Test that cycles are counted during execution + */ +TEST_F(ProfilerTest, CyclesCounted) { + profiler.Start(); + + // Run until sieve completes + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + + profiler.Stop(); + + // Should have counted some cycles + uint64_t total = profiler.GetTotalCycles(); + EXPECT_GT(total, 0u) << "Should have counted cycles"; + + // Prime sieve should take at least a few thousand cycles + EXPECT_GT(total, 1000u) << "Expected significant cycle count for prime sieve"; +} + +/** + * Test that function stats are populated + */ +TEST_F(ProfilerTest, FunctionStatsPopulated) { + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + const auto& stats = profiler.GetAllStats(); + + // Should have stats for at least one function + EXPECT_FALSE(stats.empty()) << "Should have function stats"; + + // Check that cycles were attributed + uint64_t total_attributed = 0; + for (const auto& kv : stats) { + total_attributed += kv.second.cycles_exclusive; + } + EXPECT_GT(total_attributed, 0u) << "Should have attributed cycles to functions"; +} + +/** + * Test profiler reset clears stats + */ +TEST_F(ProfilerTest, ResetClearsStats) { + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + uint64_t before = profiler.GetTotalCycles(); + EXPECT_GT(before, 0u); + + profiler.Reset(); + + EXPECT_EQ(profiler.GetTotalCycles(), 0u) << "Reset should clear total cycles"; + + // Stats should still exist but be zeroed + const auto& stats = profiler.GetAllStats(); + for (const auto& kv : stats) { + EXPECT_EQ(kv.second.cycles_exclusive, 0u) << "Reset should clear function cycles"; + EXPECT_EQ(kv.second.call_count, 0u) << "Reset should clear call counts"; + } +} + +// ============================================================================= +// Sample-Based Profiling Tests +// ============================================================================= + +/** + * Test that sample rate is correctly reported + */ +TEST_F(ProfilerTest, SampleRateReported) { + GX::ProfileOptions opts; + opts.sample_rate = 10; + profiler.Start(opts); + + EXPECT_EQ(profiler.GetSampleRate(), 10u); + + profiler.Stop(); +} + +/** + * Test sampled profiling produces reasonable cycle estimates + */ +TEST_F(ProfilerTest, SampledProfilingCycles) { + // First run with full profiling + profiler.Start(GX::ProfileMode::Simple); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + uint64_t full_cycles = profiler.GetTotalCycles(); + ASSERT_GT(full_cycles, 0u); + + // Reset emulator and profiler + emu.Reset(); + profiler.Reset(); + + // Run with 1/10 sampling + GX::ProfileOptions opts; + opts.sample_rate = 10; + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + uint64_t sampled_cycles = profiler.GetTotalCycles(); + + // Sampled cycles should be close to full cycles (same computation) + // Allow 5% tolerance since sampling introduces some variance + double ratio = static_cast(sampled_cycles) / full_cycles; + EXPECT_NEAR(ratio, 1.0, 0.05) + << "Sampled cycles (" << sampled_cycles << ") should be close to full cycles (" + << full_cycles << "), ratio=" << ratio; +} + +/** + * Test that higher sample rates reduce profiler overhead (faster execution) + * This is a sanity check that sampling is actually skipping work + */ +TEST_F(ProfilerTest, SamplingReducesOverhead) { + // Time full profiling + auto start1 = std::chrono::high_resolution_clock::now(); + profiler.Start(GX::ProfileMode::Simple); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + auto end1 = std::chrono::high_resolution_clock::now(); + auto full_time = std::chrono::duration_cast(end1 - start1); + + emu.Reset(); + profiler.Reset(); + + // Time sampled profiling (1/100) + GX::ProfileOptions opts; + opts.sample_rate = 100; + + auto start2 = std::chrono::high_resolution_clock::now(); + profiler.Start(opts); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + auto end2 = std::chrono::high_resolution_clock::now(); + auto sampled_time = std::chrono::duration_cast(end2 - start2); + + // Sampled should be faster (or at least not slower) + // Note: For very fast ROMs like prime_sieve, the difference may be small + std::cout << "Full profiling: " << full_time.count() << " us" << std::endl; + std::cout << "Sampled (1/100): " << sampled_time.count() << " us" << std::endl; + + // Just verify both completed - timing can be noisy on fast operations + EXPECT_GT(full_time.count(), 0); + EXPECT_GT(sampled_time.count(), 0); +} + +// ============================================================================= +// Multiple Runs Tests +// ============================================================================= + +/** + * Test that profiler accumulates across multiple start/stop cycles + */ +TEST_F(ProfilerTest, AccumulatesAcrossRuns) { + // First run + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + uint64_t first_cycles = profiler.GetTotalCycles(); + + // Reset emulator but NOT profiler + emu.Reset(); + + // Second run - profiler accumulates + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + uint64_t total_cycles = profiler.GetTotalCycles(); + + // Should have roughly 2x the cycles + EXPECT_GT(total_cycles, first_cycles) << "Cycles should accumulate"; + double ratio = static_cast(total_cycles) / first_cycles; + EXPECT_NEAR(ratio, 2.0, 0.1) << "Expected ~2x cycles after two runs"; +} + +/** + * Test consistent results across repeated profiling + */ +TEST_F(ProfilerTest, ConsistentResults) { + std::vector cycle_counts; + + for (int i = 0; i < 5; i++) { + emu.Reset(); + profiler.Reset(); + + profiler.Start(); + emu.RunUntilMemoryEquals(DONE_FLAG_ADDR + 1, 0xAD, 60); + profiler.Stop(); + + cycle_counts.push_back(profiler.GetTotalCycles()); + } + + // All runs should produce identical cycle counts (deterministic emulation) + for (size_t i = 1; i < cycle_counts.size(); i++) { + EXPECT_EQ(cycle_counts[i], cycle_counts[0]) + << "Run " << i << " cycle count differs from run 0"; + } +} + +// ============================================================================= +// Edge Cases +// ============================================================================= + +/** + * Test profiling with no symbols + */ +TEST_F(ProfilerTest, NoSymbols) { + GX::Profiler empty_profiler; + EXPECT_EQ(empty_profiler.GetSymbolCount(), 0u); + + empty_profiler.Start(); + emu.RunFrames(10); + empty_profiler.Stop(); + + // Should still count total cycles + EXPECT_GT(empty_profiler.GetTotalCycles(), 0u); + + // But no function stats + EXPECT_TRUE(empty_profiler.GetAllStats().empty()); +} + +/** + * Test stopping profiler that wasn't started + */ +TEST_F(ProfilerTest, StopWithoutStart) { + EXPECT_FALSE(profiler.IsRunning()); + profiler.Stop(); // Should not crash + EXPECT_FALSE(profiler.IsRunning()); +} + +/** + * Test starting profiler twice + */ +TEST_F(ProfilerTest, DoubleStart) { + profiler.Start(); + EXPECT_TRUE(profiler.IsRunning()); + + profiler.Start(); // Should be no-op + EXPECT_TRUE(profiler.IsRunning()); + + profiler.Stop(); + EXPECT_FALSE(profiler.IsRunning()); +} + +} // namespace