diff --git a/BUILD.bazel b/BUILD.bazel
index f18e18c..e51f186 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -21,6 +21,8 @@ cc_library(
     ]) + [
         # Stubs for Sega CD, MegaSD, YX5200 (not needed for cartridge games)
         "src/stubs.c",
+        # CPU hook support for profiling
+        "vendor/genplusgx/debug/cpuhook.c",
     ],
     hdrs = glob([
         "vendor/genplusgx/*.h",
@@ -32,8 +34,10 @@ cc_library(
         "vendor/genplusgx/cart_hw/svp/*.h",
         "vendor/genplusgx/ntsc/*.h",
         "vendor/genplusgx/cd_hw/*.h",
+        "vendor/genplusgx/debug/*.h",
     ]) + ["src/osd.h"],
     copts = [
+        "-O3",  # Always optimize emulator for fast test execution
         "-Wno-unused-parameter",
         "-Wno-sign-compare",
         "-Wno-implicit-function-declaration",
@@ -48,6 +52,7 @@ cc_library(
         "HAVE_YM3438_CORE",
         "HAVE_OPLL_CORE",
         "LSB_FIRST",  # Most modern systems (x86, ARM) are little-endian
+        "HOOK_CPU",  # Enable CPU hooks for profiling
     ],
     includes = [
         "src",
@@ -60,6 +65,7 @@ cc_library(
         "vendor/genplusgx/cart_hw/svp",
         "vendor/genplusgx/ntsc",
         "vendor/genplusgx/cd_hw",
+        "vendor/genplusgx/debug",
     ],
     # Link math library on Linux (handled automatically on other platforms)
     linkopts = ["-lm"],
@@ -70,13 +76,16 @@ cc_library(
     name = "gxtest",
     srcs = [
         "src/gxtest.cpp",
+        "src/profiler.cpp",
     ],
     hdrs = [
         "include/gxtest.h",
+        "include/profiler.h",
         "src/osd.h",
     ],
     defines = [
         "LSB_FIRST",  # Most modern systems (x86, ARM) are little-endian
+        "HOOK_CPU",  # Enable CPU hooks for profiling
     ],
     includes = [
         "include",
@@ -90,6 +99,7 @@ cc_library(
         "vendor/genplusgx/cart_hw/svp",
         "vendor/genplusgx/ntsc",
         "vendor/genplusgx/cd_hw",
+        "vendor/genplusgx/debug",
     ],
     deps = [
         ":genplusgx_core",
diff --git a/include/gxtest.h b/include/gxtest.h
index ee7a1ac..d425c2c 100644
--- a/include/gxtest.h
+++ b/include/gxtest.h
@@ -4,7 +4,38 @@
  * This library wraps the Genesis Plus GX emulator core, providing a clean C++
  * interface for headless execution and memory instrumentation.
  *
- * Example usage:
+ * THREAD SAFETY WARNING:
+ * The Genesis Plus GX emulator uses global state throughout its implementation
+ * (config, bitmap, cart.rom, work_ram, zram, input state, VDP state, etc.).
+ * This means:
+ *
+ * - Multiple GX::Emulator instances CANNOT run concurrently in threads
+ * - Creating multiple Emulator objects shares the same underlying state
+ * - For parallel test execution, use process-based parallelism (fork())
+ *   instead of thread-based parallelism (std::thread, std::async)
+ *
+ * Example of SAFE parallel execution (fork-based):
+ *
+ *   pid_t pid = fork();
+ *   if (pid < 0) {
+ *       perror("fork");  // Handle fork() error
+ *   } else if (pid == 0) {
+ *       // Child process - has its own copy of global state
+ *       GX::Emulator emu;
+ *       emu.LoadRom("game.bin");
+ *       emu.RunFrames(1000);
+ *       // Write results to pipe, then exit child
+ *       _exit(0);
+ *   }
+ *   // Parent collects results from children
+ *
+ * Example of UNSAFE parallel execution (will crash or corrupt state):
+ *
+ *   // DON'T DO THIS - threads share global state
+ *   std::thread t1([](){ GX::Emulator e1; e1.LoadRom("a.bin"); e1.RunFrames(100); });
+ *   std::thread t2([](){ GX::Emulator e2; e2.LoadRom("b.bin"); e2.RunFrames(100); });
+ *
+ * Basic usage example:
  *
  *   #include <gxtest.h>
  *
@@ -59,13 +90,17 @@ struct Input {
 
 /**
  * Emulator wrapper class providing the test harness interface
+ *
+ * WARNING: NOT THREAD-SAFE. Genesis Plus GX uses global state, so only one
+ * Emulator can be active per process. For parallel execution, use fork()
+ * to run each emulator in a separate process. See file header for details.
  */
 class Emulator {
 public:
     Emulator();
     ~Emulator();
 
-    // Prevent copying (singleton emulator state)
+    // Prevent copying (singleton emulator state - there's only one underlying emulator)
     Emulator(const Emulator&) = delete;
     Emulator& operator=(const Emulator&) = delete;
 
diff --git a/include/profiler.h b/include/profiler.h
new file mode 100644
index 0000000..cc856b4
--- /dev/null
+++ b/include/profiler.h
@@ -0,0 +1,208 @@
+/**
+ * profiler.h - 68k CPU cycle profiler for Genesis Plus GX
+ *
+ * Provides per-function cycle counting using the emulator's native cpu_hook
+ * mechanism. No ROM modification required - profiling is done entirely in
+ * the emulator by tracking PC values and cycle counts.
+ *
+ * Usage:
+ *   GX::Profiler profiler;
+ *   profiler.AddFunction(0x001000, 0x001100, "generate_moves");
+ *   profiler.AddFunction(0x001100, 0x001200, "score_move");
+ *   // Or load from ELF: profiler.LoadSymbolsFromELF("game.elf");
+ *
+ *   profiler.Start();
+ *   emu.RunFrames(1000);
+ *   profiler.Stop();
+ *
+ *   profiler.PrintReport(std::cout);
+ */
+
+#ifndef GXTEST_PROFILER_H
+#define GXTEST_PROFILER_H
+
+#include <cstdint>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace GX {
+
+/**
+ * Profiling mode
+ */
+enum class ProfileMode {
+    Simple,     // Fast - just tracks which function PC is in
+    CallStack   // Tracks call stack for inclusive cycle counts
+};
+
+/**
+ * Statistics for a single function
+ */
+struct FunctionStats {
+    uint64_t call_count = 0;        // Number of times function was entered
+    uint64_t cycles_exclusive = 0;  // Cycles spent in this function only
+    uint64_t cycles_inclusive = 0;  // Cycles including callees (CallStack mode only)
+};
+
+/**
+ * Call stack frame for tracking nested function calls
+ */
+struct CallFrame {
+    uint32_t func_addr;     // Start address of function
+    int64_t entry_cycles;   // Profiler's accumulated cycle total when function was entered
+};
+
+/**
+ * Function definition from symbol table
+ */
+struct FunctionDef {
+    uint32_t start_addr;
+    uint32_t end_addr;
+    std::string name;
+};
+
+/**
+ * 68k CPU cycle profiler
+ *
+ * Tracks cycles per function using the emulator's cpu_hook callback.
+ * Simply attributes cycles to whichever function the PC is currently in.
+ * Minimal overhead - just a binary search lookup per instruction.
+ */
+class Profiler {
+public:
+    Profiler();
+    ~Profiler();
+
+    // Prevent copying (uses global hook)
+    Profiler(const Profiler&) = delete;
+    Profiler& operator=(const Profiler&) = delete;
+
+    // -------------------------------------------------------------------------
+    // Symbol Table Management
+    // -------------------------------------------------------------------------
+
+    /**
+     * Add a function to the symbol table
+     * @param start_addr Start address (inclusive)
+     * @param end_addr End address (exclusive)
+     * @param name Function name for reporting
+     */
+    void AddFunction(uint32_t start_addr, uint32_t end_addr, const std::string& name);
+
+    /**
+     * Load symbols from an ELF file
+     * @param elf_path Path to ELF file with debug symbols
+     * @return Number of functions loaded, or -1 on error
+     */
+    int LoadSymbolsFromELF(const std::string& elf_path);
+
+    /**
+     * Load symbols from nm-style text output
+     * Format: "address size name" per line (hex address, decimal size)
+     * @param path Path to symbol file
+     * @return Number of functions loaded, or -1 on error
+     */
+    int LoadSymbolsFromFile(const std::string& path);
+
+    /**
+     * Clear all symbols
+     */
+    void ClearSymbols();
+
+    /**
+     * Get number of loaded symbols
+     */
+    size_t GetSymbolCount() const { return functions_.size(); }
+
+    // -------------------------------------------------------------------------
+    // Profiling Control
+    // -------------------------------------------------------------------------
+
+    /**
+     * Start profiling - installs the cpu_hook callback (stops any other active Profiler)
+     * @param mode ProfileMode::Simple (fast) or ProfileMode::CallStack (inclusive cycles)
+     */
+    void Start(ProfileMode mode = ProfileMode::Simple);
+
+    /**
+     * Stop profiling - removes the cpu_hook callback
+     */
+    void Stop();
+
+    /**
+     * Check if profiling is active
+     */
+    bool IsRunning() const { return running_; }
+
+    /**
+     * Reset all statistics (keeps symbols)
+     */
+    void Reset();
+
+    // -------------------------------------------------------------------------
+    // Results
+    // -------------------------------------------------------------------------
+
+    /**
+     * Get statistics for a specific function by address
+     * @return Pointer to stats, or nullptr if not found
+     */
+    const FunctionStats* GetStats(uint32_t func_addr) const;
+
+    /**
+     * Get all function statistics
+     * @return Map of function start address to stats
+     */
+    const std::unordered_map<uint32_t, FunctionStats>& GetAllStats() const { return stats_; }
+
+    /**
+     * Get total cycles recorded
+     */
+    uint64_t GetTotalCycles() const { return total_cycles_; }
+
+    /**
+     * Print a formatted profile report
+     * @param out Output stream
+     * @param max_functions Maximum functions to show (0 = all)
+     */
+    void PrintReport(std::ostream& out, size_t max_functions = 0) const;
+
+    // -------------------------------------------------------------------------
+    // Internal (called by cpu_hook)
+    // -------------------------------------------------------------------------
+
+    /** Called by cpu_hook on each instruction execute */
+    void OnExecute(uint32_t pc);
+
+private:
+    /** Look up function containing address */
+    const FunctionDef* LookupFunction(uint32_t addr) const;
+
+    /** Read 16-bit word from 68k address space */
+    uint16_t ReadWord(uint32_t addr) const;
+
+    /** Check if opcode is JSR or BSR */
+    bool IsCallOpcode(uint16_t opcode) const;
+
+    /** Check if opcode is RTS or RTR */
+    bool IsReturnOpcode(uint16_t opcode) const;
+
+    std::vector<FunctionDef> functions_;  // Sorted by start_addr
+    std::unordered_map<uint32_t, FunctionStats> stats_;
+    std::vector<CallFrame> call_stack_;   // For CallStack mode
+
+    ProfileMode mode_ = ProfileMode::Simple;
+    bool running_ = false;
+    uint32_t last_pc_ = 0;
+    int64_t last_cycles_ = 0;
+    uint64_t total_cycles_ = 0;
+};
+
+/** Global profiler instance (needed for cpu_hook callback) */
+Profiler* GetActiveProfiler();
+
+} // namespace GX
+
+#endif // GXTEST_PROFILER_H
diff --git a/src/profiler.cpp b/src/profiler.cpp
new file mode 100644
index 0000000..9bd12a1
--- /dev/null
+++ b/src/profiler.cpp
@@ -0,0 +1,360 @@
+/**
+ * profiler.cpp - 68k CPU cycle profiler implementation
+ */
+
+#include "profiler.h"
+#include <algorithm>
+#include <cstdio>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+
+// Genesis Plus GX headers (C linkage)
+extern "C" {
+#include "shared.h"
+#include "cpuhook.h"
+}
+
+namespace GX {
+
+// Global profiler for cpu_hook callback
+static Profiler* g_active_profiler = nullptr;
+
+// cpu_hook callback - called before each 68k instruction
+static void ProfilerHook(hook_type_t type, int /*width*/, unsigned int address, unsigned int /*value*/) {
+    if (type == HOOK_M68K_E && g_active_profiler) {
+        g_active_profiler->OnExecute(address);
+    }
+}
+
+// Wrap a string in single quotes so the shell run by popen() treats it as
+// one literal argument; embedded single quotes are emitted as '\''
+static std::string ShellQuote(const std::string& arg) {
+    std::string quoted = "'";
+    for (char c : arg) {
+        if (c == '\'') {
+            quoted += "'\\''";
+        } else {
+            quoted += c;
+        }
+    }
+    quoted += "'";
+    return quoted;
+}
+
+Profiler* GetActiveProfiler() {
+    return g_active_profiler;
+}
+
+// ---------------------------------------------------------------------------
+// Profiler Implementation
+// ---------------------------------------------------------------------------
+
+Profiler::Profiler() {}
+
+Profiler::~Profiler() {
+    if (running_) {
+        Stop();
+    }
+}
+
+void Profiler::AddFunction(uint32_t start_addr, uint32_t end_addr, const std::string& name) {
+    FunctionDef func = {start_addr, end_addr, name};
+
+    // Insert in sorted order by start_addr
+    auto it = std::lower_bound(functions_.begin(), functions_.end(), func,
+        [](const FunctionDef& a, const FunctionDef& b) {
+            return a.start_addr < b.start_addr;
+        });
+    functions_.insert(it, func);
+
+    // Initialize stats for this function
+    stats_[start_addr] = FunctionStats();
+}
+
+int Profiler::LoadSymbolsFromELF(const std::string& elf_path) {
+    // Use nm to extract symbols; popen() runs the command through the shell, so
+    // the path is shell-quoted. Output format: "address size type name" (size optional)
+    std::string cmd = "nm -S --defined-only " + ShellQuote(elf_path) + " 2>/dev/null";
+    FILE* pipe = popen(cmd.c_str(), "r");
+    if (!pipe) {
+        return -1;
+    }
+
+    int count = 0;
+    char line[512];
+    while (fgets(line, sizeof(line), pipe)) {
+        uint32_t addr, size;
+        char type;
+        char name[256];
+
+        // Parse: "address size type name" (with size) or "address type name" (without)
+        if (sscanf(line, "%x %x %c %255s", &addr, &size, &type, name) == 4) {
+            // Has size - use it
+            if (type == 'T' || type == 't') {  // Text (code) symbols only
+                AddFunction(addr, addr + size, name);
+                count++;
+            }
+        } else if (sscanf(line, "%x %c %255s", &addr, &type, name) == 3) {
+            // No size - estimate from next symbol (done after loading all)
+            if (type == 'T' || type == 't') {
+                AddFunction(addr, addr + 0x100, name);  // Default 256 bytes
+                count++;
+            }
+        }
+    }
+
+    pclose(pipe);
+
+    // Fix up end addresses based on next function start
+    for (size_t i = 0; i + 1 < functions_.size(); i++) {
+        if (functions_[i].end_addr > functions_[i + 1].start_addr) {
+            functions_[i].end_addr = functions_[i + 1].start_addr;
+        }
+    }
+
+    return count;
+}
+
+int Profiler::LoadSymbolsFromFile(const std::string& path) {
+    std::ifstream file(path);
+    if (!file) {
+        return -1;
+    }
+
+    int count = 0;
+    std::string line;
+    while (std::getline(file, line)) {
+        uint32_t addr, size;
+        char name[256];
+
+        if (sscanf(line.c_str(), "%x %u %255s", &addr, &size, name) == 3) {
+            AddFunction(addr, addr + size, name);
+            count++;
+        }
+    }
+
+    return count;
+}
+
+void Profiler::ClearSymbols() {
+    functions_.clear();
+    stats_.clear();
+}
+
+void Profiler::Start(ProfileMode mode) {
+    if (running_) return;
+
+    // The cpu_hook is global, so only one profiler can own it at a time:
+    // stop any other active instance so its running_ flag stays truthful
+    // and its later Stop()/destructor cannot remove our hook
+    if (g_active_profiler && g_active_profiler != this) {
+        g_active_profiler->Stop();
+    }
+
+    mode_ = mode;
+    g_active_profiler = this;
+    set_cpu_hook(ProfilerHook);
+    running_ = true;
+    last_pc_ = 0;
+    last_cycles_ = m68k.cycles;
+    call_stack_.clear();
+}
+
+void Profiler::Stop() {
+    if (!running_) return;
+
+    set_cpu_hook(nullptr);
+    g_active_profiler = nullptr;
+    running_ = false;
+}
+
+void Profiler::Reset() {
+    for (auto& kv : stats_) {
+        kv.second = FunctionStats();
+    }
+    call_stack_.clear();
+    total_cycles_ = 0;
+    last_pc_ = 0;
+    if (running_) {
+        last_cycles_ = m68k.cycles;
+    }
+}
+
+const FunctionStats* Profiler::GetStats(uint32_t func_addr) const {
+    auto it = stats_.find(func_addr);
+    return it != stats_.end() ? &it->second : nullptr;
+}
+
+const FunctionDef* Profiler::LookupFunction(uint32_t addr) const {
+    if (functions_.empty()) return nullptr;
+
+    // Binary search for function containing addr
+    auto it = std::upper_bound(functions_.begin(), functions_.end(), addr,
+        [](uint32_t a, const FunctionDef& f) {
+            return a < f.start_addr;
+        });
+
+    if (it == functions_.begin()) return nullptr;
+    --it;
+
+    if (addr >= it->start_addr && addr < it->end_addr) {
+        return &(*it);
+    }
+    return nullptr;
+}
+
+uint16_t Profiler::ReadWord(uint32_t addr) const {
+    // Read from ROM (cart.rom is byteswapped on little-endian); both bytes must be in range
+    if (addr < 0x400000 && addr + 1 < cart.romsize) {
+#ifdef LSB_FIRST
+        return (cart.rom[addr ^ 1] << 8) | cart.rom[(addr + 1) ^ 1];
+#else
+        return (cart.rom[addr] << 8) | cart.rom[addr + 1];
+#endif
+    }
+    return 0;
+}
+
+void Profiler::OnExecute(uint32_t pc) {
+    // Get cycles since last instruction
+    int64_t current_cycles = m68k.cycles;
+    int64_t delta = current_cycles - last_cycles_;
+    last_cycles_ = current_cycles;
+
+    if (delta <= 0) return;  // First call or cycle counter wrapped
+
+    total_cycles_ += delta;
+
+    // Attribute cycles to current function
+    // NOTE(review): the hook fires before `pc` executes, so delta looks like the cost
+    // of the previous instruction, yet it is charged to pc's function - confirm intent
+    const FunctionDef* func = LookupFunction(pc);
+    if (func) {
+        auto& s = stats_[func->start_addr];
+        s.cycles_exclusive += delta;
+
+        // Count function entry (PC moved into this function from outside)
+        if (last_pc_ != 0) {
+            const FunctionDef* last_func = LookupFunction(last_pc_);
+            if (last_func != func) {
+                s.call_count++;
+            }
+        }
+    }
+
+    // CallStack mode: track JSR/BSR/RTS for inclusive cycles
+    // Frames are timestamped with total_cycles_ (monotonic) rather than the raw
+    // m68k.cycles counter, which can move backwards (see the delta check above)
+    if (mode_ == ProfileMode::CallStack && last_pc_ != 0) {
+        uint16_t opcode = ReadWord(last_pc_);
+
+        if (IsCallOpcode(opcode)) {
+            // Entering a new function - push frame
+            if (func) {
+                call_stack_.push_back({func->start_addr, static_cast<int64_t>(total_cycles_)});
+            }
+        } else if (IsReturnOpcode(opcode) && !call_stack_.empty()) {
+            // Returning from function - pop frame and accumulate inclusive time
+            CallFrame frame = call_stack_.back();
+            call_stack_.pop_back();
+
+            int64_t inclusive = static_cast<int64_t>(total_cycles_) - frame.entry_cycles;
+            if (inclusive > 0) {
+                stats_[frame.func_addr].cycles_inclusive += inclusive;
+            }
+        }
+    }
+
+    last_pc_ = pc;
+}
+
+void Profiler::PrintReport(std::ostream& out, size_t max_functions) const {
+    // Preserve the caller's stream formatting; the manipulators below are sticky
+    const std::ios_base::fmtflags saved_flags = out.flags();
+    const std::streamsize saved_precision = out.precision();
+
+    // Build sorted list by cycles (descending)
+    struct FuncReport {
+        std::string name;
+        uint32_t addr;
+        uint64_t cycles_excl;
+        uint64_t cycles_incl;
+        uint64_t calls;
+    };
+
+    std::vector<FuncReport> report;
+    for (const auto& func : functions_) {
+        auto it = stats_.find(func.start_addr);
+        if (it != stats_.end() && it->second.cycles_exclusive > 0) {
+            report.push_back({
+                func.name,
+                func.start_addr,
+                it->second.cycles_exclusive,
+                it->second.cycles_inclusive,
+                it->second.call_count
+            });
+        }
+    }
+
+    std::sort(report.begin(), report.end(),
+        [](const FuncReport& a, const FuncReport& b) {
+            return a.cycles_excl > b.cycles_excl;
+        });
+
+    if (max_functions > 0 && report.size() > max_functions) {
+        report.resize(max_functions);
+    }
+
+    // Print header
+    bool show_inclusive = (mode_ == ProfileMode::CallStack);
+    out << "\n";
+    out << std::setw(30) << std::left << "Function"
+        << std::setw(12) << std::right << "Cycles";
+    if (show_inclusive) {
+        out << std::setw(12) << "Inclusive";
+    }
+    out << std::setw(10) << "Calls"
+        << std::setw(8) << "%"
+        << std::setw(10) << "Cyc/Call"
+        << "\n";
+    out << std::string(show_inclusive ? 82 : 70, '-') << "\n";
+
+    // Print functions
+    for (const auto& r : report) {
+        double pct = total_cycles_ > 0 ? 100.0 * r.cycles_excl / total_cycles_ : 0.0;
+        uint64_t per_call = r.calls > 0 ? r.cycles_excl / r.calls : 0;
+
+        out << std::setw(30) << std::left << r.name
+            << std::setw(12) << std::right << r.cycles_excl;
+        if (show_inclusive) {
+            out << std::setw(12) << r.cycles_incl;
+        }
+        out << std::setw(10) << r.calls
+            << std::setw(7) << std::fixed << std::setprecision(2) << pct << "%"
+            << std::setw(10) << per_call
+            << "\n";
+    }
+
+    out << std::string(show_inclusive ? 82 : 70, '-') << "\n";
+    out << std::setw(30) << std::left << "Total"
+        << std::setw(12) << std::right << total_cycles_
+        << "\n";
+
+    out.flags(saved_flags);
+    out.precision(saved_precision);
+}
+
+bool Profiler::IsCallOpcode(uint16_t opcode) const {
+    // JSR: 0100 1110 10xx xxxx (0x4E80-0x4EBF)
+    // BSR: 0110 0001 xxxx xxxx (0x6100-0x61FF)
+    return ((opcode & 0xFFC0) == 0x4E80) || ((opcode & 0xFF00) == 0x6100);
+}
+
+bool Profiler::IsReturnOpcode(uint16_t opcode) const {
+    // RTS: 0x4E75
+    // RTR: 0x4E77
+    return opcode == 0x4E75 || opcode == 0x4E77;
+}
+
+} // namespace GX
diff --git a/vendor/genplusgx/debug/cpuhook.h b/vendor/genplusgx/debug/cpuhook.h
index 1fea3ec..a890266 100644
--- a/vendor/genplusgx/debug/cpuhook.h
+++ b/vendor/genplusgx/debug/cpuhook.h
@@ -80,7 +80,7 @@ typedef enum {
 
 /* CPU hook is called on read, write, and execute.
  */
-void (*cpu_hook)(hook_type_t type, int width, unsigned int address, unsigned int value);
+extern void (*cpu_hook)(hook_type_t type, int width, unsigned int address, unsigned int value);
 
 /* Use set_cpu_hook() to assign a callback that can process the data provided
  * by cpu_hook().