-
Notifications
You must be signed in to change notification settings - Fork 0
Add thread-safety documentation and -O3 optimization #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -4,7 +4,38 @@ | |||||||||||||||||||
| * This library wraps the Genesis Plus GX emulator core, providing a clean C++ | ||||||||||||||||||||
| * interface for headless execution and memory instrumentation. | ||||||||||||||||||||
| * | ||||||||||||||||||||
| * Example usage: | ||||||||||||||||||||
| * THREAD SAFETY WARNING: | ||||||||||||||||||||
| * The Genesis Plus GX emulator uses global state throughout its implementation | ||||||||||||||||||||
| * (config, bitmap, cart.rom, work_ram, zram, input state, VDP state, etc.). | ||||||||||||||||||||
| * This means: | ||||||||||||||||||||
| * | ||||||||||||||||||||
| * - Multiple GX::Emulator instances CANNOT run concurrently in threads | ||||||||||||||||||||
| * - Creating multiple Emulator objects shares the same underlying state | ||||||||||||||||||||
| * - For parallel test execution, use process-based parallelism (fork()) | ||||||||||||||||||||
| * instead of thread-based parallelism (std::thread, std::async) | ||||||||||||||||||||
| * | ||||||||||||||||||||
| * Example of SAFE parallel execution (fork-based): | ||||||||||||||||||||
| * | ||||||||||||||||||||
| * pid_t pid = fork(); | ||||||||||||||||||||
| * if (pid < 0) { | ||||||||||||||||||||
| * perror("fork"); // Handle fork() error | ||||||||||||||||||||
| * } else if (pid == 0) { | ||||||||||||||||||||
| * // Child process - has its own copy of global state | ||||||||||||||||||||
| * GX::Emulator emu; | ||||||||||||||||||||
| * emu.LoadRom("game.bin"); | ||||||||||||||||||||
| * emu.RunFrames(1000); | ||||||||||||||||||||
| * // Write results to pipe, then exit child | ||||||||||||||||||||
| * _exit(0); | ||||||||||||||||||||
| * } | ||||||||||||||||||||
| * // Parent collects results from children | ||||||||||||||||||||
|
Comment on lines
+29
to
+30
|
||||||||||||||||||||
| * } | |
| * // Parent collects results from children | |
| * } else { | |
| * int status = 0; | |
| * if (waitpid(pid, &status, 0) < 0) { | |
| * perror("waitpid"); // Handle waitpid() error | |
| * } | |
| * // Parent collects results from children | |
| * } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,208 @@ | ||
| /** | ||
| * profiler.h - 68k CPU cycle profiler for Genesis Plus GX | ||
| * | ||
| * Provides per-function cycle counting using the emulator's native cpu_hook | ||
| * mechanism. No ROM modification required - profiling is done entirely in | ||
| * the emulator by tracking PC values and cycle counts. | ||
| * | ||
| * Usage: | ||
| * GX::Profiler profiler; | ||
| * profiler.AddFunction(0x001000, 0x001100, "generate_moves"); | ||
| * profiler.AddFunction(0x001100, 0x001200, "score_move"); | ||
| * // Or load from ELF: profiler.LoadSymbolsFromELF("game.elf"); | ||
| * | ||
| * profiler.Start(); | ||
| * emu.RunFrames(1000); | ||
| * profiler.Stop(); | ||
| * | ||
| * profiler.PrintReport(std::cout); | ||
| */ | ||
|
|
||
| #ifndef GXTEST_PROFILER_H | ||
| #define GXTEST_PROFILER_H | ||
|
|
||
| #include <cstdint> | ||
| #include <string> | ||
| #include <vector> | ||
| #include <unordered_map> | ||
| #include <ostream> | ||
|
|
||
| namespace GX { | ||
|
|
||
| /** | ||
| * Profiling mode | ||
| */ | ||
| enum class ProfileMode { | ||
| Simple, // Fast - attributes cycles to whichever function contains the current PC | ||
| CallStack // Tracks the call stack so inclusive cycle counts can be reported | ||
| }; | ||
|
|
||
| /** | ||
| * Statistics for a single function | ||
| */ | ||
| struct FunctionStats { | ||
| uint64_t call_count = 0; // Number of times the function was entered | ||
| uint64_t cycles_exclusive = 0; // Cycles spent in this function only (callees not included) | ||
| uint64_t cycles_inclusive = 0; // Cycles including callees (CallStack mode only) | ||
| }; | ||
|
|
||
| /** | ||
| * Call stack frame for tracking nested function calls | ||
| */ | ||
| struct CallFrame { | ||
| uint32_t func_addr; // Start address of the function this frame belongs to | ||
| int64_t entry_cycles; // Cycle count recorded when the function was entered | ||
| }; | ||
|
|
||
| /** | ||
| * Function definition from symbol table | ||
| * | ||
| * start_addr / end_addr delimit the function's address range and name is | ||
| * the label used for reporting. | ||
| * NOTE(review): presumably populated by Profiler::AddFunction(), whose | ||
| * documentation defines the start as inclusive and the end as exclusive - | ||
| * confirm against the implementation. | ||
| */ | ||
| struct FunctionDef { | ||
| uint32_t start_addr; | ||
| uint32_t end_addr; | ||
| std::string name; | ||
| }; | ||
|
|
||
| /** | ||
| * 68k CPU cycle profiler | ||
| * | ||
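| * Tracks cycles per function using the emulator's cpu_hook callback. | ||
| * In ProfileMode::Simple, cycles are simply attributed to whichever function | ||
| * the PC is currently in; ProfileMode::CallStack additionally tracks the call | ||
| * stack so that inclusive cycle counts can be reported. | ||
| * Minimal overhead - just a binary search lookup per instruction. | ||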
| */ | ||
| class Profiler { | ||
| public: | ||
| Profiler(); | ||
| ~Profiler(); | ||
|
|
||
| // Non-copyable: the profiler is tied to a global hook | ||
| Profiler(const Profiler&) = delete; | ||
| Profiler& operator=(const Profiler&) = delete; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Symbol Table Management | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| /** | ||
| * Add a function to the symbol table | ||
| * @param start_addr Start address (inclusive) | ||
| * @param end_addr End address (exclusive) | ||
| * @param name Function name for reporting | ||
| */ | ||
| void AddFunction(uint32_t start_addr, uint32_t end_addr, const std::string& name); | ||
|
|
||
| /** | ||
| * Load symbols from an ELF file | ||
| * @param elf_path Path to ELF file with debug symbols | ||
| * @return Number of functions loaded, or -1 on error | ||
| */ | ||
| int LoadSymbolsFromELF(const std::string& elf_path); | ||
|
|
||
| /** | ||
| * Load symbols from nm-style text output | ||
| * Format: "address size name" per line (hex address, decimal size) | ||
| * @param path Path to symbol file | ||
| * @return Number of functions loaded, or -1 on error | ||
| */ | ||
| int LoadSymbolsFromFile(const std::string& path); | ||
|
|
||
| /** | ||
| * Clear all symbols | ||
| */ | ||
| void ClearSymbols(); | ||
|
|
||
| /** | ||
| * Get number of loaded symbols | ||
| * @return Number of entries currently in the symbol table | ||
| */ | ||
| size_t GetSymbolCount() const { return functions_.size(); } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Profiling Control | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| /** | ||
| * Start profiling - installs the cpu_hook callback | ||
| * @param mode ProfileMode::Simple (fast) or ProfileMode::CallStack (inclusive cycles); | ||
| * defaults to ProfileMode::Simple when omitted | ||
| */ | ||
| void Start(ProfileMode mode = ProfileMode::Simple); | ||
|
|
||
| /** | ||
| * Stop profiling - removes the cpu_hook callback | ||
| */ | ||
| void Stop(); | ||
|
|
||
| /** | ||
| * Check if profiling is active | ||
| * @return Current value of the running flag | ||
| */ | ||
| bool IsRunning() const { return running_; } | ||
|
|
||
| /** | ||
| * Reset all statistics (keeps symbols) | ||
| */ | ||
| void Reset(); | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Results | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| /** | ||
| * Get statistics for a specific function by address | ||
| * @param func_addr Function address (presumably the function's start | ||
| * address, matching the keys of GetAllStats() - confirm) | ||
| * @return Pointer to stats, or nullptr if not found | ||
| */ | ||
| const FunctionStats* GetStats(uint32_t func_addr) const; | ||
|
|
||
| /** | ||
| * Get all function statistics | ||
| * @return Map of function start address to stats | ||
| */ | ||
| const std::unordered_map<uint32_t, FunctionStats>& GetAllStats() const { return stats_; } | ||
|
|
||
| /** | ||
| * Get total cycles recorded | ||
| * @return Accumulated total cycle counter | ||
| */ | ||
| uint64_t GetTotalCycles() const { return total_cycles_; } | ||
|
|
||
| /** | ||
| * Print a formatted profile report | ||
| * @param out Output stream | ||
| * @param max_functions Maximum functions to show (0 = all) | ||
| */ | ||
| void PrintReport(std::ostream& out, size_t max_functions = 0) const; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Internal (called by cpu_hook) | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| /** Called by cpu_hook on each instruction execute; pc is presumably the address of that instruction (confirm) */ | ||
| void OnExecute(uint32_t pc); | ||
|
|
||
| private: | ||
| /** Look up function containing address */ | ||
| const FunctionDef* LookupFunction(uint32_t addr) const; | ||
|
|
||
| /** Read 16-bit word from 68k address space */ | ||
| uint16_t ReadWord(uint32_t addr) const; | ||
|
|
||
| /** Check if opcode is JSR or BSR */ | ||
| bool IsCallOpcode(uint16_t opcode) const; | ||
|
|
||
| /** Check if opcode is RTS or RTR */ | ||
| bool IsReturnOpcode(uint16_t opcode) const; | ||
|
|
||
| std::vector<FunctionDef> functions_; // Symbol table, sorted by start_addr | ||
| std::unordered_map<uint32_t, FunctionStats> stats_; | ||
| std::vector<CallFrame> call_stack_; // Active frames, used in CallStack mode | ||
|
|
||
| ProfileMode mode_ = ProfileMode::Simple; | ||
| bool running_ = false; | ||
| uint32_t last_pc_ = 0; | ||
| int64_t last_cycles_ = 0; | ||
| uint64_t total_cycles_ = 0; | ||
| }; | ||
|
|
||
| /** Global profiler instance (needed for cpu_hook callback). NOTE(review): presumably nullptr when no profiler is active - confirm against the implementation. */ | ||
| Profiler* GetActiveProfiler(); | ||
|
|
||
| } // namespace GX | ||
|
|
||
| #endif // GXTEST_PROFILER_H |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The -O3 optimization flag is set unconditionally with the comment "Always optimize emulator for fast test execution". While this is beneficial for test execution speed, it may hinder debugging efforts when issues arise in the emulator core. Consider whether this should respect the build configuration mode (e.g., using select() to apply -O3 only in optimized (opt) builds) to preserve debuggability in debug configurations.